paulhemb committed on
Commit
9760410
·
verified ·
1 Parent(s): 72e921c

Update chat/rag_engine.py

Browse files

Improvements made to rag_engine.py

Files changed (1) hide show
  1. chat/rag_engine.py +720 -255
chat/rag_engine.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  rag_engine.py - Production-Ready Medical RAG Engine
3
- Updated with explicit guideline citation detection
4
  """
5
 
6
  from typing import List, Dict, Any, Optional, Tuple
@@ -385,65 +385,218 @@ except ImportError:
385
 
386
 
387
  # ============================================================================
388
- # EVIDENCE-BASED REASONING FOR MEDICAL RESEARCH
389
  # ============================================================================
390
 
391
- class EvidenceBasedReasoning:
392
- """Single reasoning technique focused on evidence-based medical analysis"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
  @staticmethod
395
- def create_analysis_prompt(query: str, domain: str, user_context: str,
396
- papers_count: int = 0, guideline_info: Dict = None) -> str:
397
- """Create evidence-based reasoning prompt with guideline awareness"""
398
-
399
- # Map user context to focus
400
- context_focus = {
401
- "clinician": "clinical application, treatment decisions, patient management, guideline adherence",
402
- "researcher": "methodology, evidence quality, research implications, guideline gaps",
403
- "student": "understanding concepts, foundational knowledge, guideline-based learning",
404
- "patient": "personal implications, practical next steps, guideline-concordant care",
405
- "administrator": "implementation, resources, systemic considerations, guideline compliance",
406
- "general": "clear explanations, balanced overview, guideline context"
407
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
- focus = context_focus.get(user_context, "evidence-based medical insights")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
- # Add guideline-specific instructions
412
- guideline_context = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  if guideline_info:
414
  if guideline_info.get('guidelines_found'):
415
- guideline_context = f"\n**Guidelines Referenced:** Papers cite {', '.join(guideline_info['guidelines_found'])} guidelines."
416
  if guideline_info.get('critical_missing'):
417
- guideline_context += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'])} guidelines."
418
 
419
- return f"""You are a medical research expert specializing in {domain}.
420
- The user is a {user_context}, so focus on {focus}.
421
- QUERY: {query}
422
- DOMAIN: {domain}
423
- PAPERS ANALYZED: {papers_count}
424
- {guideline_context}
425
- **Perform Evidence-Based Analysis:**
426
- 1. **Evidence Assessment:**
427
- - What types of studies exist on this topic?
428
- - What is the quality and strength of evidence?
429
- - Are there systematic reviews or clinical trials?
430
- - How does the evidence align with current guidelines?
431
- 2. **Domain-Specific Analysis:**
432
- - How does this apply specifically to {domain}?
433
- - What are the standard approaches in this field?
434
- - What innovations or recent advances exist?
435
- - How do findings compare to guideline recommendations?
436
- 3. **Critical Evaluation:**
437
- - What are the strengths of current evidence?
438
- - What limitations or knowledge gaps exist?
439
- - Are there any controversies or alternative views?
440
- - How complete is guideline coverage?
441
- 4. **Practical Implications:**
442
- - What are the actionable insights for {user_context}?
443
- - What are the recommendations or next steps?
444
- - How should this evidence be applied in practice?
445
- - What guideline considerations are important?
446
- Provide a comprehensive, evidence-based answer that synthesizes medical knowledge with practical implications and guideline awareness."""
447
 
448
 
449
  # ============================================================================
@@ -1162,7 +1315,7 @@ class EnhancedRAGEngine:
1162
  print(f"⚠️ LLM not available - using fallback mode: {e}")
1163
  self.llm = None
1164
 
1165
- self.reasoning = EvidenceBasedReasoning()
1166
  self.ranker = PaperRanker()
1167
  self.confidence_scorer = ConfidenceScorer()
1168
  self.context_detector = UserContextDetector()
@@ -1204,6 +1357,7 @@ class EnhancedRAGEngine:
1204
  else:
1205
  print(" 📄 Real paper fetching: DISABLED (using demo papers)")
1206
  print(" 📋 Guideline detection: ENABLED")
 
1207
 
1208
  def answer_research_question(self,
1209
  query: str,
@@ -1212,8 +1366,10 @@ class EnhancedRAGEngine:
1212
  use_memory: bool = True,
1213
  user_context: str = "auto",
1214
  use_fallback: bool = False,
 
 
1215
  **kwargs) -> Dict[str, Any]:
1216
- """Answer medical research questions with evidence-based reasoning and guideline detection"""
1217
 
1218
  start_time = time.time()
1219
  self.metrics['total_queries'] += 1
@@ -1221,23 +1377,35 @@ class EnhancedRAGEngine:
1221
 
1222
  print(f"\n🔍 Processing query: '{query}'")
1223
  print(f" Domain: {domain}")
 
1224
  print(f" Max papers: {max_papers}")
1225
  print(f" Real-time search: {self.use_real_time}")
1226
 
1227
  try:
1228
- # Auto-detect user context if needed
1229
  if user_context == "auto":
1230
  user_context = self.context_detector.detect_context(query, domain)
1231
 
1232
  self.metrics['user_contexts'][user_context] += 1
1233
 
 
 
 
 
 
 
 
 
 
 
 
1234
  # Retrieve papers using MedicalResearchEngine
1235
  print("📚 Retrieving relevant papers...")
1236
  papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
1237
 
1238
  if not papers:
1239
  print("⚠️ No papers found, creating fallback response...")
1240
- return self._create_no_results_response(query, domain, user_context)
1241
 
1242
  # Detect guideline citations
1243
  print("📋 Detecting guideline citations...")
@@ -1254,7 +1422,7 @@ class EnhancedRAGEngine:
1254
  })
1255
 
1256
  # Rank papers
1257
- ranked_papers = self.ranker.rank_papers(papers, query, domain, user_context)
1258
  print(f"📊 Papers found: {len(ranked_papers)}")
1259
 
1260
  # Track paper sources
@@ -1274,29 +1442,29 @@ class EnhancedRAGEngine:
1274
 
1275
  # Calculate confidence with guideline consideration
1276
  confidence = self.confidence_scorer.calculate_confidence(
1277
- ranked_papers, query, "summary", user_context, domain, guideline_info
1278
  )
1279
 
1280
- # Generate analysis using evidence-based reasoning with guideline context
1281
- print("🧠 Generating evidence-based analysis...")
1282
- analysis = self._generate_analysis(
1283
- query, domain, user_context, ranked_papers, guideline_info
1284
  )
1285
 
1286
- # Generate clinical bottom line with guideline awareness
1287
- bottom_line = self._generate_bottom_line(
1288
- query, domain, user_context, len(ranked_papers), real_papers, guideline_info
1289
  )
1290
 
1291
  # Synthesize final answer
1292
- final_answer = self._synthesize_answer(
1293
- query, domain, user_context, analysis, ranked_papers,
1294
  bottom_line, confidence, guideline_info
1295
  )
1296
 
1297
  # Update memory
1298
  if use_memory and self.memory:
1299
- self._update_memory(query, final_answer, domain, user_context, ranked_papers, guideline_info)
1300
 
1301
  # Update metrics
1302
  response_time = time.time() - start_time
@@ -1317,7 +1485,300 @@ class EnhancedRAGEngine:
1317
  print(f"❌ Error in research analysis: {e}")
1318
  import traceback
1319
  traceback.print_exc()
1320
- return self._create_error_response(query, domain, user_context, str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1321
 
1322
  def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
1323
  use_fallback: bool = False) -> List[Dict]:
@@ -1515,53 +1976,51 @@ class EnhancedRAGEngine:
1515
 
1516
  return papers
1517
 
1518
- def _generate_analysis(self, query: str, domain: str, user_context: str,
1519
- papers: List[Dict], guideline_info: Dict = None) -> str:
1520
- """Generate evidence-based analysis with guideline context"""
 
1521
 
1522
  if not self.llm:
1523
- return self._create_fallback_analysis(query, domain, user_context, papers, guideline_info)
1524
 
1525
- # Create reasoning prompt with guideline information
1526
- prompt = self.reasoning.create_analysis_prompt(
1527
- query, domain, user_context, len(papers), guideline_info
1528
  )
1529
 
1530
- # Add paper information
1531
- paper_info = "\n".join([
1532
- f"{i + 1}. {p.get('title', 'Untitled')} ({p.get('source', 'Unknown')})"
1533
- for i, p in enumerate(papers[:5])
1534
- ])
 
 
1535
 
1536
  # Add demo paper disclaimer if any demo papers
1537
  demo_count = sum(1 for p in papers if p.get('is_demo', False))
1538
  if demo_count > 0:
1539
- prompt += f"\n\nNote: {demo_count} of the papers are illustrative examples."
1540
-
1541
- # Add guideline details if available
1542
- if guideline_info:
1543
- if guideline_info.get('guidelines_found'):
1544
- prompt += f"\n\nGuideline Context: Papers reference {len(guideline_info['guidelines_found'])} major guidelines."
1545
- if guideline_info.get('critical_missing'):
1546
- missing_guidelines = ', '.join(guideline_info['critical_missing'][:3])
1547
- prompt += f"\nGuideline Gap: Missing explicit citations for {missing_guidelines} guidelines."
1548
-
1549
- full_prompt = f"{prompt}\n\n**Relevant Papers:**\n{paper_info}\n\n**Analysis:**"
1550
 
1551
  try:
 
 
 
1552
  analysis = self.llm.generate(
1553
- full_prompt,
1554
- system_message=f"You are a {domain.replace('_', ' ')} expert providing evidence-based analysis for a {user_context}. Consider guideline adherence in your assessment.",
1555
- max_tokens=4000
1556
  )
1557
  return analysis
1558
  except Exception as e:
1559
- print(f"⚠️ LLM analysis failed: {e}")
1560
- return self._create_fallback_analysis(query, domain, user_context, papers, guideline_info)
1561
 
1562
- def _create_fallback_analysis(self, query: str, domain: str, user_context: str,
1563
- papers: List[Dict], guideline_info: Dict = None) -> str:
1564
  """Create fallback analysis when LLM is unavailable"""
 
 
1565
  if CONFIG_AVAILABLE:
1566
  try:
1567
  domain_name = get_domain_display_name(domain)
@@ -1574,12 +2033,10 @@ class EnhancedRAGEngine:
1574
  real_papers = [p for p in papers if not p.get('is_demo', False)]
1575
  demo_papers = [p for p in papers if p.get('is_demo', False)]
1576
 
1577
- paper_titles = [p.get('title', '') for p in papers[:3]]
1578
-
1579
- analysis = f"""**Evidence-Based Analysis for {domain_name}**
1580
  **Query:** {query}
1581
- **User Context:** {user_context}
1582
- **Papers Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)"""
1583
 
1584
  # Add guideline information
1585
  if guideline_info:
@@ -1589,39 +2046,44 @@ class EnhancedRAGEngine:
1589
  analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
1590
 
1591
  analysis += f"""
1592
- **Key Findings:**
1593
- Based on analysis of {len(papers)} relevant papers, several key points emerge:
1594
- 1. **Current Evidence:** The literature shows evolving understanding of this topic within {domain_name}. Recent studies have contributed significantly to our knowledge base."""
1595
 
1596
- # Add guideline-specific observations
1597
- if guideline_info and guideline_info.get('critical_missing'):
1598
- analysis += f"\n2. **Guideline Alignment:** Evidence may not fully align with major clinical guidelines. Consider consulting {', '.join(guideline_info['critical_missing'][:2])} guidelines for comprehensive recommendations."
1599
- else:
1600
- analysis += f"\n2. **Clinical Context:** Findings should be interpreted within established clinical frameworks and guidelines."
1601
 
1602
- analysis += f"""
1603
- 3. **Methodological Approaches:** Studies employ various designs including clinical trials, cohort studies, and systematic reviews. The quality of evidence varies across studies.
1604
- 4. **Clinical Implications:** Findings have relevance for {user_context} practice and decision-making. Practical applications should consider individual patient factors and clinical context.
1605
- 5. **Research Gaps:** Further studies are needed to address remaining questions, particularly regarding long-term outcomes and specific patient subgroups.
1606
- **Selected Papers:**
1607
- {chr(10).join([f"- {title}" for title in paper_titles if title])}
1608
- **Recommendations for {user_context}:**
1609
- - Consider the evidence in context of individual circumstances
1610
- - Consult with specialists when appropriate
1611
- - Stay updated with emerging research
1612
- - Apply evidence-based guidelines when available
1613
- - {"Pay attention to guideline gaps noted above" if guideline_info and guideline_info.get('critical_missing') else "Reference established clinical guidelines"}
1614
- *Note: This analysis is based on available literature. For specific cases, consult with healthcare professionals.*"""
 
 
 
 
 
1615
 
1616
  if demo_papers:
1617
- analysis += f"\n\n*Disclaimer: {len(demo_papers)} papers are illustrative examples generated for demonstration purposes.*"
1618
 
1619
  return analysis
1620
 
1621
- def _generate_bottom_line(self, query: str, domain: str, user_context: str,
1622
- papers_count: int, real_papers_count: int,
1623
- guideline_info: Dict = None) -> str:
1624
- """Generate clinical bottom line with guideline awareness"""
 
 
1625
  if CONFIG_AVAILABLE:
1626
  try:
1627
  domain_name = get_domain_display_name(domain)
@@ -1630,41 +2092,43 @@ Based on analysis of {len(papers)} relevant papers, several key points emerge:
1630
  else:
1631
  domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
1632
 
1633
- bottom_line = f"""**Clinical Bottom Line for {user_context}:**
1634
- Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count} real papers), current evidence provides actionable insights for clinical practice."""
1635
 
1636
  # Add guideline-specific bottom line
1637
  if guideline_info:
1638
  if guideline_info.get('guidelines_found'):
1639
- bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced ({', '.join(guideline_info['guidelines_found'][:3])})."
1640
 
1641
  if guideline_info.get('critical_missing'):
1642
- missing_list = ', '.join(guideline_info['critical_missing'][:3])
1643
- bottom_line += f"\n**Important Note:** Missing explicit guideline citations ({missing_list}). Consider consulting these for comprehensive recommendations."
1644
 
1645
  coverage = guideline_info.get('coverage_percentage', 0)
1646
  if coverage < 50:
1647
- bottom_line += f"\n**Evidence Limitations:** Guideline coverage is limited ({coverage}%)."
1648
 
1649
  bottom_line += f"""
1650
- **Key Considerations:**
1651
- - Patient-specific factors and individual risk-benefit assessments
1652
- - Treatment availability and resource constraints
1653
- - Consultation with specialists for complex cases
1654
- - {"Particular attention to guideline gaps noted above" if guideline_info and guideline_info.get('critical_missing') else "Adherence to established clinical guidelines"}
1655
- - Integration with clinical judgment and patient preferences"""
1656
 
1657
  if papers_count > real_papers_count:
1658
- bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for comprehensive analysis.*"
1659
 
1660
  return bottom_line
1661
 
1662
- def _synthesize_answer(self, query: str, domain: str, user_context: str,
1663
- analysis: str, papers: List[Dict],
1664
- bottom_line: str, confidence: Dict[str, Any],
1665
- guideline_info: Dict = None) -> Dict[str, Any]:
1666
- """Synthesize final answer with guideline information"""
1667
 
 
 
1668
  if CONFIG_AVAILABLE:
1669
  try:
1670
  domain_info = {
@@ -1685,18 +2149,13 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1685
  'description': f'Research in {domain.replace("_", " ")}'
1686
  })
1687
 
1688
- context_info = USER_CONTEXT_INFO.get(user_context, {
1689
- 'name': user_context.title(),
1690
- 'icon': '👤'
1691
- })
1692
-
1693
  # Count real vs demo papers
1694
  real_papers = [p for p in papers if not p.get('is_demo', False)]
1695
  demo_papers = [p for p in papers if p.get('is_demo', False)]
1696
 
1697
- # Format paper citations with guideline indicators
1698
  paper_citations = []
1699
- for i, paper in enumerate(papers[:7], 1):
1700
  title = paper.get('title', 'Untitled')
1701
  authors = paper.get('authors', [])
1702
  year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
@@ -1705,11 +2164,6 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1705
  is_demo = paper.get('is_demo', False)
1706
  is_preprint = paper.get('is_preprint', False)
1707
 
1708
- # Check if paper mentions guidelines
1709
- text = f"{title} {paper.get('abstract', '')}".lower()
1710
- has_guidelines = any(keyword in text for keyword in ['guideline', 'recommendation', 'consensus',
1711
- 'ada', 'aha', 'acc', 'esc', 'idsa', 'ats'])
1712
-
1713
  # Format authors
1714
  if authors and isinstance(authors, list) and len(authors) > 0:
1715
  if len(authors) == 1:
@@ -1726,23 +2180,19 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1726
 
1727
  demo_indicator = "📄 " if is_demo else ""
1728
  preprint_indicator = "⚡ " if is_preprint else ""
1729
- guideline_indicator = "📋 " if has_guidelines else ""
1730
 
1731
  if author_str and year:
1732
- citation += f"\n {demo_indicator}{preprint_indicator}{guideline_indicator}*{author_str} ({year})*"
1733
  elif author_str:
1734
- citation += f"\n {demo_indicator}{preprint_indicator}{guideline_indicator}*{author_str}*"
1735
  else:
1736
- citation += f"\n {demo_indicator}{preprint_indicator}{guideline_indicator}*Unknown authors*"
1737
 
1738
  if journal:
1739
  citation += f"\n Journal: {journal}"
1740
  elif source and source != 'unknown':
1741
  citation += f"\n Source: {source}"
1742
 
1743
- if has_guidelines:
1744
- citation += f"\n *References clinical guidelines*"
1745
-
1746
  paper_citations.append(citation)
1747
 
1748
  # Build guideline summary section
@@ -1753,13 +2203,6 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1753
  if guideline_info.get('guidelines_found'):
1754
  guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
1755
 
1756
- # Show papers that mention guidelines
1757
- if guideline_info.get('papers_with_guidelines'):
1758
- guideline_summary += "**Papers Citing Guidelines:**\n"
1759
- for paper_info in guideline_info['papers_with_guidelines'][:3]:
1760
- guideline_summary += f"- {paper_info['title']} ({', '.join(paper_info['guidelines'][:2])})\n"
1761
- guideline_summary += "\n"
1762
-
1763
  if guideline_info.get('critical_missing'):
1764
  missing_list = ', '.join(guideline_info['critical_missing'])
1765
  guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
@@ -1767,11 +2210,11 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1767
  guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
1768
 
1769
  # Build answer
1770
- answer = f"""# 🔬 **Medical Research Analysis**
 
1771
  **Domain:** {domain_info['name']} {domain_info.get('icon', '')}
1772
- **User Context:** {context_info['name']} {context_info.get('icon', '')}
1773
  **Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
1774
- **Papers Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
1775
  ---
1776
  ## 📋 **Executive Summary**
1777
  {bottom_line}
@@ -1780,22 +2223,22 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1780
  {analysis}
1781
  ---
1782
  ## 📊 **Supporting Evidence**
1783
- {chr(10).join(paper_citations)}
1784
  ---
1785
- ## 🎯 **Key Takeaways**
1786
- 1. Evidence-based insights relevant to {context_info['name'].lower()} perspective
1787
  2. Domain-specific considerations for {domain_info['name'].lower()}
1788
- 3. {"Guideline-aware recommendations" if guideline_info else "Practical implications for application"}
1789
- 4. {"Attention to guideline gaps noted" if guideline_info and guideline_info.get('critical_missing') else "Integration with clinical guidelines"}
1790
- *Analysis performed using evidence-based medical reasoning with guideline assessment*
1791
  *Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
1792
 
1793
  return {
1794
  "query": query,
1795
  "domain": domain,
1796
  "domain_info": domain_info,
1797
- "user_context": user_context,
1798
- "user_context_info": context_info,
1799
  "answer": answer,
1800
  "analysis": analysis,
1801
  "bottom_line": bottom_line,
@@ -1804,27 +2247,27 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1804
  "demo_papers_used": len(demo_papers),
1805
  "confidence_score": confidence,
1806
  "guideline_info": guideline_info,
1807
- "reasoning_method": "evidence_based",
1808
  "real_time_search": self.use_real_time,
1809
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
1810
  "metrics": {
1811
  'response_time': time.time(),
1812
  'papers_analyzed': len(papers),
1813
  'domain': domain,
1814
- 'user_context': user_context
1815
  }
1816
  }
1817
 
1818
  def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
1819
- user_context: str, papers: List[Dict], guideline_info: Dict = None):
1820
- """Update conversation memory with guideline info"""
1821
  if not self.memory:
1822
  return
1823
 
1824
  memory_data = {
1825
  'query': query,
1826
  'domain': domain,
1827
- 'user_context': user_context,
1828
  'papers_used': len(papers),
1829
  'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
1830
  'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
@@ -1844,8 +2287,10 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1844
  metadata=memory_data
1845
  )
1846
 
1847
- def _create_no_results_response(self, query: str, domain: str, user_context: str) -> Dict[str, Any]:
1848
  """Create response when no papers are found"""
 
 
1849
  if CONFIG_AVAILABLE:
1850
  try:
1851
  domain_info = {
@@ -1864,29 +2309,38 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1864
  })
1865
 
1866
  answer = f"""# 🔍 **Limited Research Found**
1867
- **Query:** {query}
 
1868
  **Domain:** {domain_info['name']}
1869
- **User Context:** {user_context}
1870
- **Suggestions:**
 
 
1871
  1. Try broadening your search terms
1872
- 2. Consider related {domain_info['name']} topics
1873
- 3. Check spelling of medical terms
1874
  4. Try a more general domain selection
1875
- **For Guideline-Conscious Searches:**
1876
- - Search specific guideline names (e.g., "ADA guidelines diabetes")
1877
- - Include "guideline" or "recommendation" in search terms
1878
- - Check official guideline organization websites
1879
- **Example queries:**
1880
- - "Current treatments for [condition] according to guidelines"
1881
- - "Recent advances in {domain_info['name'].lower()} with guideline updates"
1882
- - "Clinical guidelines for [topic]"
1883
- *Note: Some specialized topics may have limited published research. Check official guideline sources directly.*"""
 
 
 
 
 
1884
 
1885
  return {
1886
  "query": query,
1887
  "domain": domain,
1888
  "domain_info": domain_info,
1889
- "user_context": user_context,
 
1890
  "answer": answer,
1891
  "papers_used": 0,
1892
  "real_papers_used": 0,
@@ -1900,8 +2354,10 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1900
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
1901
  }
1902
 
1903
- def _create_error_response(self, query: str, domain: str, user_context: str, error: str) -> Dict[str, Any]:
1904
  """Create error response"""
 
 
1905
  if CONFIG_AVAILABLE:
1906
  try:
1907
  domain_info = {
@@ -1920,26 +2376,33 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1920
  })
1921
 
1922
  answer = f"""# 🚨 **Analysis Error**
1923
- **Query:** {query}
 
1924
  **Domain:** {domain_info['name']}
1925
- **User Context:** {user_context}
 
1926
  **Error:** {error}
1927
- **Troubleshooting:**
 
1928
  1. Check your internet connection
1929
- 2. Try a simpler query
1930
- 3. Verify domain selection
1931
  4. Contact support if problem persists
1932
- **For Guideline Searches:**
1933
- - Try searching guideline names directly
1934
- - Check if specific guideline databases are accessible
1935
- - Consider searching PubMed with guideline filters
1936
- Please try again or reformulate your question."""
 
 
 
1937
 
1938
  return {
1939
  "query": query,
1940
  "domain": domain,
1941
  "domain_info": domain_info,
1942
- "user_context": user_context,
 
1943
  "answer": answer,
1944
  "papers_used": 0,
1945
  "real_papers_used": 0,
@@ -2033,7 +2496,7 @@ Please try again or reformulate your question."""
2033
  }
2034
 
2035
  def get_engine_status(self) -> Dict[str, Any]:
2036
- """Get engine status with guideline metrics"""
2037
  # Calculate average guideline coverage
2038
  avg_guideline_coverage = 0
2039
  if self.metrics['guideline_coverage']:
@@ -2042,12 +2505,13 @@ Please try again or reformulate your question."""
2042
 
2043
  return {
2044
  "engine_name": "Medical Research RAG Engine",
2045
- "version": "2.1.0",
2046
  "model": self.model if hasattr(self, 'model') else "Unknown",
2047
- "features": ["evidence_based_reasoning", "real_paper_fetching",
2048
- "confidence_scoring", "guideline_detection", "guideline_gap_analysis"],
2049
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
2050
  "real_time_search": self.use_real_time,
 
2051
  "guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
2052
  "metrics": {
2053
  "total_queries": self.metrics['total_queries'],
@@ -2059,7 +2523,7 @@ Please try again or reformulate your question."""
2059
  "demo_papers_used": self.metrics['demo_papers_used']
2060
  },
2061
  "domains_supported": len(DOMAIN_INFO),
2062
- "user_contexts_supported": len(USER_CONTEXT_INFO)
2063
  }
2064
 
2065
  def clear_memory(self):
@@ -2075,74 +2539,75 @@ Please try again or reformulate your question."""
2075
  # TEST FUNCTION
2076
  # ============================================================================
2077
 
2078
- def test_medical_rag_engine():
2079
- """Test the medical RAG engine with guideline detection"""
2080
  print("\n" + "=" * 60)
2081
- print("🧪 TESTING MEDICAL RAG ENGINE WITH GUIDELINE DETECTION")
2082
  print("=" * 60)
2083
 
2084
  try:
2085
  # Initialize engine
2086
  engine = EnhancedRAGEngine(
2087
- session_id="medical_test",
2088
  model="gpt-oss-120b",
2089
- use_real_time=True # Enable real-time paper fetching
2090
  )
2091
 
2092
- # Test queries with different domains to test guideline detection
2093
  test_cases = [
2094
  {
2095
- "query": "Compare first-line antibiotics for community-acquired pneumonia based on recent evidence",
2096
- "domain": "infectious_disease",
2097
- "user_context": "clinician"
2098
  },
2099
  {
2100
- "query": "Newest GLP-1 agonists for type 2 diabetes and comparative effectiveness",
 
 
 
 
 
2101
  "domain": "endocrinology",
2102
- "user_context": "clinician"
2103
  },
2104
  {
2105
- "query": "Management of hypertension in elderly patients with diabetes",
2106
- "domain": "cardiology",
2107
- "user_context": "researcher"
2108
  }
2109
  ]
2110
 
2111
- for i, test_case in enumerate(test_cases[:1], 1): # Test first one for speed
2112
  print(f"\n📝 Test Case {i}:")
2113
  print(f" Query: '{test_case['query']}'")
2114
  print(f" Domain: {test_case['domain']}")
2115
- print(f" User Context: {test_case['user_context']}")
2116
 
2117
  # Process query
2118
  response = engine.answer_research_question(
2119
  query=test_case['query'],
2120
  domain=test_case['domain'],
2121
- max_papers=15,
2122
- user_context=test_case['user_context'],
2123
  use_fallback=True
2124
  )
2125
 
2126
  if response and 'error' not in response:
2127
  print(f"\n✅ Test Successful!")
 
2128
  print(f" Papers used: {response.get('papers_used', 0)}")
2129
- print(f" Real papers: {response.get('real_papers_used', 0)}")
2130
  print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
2131
 
2132
- # Check guideline info
2133
- guideline_info = response.get('guideline_info', {})
2134
- if guideline_info:
2135
- print(f" Guidelines found: {len(guideline_info.get('guidelines_found', []))}")
2136
- if guideline_info.get('critical_missing'):
2137
- print(f" Missing guidelines: {', '.join(guideline_info['critical_missing'][:3])}")
2138
- print(f" Guideline coverage: {guideline_info.get('coverage_percentage', 0)}%")
2139
 
2140
  # Show engine status
2141
  status = engine.get_engine_status()
2142
  print(f"\n🔧 Engine Status:")
2143
- print(f" Research engine available: {status.get('research_engine_available', False)}")
2144
- print(f" Guideline detection: ENABLED")
2145
- print(f" Average guideline coverage: {status['metrics']['average_guideline_coverage']}%")
2146
  print(f" Total queries: {status['metrics']['total_queries']}")
2147
 
2148
  return True
@@ -2156,15 +2621,15 @@ def test_medical_rag_engine():
2156
 
2157
  if __name__ == "__main__":
2158
  # Run test
2159
- test_result = test_medical_rag_engine()
2160
 
2161
  if test_result:
2162
  print(f"\n{'=' * 60}")
2163
- print("🎉 MEDICAL RAG ENGINE TEST COMPLETE!")
2164
- print(" Evidence-based reasoning: ✓")
2165
- print(" Real paper fetching: ✓")
 
2166
  print(" Guideline detection: ✓")
2167
- print(" Guideline gap analysis: ✓")
2168
  print(f"{'=' * 60}")
2169
  else:
2170
  print("\n❌ Engine test failed")
 
1
  """
2
  rag_engine.py - Production-Ready Medical RAG Engine
3
+ Updated with role-based response handling and improved simple query detection
4
  """
5
 
6
  from typing import List, Dict, Any, Optional, Tuple
 
385
 
386
 
387
  # ============================================================================
388
+ # ROLE-BASED REASONING FOR MEDICAL RESEARCH
389
  # ============================================================================
390
 
391
class RoleBasedReasoning:
    """Role-based reasoning for domain-agnostic, audience-appropriate prompts.

    Builds LLM prompts tailored to who is reading the answer (patient,
    student, clinician, ...) instead of forcing a single medical framing.
    Other components read ``ROLE_SYSTEM_PROMPTS[role]['name'|'icon'|'prompt']``,
    so those three keys must stay stable for every role.
    """

    # Role definitions with domain-agnostic system-prompt fragments.
    ROLE_SYSTEM_PROMPTS = {
        'patient': {
            'name': 'Patient',
            'icon': '🩺',
            'prompt': '''You are helping a patient understand information. Use simple, clear, reassuring language.
- Focus on practical implications and what they need to know
- Avoid complex terminology or jargon
- Emphasize safety and when to seek professional help
- Be compassionate and supportive
- Do not provide diagnoses or specific medical advice
- Explain concepts in everyday terms'''
        },
        'student': {
            'name': 'Student',
            'icon': '🎓',
            'prompt': '''You are teaching a student. Focus on educational value and understanding.
- Explain foundational concepts and definitions
- Provide examples and analogies
- Encourage critical thinking and questions
- Structure information logically
- Connect to broader knowledge areas
- Mention learning resources when helpful'''
        },
        'clinician': {
            'name': 'Clinician',
            'icon': '👨‍⚕️',
            'prompt': '''You are assisting a healthcare professional. Be concise, actionable, and evidence-based.
- Focus on practical implications and decision-making
- Reference guidelines and evidence levels when relevant
- Consider workflow and implementation
- Be precise but efficient with time
- Address risks and benefits clearly
- Maintain professional tone'''
        },
        'doctor': {
            'name': 'Doctor',
            'icon': '⚕️',
            'prompt': '''You are assisting a physician. Use appropriate terminology and clinical reasoning.
- Focus on differential diagnosis, treatment options, and management
- Reference current standards of care and guidelines
- Consider patient factors and comorbidities
- Discuss evidence quality and limitations
- Be thorough but organized
- Maintain clinical accuracy'''
        },
        'researcher': {
            'name': 'Researcher',
            'icon': '🔬',
            'prompt': '''You are assisting a research scientist. Focus on methodology and evidence.
- Discuss study designs, methods, and limitations
- Analyze evidence quality and gaps
- Consider statistical significance and clinical relevance
- Reference current literature and trends
- Discuss implications for future research
- Maintain scientific rigor'''
        },
        'professor': {
            'name': 'Professor',
            'icon': '📚',
            'prompt': '''You are assisting an academic educator. Focus on knowledge synthesis and pedagogy.
- Provide comprehensive overviews with context
- Compare theories, methods, and findings
- Discuss historical development and future directions
- Emphasize critical evaluation and synthesis
- Connect to broader academic discourse
- Support teaching and learning objectives'''
        },
        'pharmacist': {
            'name': 'Pharmacist',
            'icon': '💊',
            'prompt': '''You are assisting a pharmacy professional. Focus on medications and safety.
- Discuss drug mechanisms, interactions, and pharmacokinetics
- Emphasize safety profiles and monitoring
- Consider dosing, administration, and compliance
- Address patient counseling points
- Reference formularies and guidelines
- Maintain focus on medication optimization'''
        },
        'general': {
            'name': 'General User',
            'icon': '👤',
            'prompt': '''You are assisting a general user. Provide balanced, accessible information.
- Adjust complexity based on the query
- Be helpful and informative without overwhelming
- Provide context and practical implications
- Use clear language with minimal jargon
- Consider diverse backgrounds and knowledge levels
- Maintain neutral, objective tone'''
        }
    }

    # Exact phrases treated as greetings / small talk (lower-cased, stripped).
    SIMPLE_QUERIES = ('hi', 'hello', 'hey', 'greetings', 'good morning',
                      'good afternoon', 'good evening', 'how are you',
                      "what's up", 'sup')

    # Greeting specs per role: (persona line, instruction line, example reply).
    # Consolidates the previously duplicated per-role greeting branches.
    _GREETING_SPECS = {
        'patient': (
            'You are helping a patient. Use warm, reassuring tone.',
            'Respond with a friendly greeting and invitation to ask questions. Keep it brief and welcoming.',
            "Hello! I'm here to help answer your health questions in simple, clear terms. What would you like to know?"),
        'student': (
            'You are teaching a student.',
            'Respond with an encouraging greeting that invites learning questions.',
            "Hi there! I'm here to help you learn about medical topics. What are you curious about today?"),
        'clinician': (
            'You are assisting a healthcare professional.',
            'Respond with a professional greeting appropriate for clinical setting.',
            "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?"),
        'doctor': (
            'You are assisting a healthcare professional.',
            'Respond with a professional greeting appropriate for clinical setting.',
            "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?"),
        'researcher': (
            'You are assisting an academic professional.',
            'Respond with a scholarly greeting that invites research questions.',
            "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?"),
        'professor': (
            'You are assisting an academic professional.',
            'Respond with a scholarly greeting that invites research questions.',
            "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?"),
        'pharmacist': (
            'You are assisting a pharmacy professional.',
            'Respond with a professional greeting focused on medication information.',
            "Hello. I can help with medication-related questions and information. How can I assist you today?"),
        'general': (
            'You are assisting a general user.',
            'Respond with a friendly, welcoming greeting.',
            "Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"),
    }

    @staticmethod
    def create_role_prompt(query: str, domain: str, role: str,
                           papers_count: int = 0, guideline_info: Dict = None) -> str:
        """Create a role-appropriate LLM prompt with domain-agnostic focus.

        Args:
            query: The user's question (verbatim).
            domain: Domain hint (e.g. ``"cardiology"``); advisory only.
            role: One of ``ROLE_SYSTEM_PROMPTS`` keys; unknown roles fall
                back to ``'general'``.
            papers_count: Number of retrieved sources to mention, if any.
            guideline_info: Optional dict with ``guidelines_found`` /
                ``critical_missing`` lists from guideline detection.

        Returns:
            A complete prompt string: a short greeting prompt for exact
            greeting phrases, otherwise a full role-framed analysis prompt.

        Bug fix: previously any query of <= 2 words was routed to the
        greeting branch, so substantive short queries such as
        "diabetes treatment" received a greeting prompt. Only exact
        greeting phrases short-circuit now.
        """
        role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
            role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

        query_lower = query.lower().strip()
        if query_lower in RoleBasedReasoning.SIMPLE_QUERIES:
            persona, instruction, example = RoleBasedReasoning._GREETING_SPECS.get(
                role, RoleBasedReasoning._GREETING_SPECS['general'])
            return (f"{persona}\n\n"
                    f"Query: {query}\n\n"
                    f"{instruction}\n"
                    f'Example: "{example}"')

        # Substantive query: build the full role-framed analysis prompt.
        role_prompt = role_info['prompt']
        role_name_lower = role_info['name'].lower()

        domain_agnostic = """DOMAIN-AGNOSTIC APPROACH:
- This system can answer questions from ANY domain (tech, finance, health, education, general)
- Adapt your expertise to the query domain naturally
- Do NOT force medical framing on non-medical questions
- Only emphasize citations/guidelines when the query domain and role demand it
- Use appropriate terminology for the query's domain"""

        # Evidence emphasis only for evidence-centric roles.
        evidence_note = ('Reference evidence/guidelines when domain-appropriate'
                         if role in ('clinician', 'doctor', 'researcher')
                         else 'Mention evidence when helpful, not required')
        safety_note = ('Include appropriate disclaimers'
                       if role == 'patient'
                       else 'Maintain professional standards')

        prompt = f"""ROLE: {role_info['name']} {role_info['icon']}
{role_prompt}

{domain_agnostic}

QUERY: {query}
QUERY DOMAIN CONTEXT: {domain} (adapt your response appropriately)

RESPONSE GUIDELINES:
1. **Role-Appropriate Depth:**
   - {role}: Adjust response complexity for {role_name_lower} needs

2. **Terminology Level:**
   - Use language appropriate for {role_name_lower} understanding

3. **Evidence Awareness:**
   - {evidence_note}

4. **Safety & Practicality:**
   - {safety_note}

5. **Response Structure:**
   - Organize information logically for {role_name_lower} understanding
   - Prioritize most relevant information first
   - Keep response focused and actionable"""

        # Mention retrieved sources so the model grounds its analysis.
        if papers_count > 0:
            prompt += f"\n\nRESEARCH CONTEXT: Analyzing {papers_count} relevant sources"

        # Surface detected guidelines and known gaps, when available.
        if guideline_info:
            if guideline_info.get('guidelines_found'):
                prompt += f"\nGUIDELINES REFERENCED: {', '.join(guideline_info['guidelines_found'])}"
            if guideline_info.get('critical_missing'):
                prompt += f"\nGUIDELINE GAPS: Missing {', '.join(guideline_info['critical_missing'][:2])}"

        prompt += f"\n\nPlease provide a {role_name_lower}-appropriate response to: {query}"

        return prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
 
602
  # ============================================================================
 
1315
  print(f"⚠️ LLM not available - using fallback mode: {e}")
1316
  self.llm = None
1317
 
1318
+ self.role_reasoning = RoleBasedReasoning() # NEW: Role-based reasoning
1319
  self.ranker = PaperRanker()
1320
  self.confidence_scorer = ConfidenceScorer()
1321
  self.context_detector = UserContextDetector()
 
1357
  else:
1358
  print(" 📄 Real paper fetching: DISABLED (using demo papers)")
1359
  print(" 📋 Guideline detection: ENABLED")
1360
+ print(" 👤 Role-based responses: ENABLED")
1361
 
1362
  def answer_research_question(self,
1363
  query: str,
 
1366
  use_memory: bool = True,
1367
  user_context: str = "auto",
1368
  use_fallback: bool = False,
1369
+ role: str = "general", # NEW: Explicit role parameter
1370
+ role_system_prompt: str = None, # NEW: Custom role prompt from frontend
1371
  **kwargs) -> Dict[str, Any]:
1372
+ """Answer medical research questions with role-based reasoning"""
1373
 
1374
  start_time = time.time()
1375
  self.metrics['total_queries'] += 1
 
1377
 
1378
  print(f"\n🔍 Processing query: '{query}'")
1379
  print(f" Domain: {domain}")
1380
+ print(f" Role: {role}")
1381
  print(f" Max papers: {max_papers}")
1382
  print(f" Real-time search: {self.use_real_time}")
1383
 
1384
  try:
1385
+ # Auto-detect user context if needed (backward compatibility)
1386
  if user_context == "auto":
1387
  user_context = self.context_detector.detect_context(query, domain)
1388
 
1389
  self.metrics['user_contexts'][user_context] += 1
1390
 
1391
+ # NEW: Check for simple queries first (greetings, basic questions)
1392
+ simple_response = self._handle_simple_query(query, domain, role)
1393
+ if simple_response:
1394
+ return simple_response
1395
+
1396
+ # Check if query requires research analysis
1397
+ requires_research = self._requires_research_analysis(query)
1398
+ if not requires_research:
1399
+ # For non-research queries, provide direct role-appropriate response
1400
+ return self._handle_direct_query(query, domain, role)
1401
+
1402
  # Retrieve papers using MedicalResearchEngine
1403
  print("📚 Retrieving relevant papers...")
1404
  papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
1405
 
1406
  if not papers:
1407
  print("⚠️ No papers found, creating fallback response...")
1408
+ return self._create_no_results_response(query, domain, role)
1409
 
1410
  # Detect guideline citations
1411
  print("📋 Detecting guideline citations...")
 
1422
  })
1423
 
1424
  # Rank papers
1425
+ ranked_papers = self.ranker.rank_papers(papers, query, domain, role)
1426
  print(f"📊 Papers found: {len(ranked_papers)}")
1427
 
1428
  # Track paper sources
 
1442
 
1443
  # Calculate confidence with guideline consideration
1444
  confidence = self.confidence_scorer.calculate_confidence(
1445
+ ranked_papers, query, "summary", role, domain, guideline_info
1446
  )
1447
 
1448
+ # Generate analysis using role-based reasoning
1449
+ print("🧠 Generating role-based analysis...")
1450
+ analysis = self._generate_role_based_analysis(
1451
+ query, domain, role, ranked_papers, guideline_info, role_system_prompt
1452
  )
1453
 
1454
+ # Generate clinical bottom line with role awareness
1455
+ bottom_line = self._generate_role_bottom_line(
1456
+ query, domain, role, len(ranked_papers), real_papers, guideline_info
1457
  )
1458
 
1459
  # Synthesize final answer
1460
+ final_answer = self._synthesize_role_answer(
1461
+ query, domain, role, analysis, ranked_papers,
1462
  bottom_line, confidence, guideline_info
1463
  )
1464
 
1465
  # Update memory
1466
  if use_memory and self.memory:
1467
+ self._update_memory(query, final_answer, domain, role, ranked_papers, guideline_info)
1468
 
1469
  # Update metrics
1470
  response_time = time.time() - start_time
 
1485
  print(f"❌ Error in research analysis: {e}")
1486
  import traceback
1487
  traceback.print_exc()
1488
+ return self._create_error_response(query, domain, role, str(e))
1489
+
1490
def _handle_simple_query(self, query: str, domain: str, role: str) -> Optional[Dict[str, Any]]:
    """Return a canned role-appropriate response for trivial queries, or None.

    Greetings get a greeting response; other very short (<= 2 word) inputs
    that do not look like research questions get a clarification prompt.
    Substantive queries fall through (returns None) to the full pipeline.
    """
    GREETINGS = ('hi', 'hello', 'hey', 'greetings', 'good morning',
                 'good afternoon', 'good evening', 'howdy')
    normalized = query.lower().strip()

    if normalized in GREETINGS:
        print(" 👋 Detected simple greeting")
        return self._create_greeting_response(query, domain, role)

    is_short = len(query.split()) <= 2
    if is_short and not self._looks_like_research_query(query):
        print(" 💬 Detected simple query")
        return self._create_simple_response(query, domain, role)

    return None
1508
+
1509
+ def _looks_like_research_query(self, query: str) -> bool:
1510
+ """Check if query looks like a research question"""
1511
+ query_lower = query.lower()
1512
+
1513
+ # Research question indicators
1514
+ research_indicators = [
1515
+ 'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis',
1516
+ 'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical',
1517
+ 'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis',
1518
+ 'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk',
1519
+ 'benefit', 'recommendation', 'guideline', 'standard', 'protocol'
1520
+ ]
1521
+
1522
+ # Check if query contains research indicators
1523
+ for indicator in research_indicators:
1524
+ if indicator in query_lower:
1525
+ return True
1526
+
1527
+ # Check question words
1528
+ question_words = ['what', 'why', 'how', 'when', 'where', 'which', 'who']
1529
+ if any(query_lower.startswith(word) for word in question_words):
1530
+ # Check if it's a complex question (more than basic)
1531
+ if len(query.split()) > 3:
1532
+ return True
1533
+
1534
+ return False
1535
+
1536
def _requires_research_analysis(self, query: str) -> bool:
    """Decide whether *query* warrants the full paper-retrieval pipeline.

    Greetings/pleasantries and very short non-research fragments are
    answered directly; everything else goes through research analysis.
    """
    normalized = query.lower().strip()

    # Exact-match conversational phrases that never need research.
    chitchat_patterns = (
        r'^hi$', r'^hello$', r'^hey$', r'^greetings$',
        r'^good morning$', r'^good afternoon$', r'^good evening$',
        r'^how are you$', r"^what's up$", r'^sup$',
        r'^thanks$', r'^thank you$', r'^bye$', r'^goodbye$',
    )
    if any(re.match(pattern, normalized) for pattern in chitchat_patterns):
        return False

    # Short fragments without research vocabulary are handled directly too.
    if len(query.split()) <= 2 and not self._looks_like_research_query(query):
        return False

    return True
1557
+
1558
def _create_greeting_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Build a canned, role-appropriate greeting response payload.

    Args:
        query: The greeting as typed by the user.
        domain: Active domain key (used only for display metadata).
        role: Reader role; unknown roles fall back to 'general'.

    Returns:
        A response dict with the same schema as the full research pipeline
        (answer/analysis/confidence/metrics/...), but with no papers.

    Fixes vs. previous version:
    - Bare ``except:`` narrowed to ``except Exception`` so KeyboardInterrupt
      and SystemExit are not swallowed.
    - ``metrics['response_time']`` previously stored ``time.time()`` (an
      absolute epoch timestamp, ~1.7e9) where elapsed seconds are expected;
      a greeting does no retrieval work, so it is reported as 0.0.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Role-specific greeting texts.
    greetings = {
        'patient': "👋 Hello! I'm here to help you understand health topics in simple, clear terms. What would you like to know?",
        'student': "👋 Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
        'clinician': "👋 Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
        'doctor': "👋 Hello, doctor. I'm available to discuss clinical questions and evidence. What would you like to explore?",
        'researcher': "👋 Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
        'professor': "👋 Hello. I can assist with academic discussions and evidence synthesis. What topic interests you?",
        'pharmacist': "👋 Hello. I can help with medication-related questions and information. How can I assist you today?",
        'general': "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"
    }
    greeting = greetings.get(role, greetings['general'])

    # Resolve display metadata for the domain, tolerating config failures.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:  # config helper may raise on unknown domains
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# {greeting}

**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}

Feel free to ask me anything! I'll provide information tailored to your needs as a {role_info['name'].lower()}."""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": greeting,
        "bottom_line": greeting,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 95.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple greeting response'
        },
        "guideline_info": None,
        "reasoning_method": "greeting",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds; a canned greeting involves no retrieval work.
            'response_time': 0.0,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
1628
+
1629
def _create_simple_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Build a role-appropriate clarification response for very short queries.

    Args:
        query: The short (non-greeting) query as typed.
        domain: Active domain key (used only for display metadata).
        role: Reader role; unknown roles fall back to 'general'.

    Returns:
        A response dict with the same schema as the full research pipeline,
        asking the user for more detail instead of running retrieval.

    Fixes vs. previous version:
    - Bare ``except:`` narrowed to ``except Exception``.
    - ``metrics['response_time']`` previously stored ``time.time()`` (an
      absolute epoch timestamp) where elapsed seconds are expected; this
      path does no retrieval work, so it is reported as 0.0.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Role-specific "tell me more" phrasings.
    simple_responses = {
        'patient': f"I'd be happy to help with '{query}'. Could you tell me a bit more about what you're looking for?",
        'student': f"That's an interesting topic! To help you best, could you provide more details about what you'd like to know regarding '{query}'?",
        'clinician': f"Regarding '{query}', I can provide evidence-based information. Please share more specifics about your clinical question.",
        'doctor': f"For '{query}', I can offer medical information. Could you elaborate on the clinical context or specific aspects you're interested in?",
        'researcher': f"On the topic of '{query}', I can discuss research perspectives. What specific aspect would you like to explore?",
        'professor': f"Regarding '{query}', I can provide academic perspectives. What particular angle or detail would you like to discuss?",
        'pharmacist': f"About '{query}', I can offer medication-related information. Could you specify what you'd like to know?",
        'general': f"I can help with information about '{query}'. Could you provide more details about what specifically you're interested in?"
    }
    response = simple_responses.get(role, simple_responses['general'])

    # Resolve display metadata for the domain, tolerating config failures.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:  # config helper may raise on unknown domains
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# 💬 **Response**

**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}

{response}

*Tip: For more detailed information, try asking a more specific question!*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 85.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple query response'
        },
        "guideline_info": None,
        "reasoning_method": "simple_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds; this canned path involves no retrieval work.
            'response_time': 0.0,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
1701
+
1702
def _handle_direct_query(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Answer a non-research query directly (no paper retrieval).

    Uses the LLM with a role-based prompt when available; otherwise falls
    back to a canned clarification message.

    Args:
        query: The user's query.
        domain: Active domain key (used for prompt context and display).
        role: Reader role; unknown roles fall back to 'general'.

    Returns:
        A response dict with the same schema as the full research pipeline,
        marked with ``reasoning_method='direct_response'``.

    Fixes vs. previous version:
    - Bare ``except:`` in the domain-info lookup narrowed to
      ``except Exception``.
    - ``metrics['response_time']`` previously stored ``time.time()`` (an
      absolute epoch timestamp) where elapsed seconds are expected; the
      LLM call is now timed and the elapsed duration reported.
    """
    start_time = time.time()
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Prefer an LLM-generated answer; degrade gracefully if it fails.
    if self.llm:
        try:
            prompt = self.role_reasoning.create_role_prompt(query, domain, role, 0, None)
            response = self.llm.generate(
                prompt,
                system_message=f"You are assisting a {role_info['name'].lower()}. Provide helpful, accurate information.",
                max_tokens=1000
            )
            response = response.strip()
            if not response:
                # Empty generations happen; fall back to a clarification ask.
                response = f"I'd be happy to help with '{query}'. Could you provide more details about what specifically you're looking for?"
        except Exception as e:
            print(f"⚠️ LLM direct response failed: {e}")
            response = f"I can help with information about '{query}'. Please feel free to ask more specific questions!"
    else:
        response = f"I'd be happy to discuss '{query}'. What specific aspect would you like to know more about?"

    # Resolve display metadata for the domain, tolerating config failures.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:  # config helper may raise on unknown domains
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# 💬 **Response**

**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}

{response}

*Note: This is a direct response. For evidence-based research analysis with papers, please ask a more specific research question.*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 80.0,
            'level': 'HIGH 🟢',
            'explanation': 'Direct query response without papers'
        },
        "guideline_info": None,
        "reasoning_method": "direct_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds for this direct (LLM-only) response.
            'response_time': time.time() - start_time,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
1782
 
1783
  def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
1784
  use_fallback: bool = False) -> List[Dict]:
 
1976
 
1977
  return papers
1978
 
1979
def _generate_role_based_analysis(self, query: str, domain: str, role: str,
                                  papers: List[Dict], guideline_info: Dict = None,
                                  custom_role_prompt: str = None) -> str:
    """Produce a role-tailored analysis via the LLM, with template fallback.

    Falls back to the templated analysis when no LLM is configured or the
    generation call raises.
    """
    if not self.llm:
        return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)

    prompt = self.role_reasoning.create_role_prompt(
        query, domain, role, len(papers), guideline_info)

    # Surface the top sources so the model can ground its analysis.
    if papers:
        source_lines = [
            f"{idx + 1}. {paper.get('title', 'Untitled')} ({paper.get('source', 'Unknown')})"
            for idx, paper in enumerate(papers[:3])
        ]
        prompt += "\n\n**Relevant Sources:**\n" + "\n".join(source_lines)

    # Flag illustrative (demo) papers so the model does not over-claim.
    demo_total = sum(1 for paper in papers if paper.get('is_demo', False))
    if demo_total > 0:
        prompt += f"\n\nNote: {demo_total} illustrative examples included for context."

    try:
        # A custom (truthy) role prompt from the frontend overrides the default.
        system_message = custom_role_prompt or f"You are assisting a {role}. Provide helpful, accurate information."
        return self.llm.generate(prompt, system_message=system_message, max_tokens=2000)
    except Exception as e:
        print(f"⚠️ LLM role-based analysis failed: {e}")
        return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
2018
 
2019
+ def _create_fallback_role_analysis(self, query: str, domain: str, role: str,
2020
+ papers: List[Dict], guideline_info: Dict = None) -> str:
2021
  """Create fallback analysis when LLM is unavailable"""
2022
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2023
+
2024
  if CONFIG_AVAILABLE:
2025
  try:
2026
  domain_name = get_domain_display_name(domain)
 
2033
  real_papers = [p for p in papers if not p.get('is_demo', False)]
2034
  demo_papers = [p for p in papers if p.get('is_demo', False)]
2035
 
2036
+ analysis = f"""**{role_info['name']}-Focused Analysis**
 
 
2037
  **Query:** {query}
2038
+ **Domain Context:** {domain_name}
2039
+ **Role Perspective:** {role_info['name']}"""
2040
 
2041
  # Add guideline information
2042
  if guideline_info:
 
2046
  analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
2047
 
2048
  analysis += f"""
2049
+ **Key Information for {role_info['name']}:**
2050
+ Based on analysis of {len(papers)} relevant sources ({len(real_papers)} real, {len(demo_papers)} illustrative):
 
2051
 
2052
+ 1. **{role_info['name']}-Relevant Insights:**
2053
+ - Information tailored to {role_info['name'].lower()} needs and perspective
2054
+ - Practical implications for {role_info['name'].lower()} context
2055
+ - Actionable takeaways appropriate for this role
 
2056
 
2057
+ 2. **Domain Context:**
2058
+ - Considerations specific to {domain_name}
2059
+ - Relevant standards and approaches in this field
2060
+ - Important context for application
2061
+
2062
+ 3. **Evidence Considerations:**
2063
+ - {len(papers)} sources analyzed
2064
+ - Quality and relevance assessed for {role_info['name'].lower()} needs
2065
+ - {"Guideline awareness as noted above" if guideline_info else "Standard evidence considerations"}
2066
+
2067
+ **Recommendations for {role_info['name']}:**
2068
+ - Apply information within {role_info['name'].lower()} role context
2069
+ - Consider individual circumstances and specific needs
2070
+ - {"Consult referenced guidelines as appropriate" if guideline_info and guideline_info.get('guidelines_found') else "Reference standard practices"}
2071
+ - Seek additional information for specific cases
2072
+ - Integrate with professional judgment and experience
2073
+
2074
+ *Note: This analysis is tailored for {role_info['name'].lower()} perspective. For other perspectives, different considerations may apply.*"""
2075
 
2076
  if demo_papers:
2077
+ analysis += f"\n\n*Includes {len(demo_papers)} illustrative examples for comprehensive analysis.*"
2078
 
2079
  return analysis
2080
 
2081
+ def _generate_role_bottom_line(self, query: str, domain: str, role: str,
2082
+ papers_count: int, real_papers_count: int,
2083
+ guideline_info: Dict = None) -> str:
2084
+ """Generate role-appropriate bottom line"""
2085
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2086
+
2087
  if CONFIG_AVAILABLE:
2088
  try:
2089
  domain_name = get_domain_display_name(domain)
 
2092
  else:
2093
  domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
2094
 
2095
+ bottom_line = f"""**Bottom Line for {role_info['name']}:**
2096
+ Based on {papers_count} sources in {domain_name} ({real_papers_count} real sources), here are the key takeaways for {role_info['name'].lower()} perspective."""
2097
 
2098
  # Add guideline-specific bottom line
2099
  if guideline_info:
2100
  if guideline_info.get('guidelines_found'):
2101
+ bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced."
2102
 
2103
  if guideline_info.get('critical_missing'):
2104
+ missing_list = ', '.join(guideline_info['critical_missing'][:2])
2105
+ bottom_line += f"\n**Consider:** Missing explicit guideline citations for {missing_list}."
2106
 
2107
  coverage = guideline_info.get('coverage_percentage', 0)
2108
  if coverage < 50:
2109
+ bottom_line += f"\n**Evidence Note:** Guideline coverage is limited."
2110
 
2111
  bottom_line += f"""
2112
+ **{role_info['name']}-Specific Considerations:**
2113
+ - Information tailored to {role_info['name'].lower()} role and needs
2114
+ - Practical application within {role_info['name'].lower()} context
2115
+ - Integration with {role_info['name'].lower()} knowledge and experience
2116
+ - {"Guideline-aware decision making" if guideline_info else "Evidence-informed approach"}
2117
+ - Consideration of specific circumstances and constraints"""
2118
 
2119
  if papers_count > real_papers_count:
2120
+ bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for context.*"
2121
 
2122
  return bottom_line
2123
 
2124
+ def _synthesize_role_answer(self, query: str, domain: str, role: str,
2125
+ analysis: str, papers: List[Dict],
2126
+ bottom_line: str, confidence: Dict[str, Any],
2127
+ guideline_info: Dict = None) -> Dict[str, Any]:
2128
+ """Synthesize final answer with role information"""
2129
 
2130
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2131
+
2132
  if CONFIG_AVAILABLE:
2133
  try:
2134
  domain_info = {
 
2149
  'description': f'Research in {domain.replace("_", " ")}'
2150
  })
2151
 
 
 
 
 
 
2152
  # Count real vs demo papers
2153
  real_papers = [p for p in papers if not p.get('is_demo', False)]
2154
  demo_papers = [p for p in papers if p.get('is_demo', False)]
2155
 
2156
+ # Format paper citations
2157
  paper_citations = []
2158
+ for i, paper in enumerate(papers[:5], 1):
2159
  title = paper.get('title', 'Untitled')
2160
  authors = paper.get('authors', [])
2161
  year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
 
2164
  is_demo = paper.get('is_demo', False)
2165
  is_preprint = paper.get('is_preprint', False)
2166
 
 
 
 
 
 
2167
  # Format authors
2168
  if authors and isinstance(authors, list) and len(authors) > 0:
2169
  if len(authors) == 1:
 
2180
 
2181
  demo_indicator = "📄 " if is_demo else ""
2182
  preprint_indicator = "⚡ " if is_preprint else ""
 
2183
 
2184
  if author_str and year:
2185
+ citation += f"\n {demo_indicator}{preprint_indicator}*{author_str} ({year})*"
2186
  elif author_str:
2187
+ citation += f"\n {demo_indicator}{preprint_indicator}*{author_str}*"
2188
  else:
2189
+ citation += f"\n {demo_indicator}{preprint_indicator}*Unknown authors*"
2190
 
2191
  if journal:
2192
  citation += f"\n Journal: {journal}"
2193
  elif source and source != 'unknown':
2194
  citation += f"\n Source: {source}"
2195
 
 
 
 
2196
  paper_citations.append(citation)
2197
 
2198
  # Build guideline summary section
 
2203
  if guideline_info.get('guidelines_found'):
2204
  guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
2205
 
 
 
 
 
 
 
 
2206
  if guideline_info.get('critical_missing'):
2207
  missing_list = ', '.join(guideline_info['critical_missing'])
2208
  guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
 
2210
  guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
2211
 
2212
  # Build answer
2213
+ answer = f"""# 🔬 **{role_info['name']}-Focused Analysis**
2214
+ **Role:** {role_info['name']} {role_info['icon']}
2215
  **Domain:** {domain_info['name']} {domain_info.get('icon', '')}
 
2216
  **Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
2217
+ **Sources Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
2218
  ---
2219
  ## 📋 **Executive Summary**
2220
  {bottom_line}
 
2223
  {analysis}
2224
  ---
2225
  ## 📊 **Supporting Evidence**
2226
+ {chr(10).join(paper_citations) if paper_citations else "*No papers cited for this simple query*"}
2227
  ---
2228
+ ## 🎯 **Key Takeaways for {role_info['name']}**
2229
+ 1. Role-appropriate information and insights
2230
  2. Domain-specific considerations for {domain_info['name'].lower()}
2231
+ 3. Practical implications tailored to {role_info['name'].lower()} needs
2232
+ 4. {"Guideline-aware recommendations" if guideline_info else "Evidence-informed approach"}
2233
+ *Analysis performed with {role_info['name'].lower()}-focused reasoning*
2234
  *Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
2235
 
2236
  return {
2237
  "query": query,
2238
  "domain": domain,
2239
  "domain_info": domain_info,
2240
+ "user_context": role,
2241
+ "user_context_info": role_info,
2242
  "answer": answer,
2243
  "analysis": analysis,
2244
  "bottom_line": bottom_line,
 
2247
  "demo_papers_used": len(demo_papers),
2248
  "confidence_score": confidence,
2249
  "guideline_info": guideline_info,
2250
+ "reasoning_method": "role_based",
2251
  "real_time_search": self.use_real_time,
2252
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
2253
  "metrics": {
2254
  'response_time': time.time(),
2255
  'papers_analyzed': len(papers),
2256
  'domain': domain,
2257
+ 'user_context': role
2258
  }
2259
  }
2260
 
2261
  def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
2262
+ role: str, papers: List[Dict], guideline_info: Dict = None):
2263
+ """Update conversation memory with role info"""
2264
  if not self.memory:
2265
  return
2266
 
2267
  memory_data = {
2268
  'query': query,
2269
  'domain': domain,
2270
+ 'role': role,
2271
  'papers_used': len(papers),
2272
  'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
2273
  'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
 
2287
  metadata=memory_data
2288
  )
2289
 
2290
+ def _create_no_results_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
2291
  """Create response when no papers are found"""
2292
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2293
+
2294
  if CONFIG_AVAILABLE:
2295
  try:
2296
  domain_info = {
 
2309
  })
2310
 
2311
  answer = f"""# 🔍 **Limited Research Found**
2312
+
2313
+ **Role:** {role_info['name']} {role_info['icon']}
2314
  **Domain:** {domain_info['name']}
2315
+
2316
+ **Query:** {query}
2317
+
2318
+ **Suggestions for {role_info['name']}:**
2319
  1. Try broadening your search terms
2320
+ 2. Consider related topics in {domain_info['name']}
2321
+ 3. Check spelling of technical terms
2322
  4. Try a more general domain selection
2323
+
2324
+ **For Role-Appropriate Information:**
2325
+ - Ask more general questions about the topic
2326
+ - Request explanations of concepts
2327
+ - Inquire about standard approaches or practices
2328
+ - Seek practical guidance rather than specific research
2329
+
2330
+ **Example {role_info['name'].lower()}-appropriate queries:**
2331
+ - "Basic explanation of [topic] for {role_info['name'].lower()}"
2332
+ - "Standard approaches to [issue]"
2333
+ - "Practical guidance for [situation]"
2334
+ - "Key concepts about [subject]"
2335
+
2336
+ *Note: Some specialized topics may have limited published research. I can still provide general information and guidance tailored to your role.*"""
2337
 
2338
  return {
2339
  "query": query,
2340
  "domain": domain,
2341
  "domain_info": domain_info,
2342
+ "user_context": role,
2343
+ "user_context_info": role_info,
2344
  "answer": answer,
2345
  "papers_used": 0,
2346
  "real_papers_used": 0,
 
2354
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
2355
  }
2356
 
2357
+ def _create_error_response(self, query: str, domain: str, role: str, error: str) -> Dict[str, Any]:
2358
  """Create error response"""
2359
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2360
+
2361
  if CONFIG_AVAILABLE:
2362
  try:
2363
  domain_info = {
 
2376
  })
2377
 
2378
  answer = f"""# 🚨 **Analysis Error**
2379
+
2380
+ **Role:** {role_info['name']} {role_info['icon']}
2381
  **Domain:** {domain_info['name']}
2382
+
2383
+ **Query:** {query}
2384
  **Error:** {error}
2385
+
2386
+ **Troubleshooting for {role_info['name']}:**
2387
  1. Check your internet connection
2388
+ 2. Try a simpler query or rephrase
2389
+ 3. Verify domain selection is appropriate
2390
  4. Contact support if problem persists
2391
+
2392
+ **For Role-Appropriate Alternatives:**
2393
+ - Ask a simpler version of your question
2394
+ - Request general information instead of specific research
2395
+ - Try breaking complex questions into smaller parts
2396
+ - Use more common terminology
2397
+
2398
+ Please try again or reformulate your question for {role_info['name'].lower()}-appropriate assistance."""
2399
 
2400
  return {
2401
  "query": query,
2402
  "domain": domain,
2403
  "domain_info": domain_info,
2404
+ "user_context": role,
2405
+ "user_context_info": role_info,
2406
  "answer": answer,
2407
  "papers_used": 0,
2408
  "real_papers_used": 0,
 
2496
  }
2497
 
2498
  def get_engine_status(self) -> Dict[str, Any]:
2499
+ """Get engine status with role metrics"""
2500
  # Calculate average guideline coverage
2501
  avg_guideline_coverage = 0
2502
  if self.metrics['guideline_coverage']:
 
2505
 
2506
  return {
2507
  "engine_name": "Medical Research RAG Engine",
2508
+ "version": "2.2.0",
2509
  "model": self.model if hasattr(self, 'model') else "Unknown",
2510
+ "features": ["role_based_reasoning", "real_paper_fetching",
2511
+ "confidence_scoring", "guideline_detection", "simple_query_handling"],
2512
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
2513
  "real_time_search": self.use_real_time,
2514
+ "roles_supported": list(RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.keys()),
2515
  "guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
2516
  "metrics": {
2517
  "total_queries": self.metrics['total_queries'],
 
2523
  "demo_papers_used": self.metrics['demo_papers_used']
2524
  },
2525
  "domains_supported": len(DOMAIN_INFO),
2526
+ "simple_query_handling": "ENABLED"
2527
  }
2528
 
2529
  def clear_memory(self):
 
2539
  # TEST FUNCTION
2540
  # ============================================================================
2541
 
2542
+ def test_role_based_rag_engine():
2543
+ """Test the medical RAG engine with role-based responses"""
2544
  print("\n" + "=" * 60)
2545
+ print("🧪 TESTING ROLE-BASED RAG ENGINE")
2546
  print("=" * 60)
2547
 
2548
  try:
2549
  # Initialize engine
2550
  engine = EnhancedRAGEngine(
2551
+ session_id="role_test",
2552
  model="gpt-oss-120b",
2553
+ use_real_time=False # Disable real-time for faster testing
2554
  )
2555
 
2556
+ # Test queries with different roles
2557
  test_cases = [
2558
  {
2559
+ "query": "hi",
2560
+ "domain": "general_medical",
2561
+ "role": "patient"
2562
  },
2563
  {
2564
+ "query": "hello",
2565
+ "domain": "cardiology",
2566
+ "role": "doctor"
2567
+ },
2568
+ {
2569
+ "query": "hey",
2570
  "domain": "endocrinology",
2571
+ "role": "student"
2572
  },
2573
  {
2574
+ "query": "Compare first-line antibiotics for community-acquired pneumonia",
2575
+ "domain": "infectious_disease",
2576
+ "role": "clinician"
2577
  }
2578
  ]
2579
 
2580
+ for i, test_case in enumerate(test_cases, 1):
2581
  print(f"\n📝 Test Case {i}:")
2582
  print(f" Query: '{test_case['query']}'")
2583
  print(f" Domain: {test_case['domain']}")
2584
+ print(f" Role: {test_case['role']}")
2585
 
2586
  # Process query
2587
  response = engine.answer_research_question(
2588
  query=test_case['query'],
2589
  domain=test_case['domain'],
2590
+ max_papers=5,
2591
+ role=test_case['role'],
2592
  use_fallback=True
2593
  )
2594
 
2595
  if response and 'error' not in response:
2596
  print(f"\n✅ Test Successful!")
2597
+ print(f" Response type: {response.get('reasoning_method', 'unknown')}")
2598
  print(f" Papers used: {response.get('papers_used', 0)}")
 
2599
  print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
2600
 
2601
+ # Check if it's a simple response
2602
+ if response.get('reasoning_method') in ['greeting', 'simple_response', 'direct_response']:
2603
+ print(f" ⭐ Simple query handled appropriately!")
 
 
 
 
2604
 
2605
  # Show engine status
2606
  status = engine.get_engine_status()
2607
  print(f"\n🔧 Engine Status:")
2608
+ print(f" Role-based responses: ENABLED")
2609
+ print(f" Simple query handling: ENABLED")
2610
+ print(f" Roles supported: {len(status['roles_supported'])}")
2611
  print(f" Total queries: {status['metrics']['total_queries']}")
2612
 
2613
  return True
 
2621
 
2622
  if __name__ == "__main__":
2623
  # Run test
2624
+ test_result = test_role_based_rag_engine()
2625
 
2626
  if test_result:
2627
  print(f"\n{'=' * 60}")
2628
+ print("🎉 ROLE-BASED RAG ENGINE TEST COMPLETE!")
2629
+ print(" Role-based reasoning: ✓")
2630
+ print(" Simple query handling: ✓")
2631
+ print(" Domain-agnostic approach: ✓")
2632
  print(" Guideline detection: ✓")
 
2633
  print(f"{'=' * 60}")
2634
  else:
2635
  print("\n❌ Engine test failed")