paulhemb committed on
Commit
9760410
·
verified ·
1 Parent(s): 72e921c

Update chat/rag_engine.py

Browse files

Improvements made to rag_engine.py

Files changed (1) hide show
  1. chat/rag_engine.py +720 -255
chat/rag_engine.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
  rag_engine.py - Production-Ready Medical RAG Engine
3
- Updated with explicit guideline citation detection
4
  """
5
 
6
  from typing import List, Dict, Any, Optional, Tuple
@@ -385,65 +385,218 @@ except ImportError:
385
 
386
 
387
  # ============================================================================
388
- # EVIDENCE-BASED REASONING FOR MEDICAL RESEARCH
389
  # ============================================================================
390
 
391
- class EvidenceBasedReasoning:
392
- """Single reasoning technique focused on evidence-based medical analysis"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
  @staticmethod
395
- def create_analysis_prompt(query: str, domain: str, user_context: str,
396
- papers_count: int = 0, guideline_info: Dict = None) -> str:
397
- """Create evidence-based reasoning prompt with guideline awareness"""
398
-
399
- # Map user context to focus
400
- context_focus = {
401
- "clinician": "clinical application, treatment decisions, patient management, guideline adherence",
402
- "researcher": "methodology, evidence quality, research implications, guideline gaps",
403
- "student": "understanding concepts, foundational knowledge, guideline-based learning",
404
- "patient": "personal implications, practical next steps, guideline-concordant care",
405
- "administrator": "implementation, resources, systemic considerations, guideline compliance",
406
- "general": "clear explanations, balanced overview, guideline context"
407
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
- focus = context_focus.get(user_context, "evidence-based medical insights")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
- # Add guideline-specific instructions
412
- guideline_context = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  if guideline_info:
414
  if guideline_info.get('guidelines_found'):
415
- guideline_context = f"\n**Guidelines Referenced:** Papers cite {', '.join(guideline_info['guidelines_found'])} guidelines."
416
  if guideline_info.get('critical_missing'):
417
- guideline_context += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'])} guidelines."
418
 
419
- return f"""You are a medical research expert specializing in {domain}.
420
- The user is a {user_context}, so focus on {focus}.
421
- QUERY: {query}
422
- DOMAIN: {domain}
423
- PAPERS ANALYZED: {papers_count}
424
- {guideline_context}
425
- **Perform Evidence-Based Analysis:**
426
- 1. **Evidence Assessment:**
427
- - What types of studies exist on this topic?
428
- - What is the quality and strength of evidence?
429
- - Are there systematic reviews or clinical trials?
430
- - How does the evidence align with current guidelines?
431
- 2. **Domain-Specific Analysis:**
432
- - How does this apply specifically to {domain}?
433
- - What are the standard approaches in this field?
434
- - What innovations or recent advances exist?
435
- - How do findings compare to guideline recommendations?
436
- 3. **Critical Evaluation:**
437
- - What are the strengths of current evidence?
438
- - What limitations or knowledge gaps exist?
439
- - Are there any controversies or alternative views?
440
- - How complete is guideline coverage?
441
- 4. **Practical Implications:**
442
- - What are the actionable insights for {user_context}?
443
- - What are the recommendations or next steps?
444
- - How should this evidence be applied in practice?
445
- - What guideline considerations are important?
446
- Provide a comprehensive, evidence-based answer that synthesizes medical knowledge with practical implications and guideline awareness."""
447
 
448
 
449
  # ============================================================================
@@ -1162,7 +1315,7 @@ class EnhancedRAGEngine:
1162
  print(f"⚠️ LLM not available - using fallback mode: {e}")
1163
  self.llm = None
1164
 
1165
- self.reasoning = EvidenceBasedReasoning()
1166
  self.ranker = PaperRanker()
1167
  self.confidence_scorer = ConfidenceScorer()
1168
  self.context_detector = UserContextDetector()
@@ -1204,6 +1357,7 @@ class EnhancedRAGEngine:
1204
  else:
1205
  print(" 📄 Real paper fetching: DISABLED (using demo papers)")
1206
  print(" 📋 Guideline detection: ENABLED")
 
1207
 
1208
  def answer_research_question(self,
1209
  query: str,
@@ -1212,8 +1366,10 @@ class EnhancedRAGEngine:
1212
  use_memory: bool = True,
1213
  user_context: str = "auto",
1214
  use_fallback: bool = False,
 
 
1215
  **kwargs) -> Dict[str, Any]:
1216
- """Answer medical research questions with evidence-based reasoning and guideline detection"""
1217
 
1218
  start_time = time.time()
1219
  self.metrics['total_queries'] += 1
@@ -1221,23 +1377,35 @@ class EnhancedRAGEngine:
1221
 
1222
  print(f"\n🔍 Processing query: '{query}'")
1223
  print(f" Domain: {domain}")
 
1224
  print(f" Max papers: {max_papers}")
1225
  print(f" Real-time search: {self.use_real_time}")
1226
 
1227
  try:
1228
- # Auto-detect user context if needed
1229
  if user_context == "auto":
1230
  user_context = self.context_detector.detect_context(query, domain)
1231
 
1232
  self.metrics['user_contexts'][user_context] += 1
1233
 
 
 
 
 
 
 
 
 
 
 
 
1234
  # Retrieve papers using MedicalResearchEngine
1235
  print("📚 Retrieving relevant papers...")
1236
  papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
1237
 
1238
  if not papers:
1239
  print("⚠️ No papers found, creating fallback response...")
1240
- return self._create_no_results_response(query, domain, user_context)
1241
 
1242
  # Detect guideline citations
1243
  print("📋 Detecting guideline citations...")
@@ -1254,7 +1422,7 @@ class EnhancedRAGEngine:
1254
  })
1255
 
1256
  # Rank papers
1257
- ranked_papers = self.ranker.rank_papers(papers, query, domain, user_context)
1258
  print(f"📊 Papers found: {len(ranked_papers)}")
1259
 
1260
  # Track paper sources
@@ -1274,29 +1442,29 @@ class EnhancedRAGEngine:
1274
 
1275
  # Calculate confidence with guideline consideration
1276
  confidence = self.confidence_scorer.calculate_confidence(
1277
- ranked_papers, query, "summary", user_context, domain, guideline_info
1278
  )
1279
 
1280
- # Generate analysis using evidence-based reasoning with guideline context
1281
- print("🧠 Generating evidence-based analysis...")
1282
- analysis = self._generate_analysis(
1283
- query, domain, user_context, ranked_papers, guideline_info
1284
  )
1285
 
1286
- # Generate clinical bottom line with guideline awareness
1287
- bottom_line = self._generate_bottom_line(
1288
- query, domain, user_context, len(ranked_papers), real_papers, guideline_info
1289
  )
1290
 
1291
  # Synthesize final answer
1292
- final_answer = self._synthesize_answer(
1293
- query, domain, user_context, analysis, ranked_papers,
1294
  bottom_line, confidence, guideline_info
1295
  )
1296
 
1297
  # Update memory
1298
  if use_memory and self.memory:
1299
- self._update_memory(query, final_answer, domain, user_context, ranked_papers, guideline_info)
1300
 
1301
  # Update metrics
1302
  response_time = time.time() - start_time
@@ -1317,7 +1485,300 @@ class EnhancedRAGEngine:
1317
  print(f"❌ Error in research analysis: {e}")
1318
  import traceback
1319
  traceback.print_exc()
1320
- return self._create_error_response(query, domain, user_context, str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1321
 
1322
  def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
1323
  use_fallback: bool = False) -> List[Dict]:
@@ -1515,53 +1976,51 @@ class EnhancedRAGEngine:
1515
 
1516
  return papers
1517
 
1518
- def _generate_analysis(self, query: str, domain: str, user_context: str,
1519
- papers: List[Dict], guideline_info: Dict = None) -> str:
1520
- """Generate evidence-based analysis with guideline context"""
 
1521
 
1522
  if not self.llm:
1523
- return self._create_fallback_analysis(query, domain, user_context, papers, guideline_info)
1524
 
1525
- # Create reasoning prompt with guideline information
1526
- prompt = self.reasoning.create_analysis_prompt(
1527
- query, domain, user_context, len(papers), guideline_info
1528
  )
1529
 
1530
- # Add paper information
1531
- paper_info = "\n".join([
1532
- f"{i + 1}. {p.get('title', 'Untitled')} ({p.get('source', 'Unknown')})"
1533
- for i, p in enumerate(papers[:5])
1534
- ])
 
 
1535
 
1536
  # Add demo paper disclaimer if any demo papers
1537
  demo_count = sum(1 for p in papers if p.get('is_demo', False))
1538
  if demo_count > 0:
1539
- prompt += f"\n\nNote: {demo_count} of the papers are illustrative examples."
1540
-
1541
- # Add guideline details if available
1542
- if guideline_info:
1543
- if guideline_info.get('guidelines_found'):
1544
- prompt += f"\n\nGuideline Context: Papers reference {len(guideline_info['guidelines_found'])} major guidelines."
1545
- if guideline_info.get('critical_missing'):
1546
- missing_guidelines = ', '.join(guideline_info['critical_missing'][:3])
1547
- prompt += f"\nGuideline Gap: Missing explicit citations for {missing_guidelines} guidelines."
1548
-
1549
- full_prompt = f"{prompt}\n\n**Relevant Papers:**\n{paper_info}\n\n**Analysis:**"
1550
 
1551
  try:
 
 
 
1552
  analysis = self.llm.generate(
1553
- full_prompt,
1554
- system_message=f"You are a {domain.replace('_', ' ')} expert providing evidence-based analysis for a {user_context}. Consider guideline adherence in your assessment.",
1555
- max_tokens=4000
1556
  )
1557
  return analysis
1558
  except Exception as e:
1559
- print(f"⚠️ LLM analysis failed: {e}")
1560
- return self._create_fallback_analysis(query, domain, user_context, papers, guideline_info)
1561
 
1562
- def _create_fallback_analysis(self, query: str, domain: str, user_context: str,
1563
- papers: List[Dict], guideline_info: Dict = None) -> str:
1564
  """Create fallback analysis when LLM is unavailable"""
 
 
1565
  if CONFIG_AVAILABLE:
1566
  try:
1567
  domain_name = get_domain_display_name(domain)
@@ -1574,12 +2033,10 @@ class EnhancedRAGEngine:
1574
  real_papers = [p for p in papers if not p.get('is_demo', False)]
1575
  demo_papers = [p for p in papers if p.get('is_demo', False)]
1576
 
1577
- paper_titles = [p.get('title', '') for p in papers[:3]]
1578
-
1579
- analysis = f"""**Evidence-Based Analysis for {domain_name}**
1580
  **Query:** {query}
1581
- **User Context:** {user_context}
1582
- **Papers Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)"""
1583
 
1584
  # Add guideline information
1585
  if guideline_info:
@@ -1589,39 +2046,44 @@ class EnhancedRAGEngine:
1589
  analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
1590
 
1591
  analysis += f"""
1592
- **Key Findings:**
1593
- Based on analysis of {len(papers)} relevant papers, several key points emerge:
1594
- 1. **Current Evidence:** The literature shows evolving understanding of this topic within {domain_name}. Recent studies have contributed significantly to our knowledge base."""
1595
 
1596
- # Add guideline-specific observations
1597
- if guideline_info and guideline_info.get('critical_missing'):
1598
- analysis += f"\n2. **Guideline Alignment:** Evidence may not fully align with major clinical guidelines. Consider consulting {', '.join(guideline_info['critical_missing'][:2])} guidelines for comprehensive recommendations."
1599
- else:
1600
- analysis += f"\n2. **Clinical Context:** Findings should be interpreted within established clinical frameworks and guidelines."
1601
 
1602
- analysis += f"""
1603
- 3. **Methodological Approaches:** Studies employ various designs including clinical trials, cohort studies, and systematic reviews. The quality of evidence varies across studies.
1604
- 4. **Clinical Implications:** Findings have relevance for {user_context} practice and decision-making. Practical applications should consider individual patient factors and clinical context.
1605
- 5. **Research Gaps:** Further studies are needed to address remaining questions, particularly regarding long-term outcomes and specific patient subgroups.
1606
- **Selected Papers:**
1607
- {chr(10).join([f"- {title}" for title in paper_titles if title])}
1608
- **Recommendations for {user_context}:**
1609
- - Consider the evidence in context of individual circumstances
1610
- - Consult with specialists when appropriate
1611
- - Stay updated with emerging research
1612
- - Apply evidence-based guidelines when available
1613
- - {"Pay attention to guideline gaps noted above" if guideline_info and guideline_info.get('critical_missing') else "Reference established clinical guidelines"}
1614
- *Note: This analysis is based on available literature. For specific cases, consult with healthcare professionals.*"""
 
 
 
 
 
1615
 
1616
  if demo_papers:
1617
- analysis += f"\n\n*Disclaimer: {len(demo_papers)} papers are illustrative examples generated for demonstration purposes.*"
1618
 
1619
  return analysis
1620
 
1621
- def _generate_bottom_line(self, query: str, domain: str, user_context: str,
1622
- papers_count: int, real_papers_count: int,
1623
- guideline_info: Dict = None) -> str:
1624
- """Generate clinical bottom line with guideline awareness"""
 
 
1625
  if CONFIG_AVAILABLE:
1626
  try:
1627
  domain_name = get_domain_display_name(domain)
@@ -1630,41 +2092,43 @@ Based on analysis of {len(papers)} relevant papers, several key points emerge:
1630
  else:
1631
  domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
1632
 
1633
- bottom_line = f"""**Clinical Bottom Line for {user_context}:**
1634
- Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count} real papers), current evidence provides actionable insights for clinical practice."""
1635
 
1636
  # Add guideline-specific bottom line
1637
  if guideline_info:
1638
  if guideline_info.get('guidelines_found'):
1639
- bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced ({', '.join(guideline_info['guidelines_found'][:3])})."
1640
 
1641
  if guideline_info.get('critical_missing'):
1642
- missing_list = ', '.join(guideline_info['critical_missing'][:3])
1643
- bottom_line += f"\n**Important Note:** Missing explicit guideline citations ({missing_list}). Consider consulting these for comprehensive recommendations."
1644
 
1645
  coverage = guideline_info.get('coverage_percentage', 0)
1646
  if coverage < 50:
1647
- bottom_line += f"\n**Evidence Limitations:** Guideline coverage is limited ({coverage}%)."
1648
 
1649
  bottom_line += f"""
1650
- **Key Considerations:**
1651
- - Patient-specific factors and individual risk-benefit assessments
1652
- - Treatment availability and resource constraints
1653
- - Consultation with specialists for complex cases
1654
- - {"Particular attention to guideline gaps noted above" if guideline_info and guideline_info.get('critical_missing') else "Adherence to established clinical guidelines"}
1655
- - Integration with clinical judgment and patient preferences"""
1656
 
1657
  if papers_count > real_papers_count:
1658
- bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for comprehensive analysis.*"
1659
 
1660
  return bottom_line
1661
 
1662
- def _synthesize_answer(self, query: str, domain: str, user_context: str,
1663
- analysis: str, papers: List[Dict],
1664
- bottom_line: str, confidence: Dict[str, Any],
1665
- guideline_info: Dict = None) -> Dict[str, Any]:
1666
- """Synthesize final answer with guideline information"""
1667
 
 
 
1668
  if CONFIG_AVAILABLE:
1669
  try:
1670
  domain_info = {
@@ -1685,18 +2149,13 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1685
  'description': f'Research in {domain.replace("_", " ")}'
1686
  })
1687
 
1688
- context_info = USER_CONTEXT_INFO.get(user_context, {
1689
- 'name': user_context.title(),
1690
- 'icon': '👤'
1691
- })
1692
-
1693
  # Count real vs demo papers
1694
  real_papers = [p for p in papers if not p.get('is_demo', False)]
1695
  demo_papers = [p for p in papers if p.get('is_demo', False)]
1696
 
1697
- # Format paper citations with guideline indicators
1698
  paper_citations = []
1699
- for i, paper in enumerate(papers[:7], 1):
1700
  title = paper.get('title', 'Untitled')
1701
  authors = paper.get('authors', [])
1702
  year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
@@ -1705,11 +2164,6 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1705
  is_demo = paper.get('is_demo', False)
1706
  is_preprint = paper.get('is_preprint', False)
1707
 
1708
- # Check if paper mentions guidelines
1709
- text = f"{title} {paper.get('abstract', '')}".lower()
1710
- has_guidelines = any(keyword in text for keyword in ['guideline', 'recommendation', 'consensus',
1711
- 'ada', 'aha', 'acc', 'esc', 'idsa', 'ats'])
1712
-
1713
  # Format authors
1714
  if authors and isinstance(authors, list) and len(authors) > 0:
1715
  if len(authors) == 1:
@@ -1726,23 +2180,19 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1726
 
1727
  demo_indicator = "📄 " if is_demo else ""
1728
  preprint_indicator = "⚡ " if is_preprint else ""
1729
- guideline_indicator = "📋 " if has_guidelines else ""
1730
 
1731
  if author_str and year:
1732
- citation += f"\n {demo_indicator}{preprint_indicator}{guideline_indicator}*{author_str} ({year})*"
1733
  elif author_str:
1734
- citation += f"\n {demo_indicator}{preprint_indicator}{guideline_indicator}*{author_str}*"
1735
  else:
1736
- citation += f"\n {demo_indicator}{preprint_indicator}{guideline_indicator}*Unknown authors*"
1737
 
1738
  if journal:
1739
  citation += f"\n Journal: {journal}"
1740
  elif source and source != 'unknown':
1741
  citation += f"\n Source: {source}"
1742
 
1743
- if has_guidelines:
1744
- citation += f"\n *References clinical guidelines*"
1745
-
1746
  paper_citations.append(citation)
1747
 
1748
  # Build guideline summary section
@@ -1753,13 +2203,6 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1753
  if guideline_info.get('guidelines_found'):
1754
  guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
1755
 
1756
- # Show papers that mention guidelines
1757
- if guideline_info.get('papers_with_guidelines'):
1758
- guideline_summary += "**Papers Citing Guidelines:**\n"
1759
- for paper_info in guideline_info['papers_with_guidelines'][:3]:
1760
- guideline_summary += f"- {paper_info['title']} ({', '.join(paper_info['guidelines'][:2])})\n"
1761
- guideline_summary += "\n"
1762
-
1763
  if guideline_info.get('critical_missing'):
1764
  missing_list = ', '.join(guideline_info['critical_missing'])
1765
  guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
@@ -1767,11 +2210,11 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1767
  guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
1768
 
1769
  # Build answer
1770
- answer = f"""# 🔬 **Medical Research Analysis**
 
1771
  **Domain:** {domain_info['name']} {domain_info.get('icon', '')}
1772
- **User Context:** {context_info['name']} {context_info.get('icon', '')}
1773
  **Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
1774
- **Papers Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
1775
  ---
1776
  ## 📋 **Executive Summary**
1777
  {bottom_line}
@@ -1780,22 +2223,22 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1780
  {analysis}
1781
  ---
1782
  ## 📊 **Supporting Evidence**
1783
- {chr(10).join(paper_citations)}
1784
  ---
1785
- ## 🎯 **Key Takeaways**
1786
- 1. Evidence-based insights relevant to {context_info['name'].lower()} perspective
1787
  2. Domain-specific considerations for {domain_info['name'].lower()}
1788
- 3. {"Guideline-aware recommendations" if guideline_info else "Practical implications for application"}
1789
- 4. {"Attention to guideline gaps noted" if guideline_info and guideline_info.get('critical_missing') else "Integration with clinical guidelines"}
1790
- *Analysis performed using evidence-based medical reasoning with guideline assessment*
1791
  *Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
1792
 
1793
  return {
1794
  "query": query,
1795
  "domain": domain,
1796
  "domain_info": domain_info,
1797
- "user_context": user_context,
1798
- "user_context_info": context_info,
1799
  "answer": answer,
1800
  "analysis": analysis,
1801
  "bottom_line": bottom_line,
@@ -1804,27 +2247,27 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1804
  "demo_papers_used": len(demo_papers),
1805
  "confidence_score": confidence,
1806
  "guideline_info": guideline_info,
1807
- "reasoning_method": "evidence_based",
1808
  "real_time_search": self.use_real_time,
1809
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
1810
  "metrics": {
1811
  'response_time': time.time(),
1812
  'papers_analyzed': len(papers),
1813
  'domain': domain,
1814
- 'user_context': user_context
1815
  }
1816
  }
1817
 
1818
  def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
1819
- user_context: str, papers: List[Dict], guideline_info: Dict = None):
1820
- """Update conversation memory with guideline info"""
1821
  if not self.memory:
1822
  return
1823
 
1824
  memory_data = {
1825
  'query': query,
1826
  'domain': domain,
1827
- 'user_context': user_context,
1828
  'papers_used': len(papers),
1829
  'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
1830
  'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
@@ -1844,8 +2287,10 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1844
  metadata=memory_data
1845
  )
1846
 
1847
- def _create_no_results_response(self, query: str, domain: str, user_context: str) -> Dict[str, Any]:
1848
  """Create response when no papers are found"""
 
 
1849
  if CONFIG_AVAILABLE:
1850
  try:
1851
  domain_info = {
@@ -1864,29 +2309,38 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1864
  })
1865
 
1866
  answer = f"""# 🔍 **Limited Research Found**
1867
- **Query:** {query}
 
1868
  **Domain:** {domain_info['name']}
1869
- **User Context:** {user_context}
1870
- **Suggestions:**
 
 
1871
  1. Try broadening your search terms
1872
- 2. Consider related {domain_info['name']} topics
1873
- 3. Check spelling of medical terms
1874
  4. Try a more general domain selection
1875
- **For Guideline-Conscious Searches:**
1876
- - Search specific guideline names (e.g., "ADA guidelines diabetes")
1877
- - Include "guideline" or "recommendation" in search terms
1878
- - Check official guideline organization websites
1879
- **Example queries:**
1880
- - "Current treatments for [condition] according to guidelines"
1881
- - "Recent advances in {domain_info['name'].lower()} with guideline updates"
1882
- - "Clinical guidelines for [topic]"
1883
- *Note: Some specialized topics may have limited published research. Check official guideline sources directly.*"""
 
 
 
 
 
1884
 
1885
  return {
1886
  "query": query,
1887
  "domain": domain,
1888
  "domain_info": domain_info,
1889
- "user_context": user_context,
 
1890
  "answer": answer,
1891
  "papers_used": 0,
1892
  "real_papers_used": 0,
@@ -1900,8 +2354,10 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1900
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
1901
  }
1902
 
1903
- def _create_error_response(self, query: str, domain: str, user_context: str, error: str) -> Dict[str, Any]:
1904
  """Create error response"""
 
 
1905
  if CONFIG_AVAILABLE:
1906
  try:
1907
  domain_info = {
@@ -1920,26 +2376,33 @@ Based on analysis of {papers_count} papers in {domain_name} ({real_papers_count}
1920
  })
1921
 
1922
  answer = f"""# 🚨 **Analysis Error**
1923
- **Query:** {query}
 
1924
  **Domain:** {domain_info['name']}
1925
- **User Context:** {user_context}
 
1926
  **Error:** {error}
1927
- **Troubleshooting:**
 
1928
  1. Check your internet connection
1929
- 2. Try a simpler query
1930
- 3. Verify domain selection
1931
  4. Contact support if problem persists
1932
- **For Guideline Searches:**
1933
- - Try searching guideline names directly
1934
- - Check if specific guideline databases are accessible
1935
- - Consider searching PubMed with guideline filters
1936
- Please try again or reformulate your question."""
 
 
 
1937
 
1938
  return {
1939
  "query": query,
1940
  "domain": domain,
1941
  "domain_info": domain_info,
1942
- "user_context": user_context,
 
1943
  "answer": answer,
1944
  "papers_used": 0,
1945
  "real_papers_used": 0,
@@ -2033,7 +2496,7 @@ Please try again or reformulate your question."""
2033
  }
2034
 
2035
  def get_engine_status(self) -> Dict[str, Any]:
2036
- """Get engine status with guideline metrics"""
2037
  # Calculate average guideline coverage
2038
  avg_guideline_coverage = 0
2039
  if self.metrics['guideline_coverage']:
@@ -2042,12 +2505,13 @@ Please try again or reformulate your question."""
2042
 
2043
  return {
2044
  "engine_name": "Medical Research RAG Engine",
2045
- "version": "2.1.0",
2046
  "model": self.model if hasattr(self, 'model') else "Unknown",
2047
- "features": ["evidence_based_reasoning", "real_paper_fetching",
2048
- "confidence_scoring", "guideline_detection", "guideline_gap_analysis"],
2049
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
2050
  "real_time_search": self.use_real_time,
 
2051
  "guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
2052
  "metrics": {
2053
  "total_queries": self.metrics['total_queries'],
@@ -2059,7 +2523,7 @@ Please try again or reformulate your question."""
2059
  "demo_papers_used": self.metrics['demo_papers_used']
2060
  },
2061
  "domains_supported": len(DOMAIN_INFO),
2062
- "user_contexts_supported": len(USER_CONTEXT_INFO)
2063
  }
2064
 
2065
  def clear_memory(self):
@@ -2075,74 +2539,75 @@ Please try again or reformulate your question."""
2075
  # TEST FUNCTION
2076
  # ============================================================================
2077
 
2078
- def test_medical_rag_engine():
2079
- """Test the medical RAG engine with guideline detection"""
2080
  print("\n" + "=" * 60)
2081
- print("🧪 TESTING MEDICAL RAG ENGINE WITH GUIDELINE DETECTION")
2082
  print("=" * 60)
2083
 
2084
  try:
2085
  # Initialize engine
2086
  engine = EnhancedRAGEngine(
2087
- session_id="medical_test",
2088
  model="gpt-oss-120b",
2089
- use_real_time=True # Enable real-time paper fetching
2090
  )
2091
 
2092
- # Test queries with different domains to test guideline detection
2093
  test_cases = [
2094
  {
2095
- "query": "Compare first-line antibiotics for community-acquired pneumonia based on recent evidence",
2096
- "domain": "infectious_disease",
2097
- "user_context": "clinician"
2098
  },
2099
  {
2100
- "query": "Newest GLP-1 agonists for type 2 diabetes and comparative effectiveness",
 
 
 
 
 
2101
  "domain": "endocrinology",
2102
- "user_context": "clinician"
2103
  },
2104
  {
2105
- "query": "Management of hypertension in elderly patients with diabetes",
2106
- "domain": "cardiology",
2107
- "user_context": "researcher"
2108
  }
2109
  ]
2110
 
2111
- for i, test_case in enumerate(test_cases[:1], 1): # Test first one for speed
2112
  print(f"\n📝 Test Case {i}:")
2113
  print(f" Query: '{test_case['query']}'")
2114
  print(f" Domain: {test_case['domain']}")
2115
- print(f" User Context: {test_case['user_context']}")
2116
 
2117
  # Process query
2118
  response = engine.answer_research_question(
2119
  query=test_case['query'],
2120
  domain=test_case['domain'],
2121
- max_papers=15,
2122
- user_context=test_case['user_context'],
2123
  use_fallback=True
2124
  )
2125
 
2126
  if response and 'error' not in response:
2127
  print(f"\n✅ Test Successful!")
 
2128
  print(f" Papers used: {response.get('papers_used', 0)}")
2129
- print(f" Real papers: {response.get('real_papers_used', 0)}")
2130
  print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
2131
 
2132
- # Check guideline info
2133
- guideline_info = response.get('guideline_info', {})
2134
- if guideline_info:
2135
- print(f" Guidelines found: {len(guideline_info.get('guidelines_found', []))}")
2136
- if guideline_info.get('critical_missing'):
2137
- print(f" Missing guidelines: {', '.join(guideline_info['critical_missing'][:3])}")
2138
- print(f" Guideline coverage: {guideline_info.get('coverage_percentage', 0)}%")
2139
 
2140
  # Show engine status
2141
  status = engine.get_engine_status()
2142
  print(f"\n🔧 Engine Status:")
2143
- print(f" Research engine available: {status.get('research_engine_available', False)}")
2144
- print(f" Guideline detection: ENABLED")
2145
- print(f" Average guideline coverage: {status['metrics']['average_guideline_coverage']}%")
2146
  print(f" Total queries: {status['metrics']['total_queries']}")
2147
 
2148
  return True
@@ -2156,15 +2621,15 @@ def test_medical_rag_engine():
2156
 
2157
  if __name__ == "__main__":
2158
  # Run test
2159
- test_result = test_medical_rag_engine()
2160
 
2161
  if test_result:
2162
  print(f"\n{'=' * 60}")
2163
- print("🎉 MEDICAL RAG ENGINE TEST COMPLETE!")
2164
- print(" Evidence-based reasoning: ✓")
2165
- print(" Real paper fetching: ✓")
 
2166
  print(" Guideline detection: ✓")
2167
- print(" Guideline gap analysis: ✓")
2168
  print(f"{'=' * 60}")
2169
  else:
2170
  print("\n❌ Engine test failed")
 
1
  """
2
  rag_engine.py - Production-Ready Medical RAG Engine
3
+ Updated with role-based response handling and improved simple query detection
4
  """
5
 
6
  from typing import List, Dict, Any, Optional, Tuple
 
385
 
386
 
387
  # ============================================================================
388
+ # ROLE-BASED REASONING FOR MEDICAL RESEARCH
389
  # ============================================================================
390
 
391
class RoleBasedReasoning:
    """Role-based reasoning for domain-agnostic, audience-appropriate prompts.

    Builds LLM prompts tailored to who is reading the answer (patient,
    student, clinician, ...) instead of forcing a single medical framing.
    Other components read ``ROLE_SYSTEM_PROMPTS[role]['name'|'icon'|'prompt']``,
    so those three keys must stay stable for every role.
    """

    # Role definitions with domain-agnostic system-prompt fragments.
    ROLE_SYSTEM_PROMPTS = {
        'patient': {
            'name': 'Patient',
            'icon': '🩺',
            'prompt': '''You are helping a patient understand information. Use simple, clear, reassuring language.
- Focus on practical implications and what they need to know
- Avoid complex terminology or jargon
- Emphasize safety and when to seek professional help
- Be compassionate and supportive
- Do not provide diagnoses or specific medical advice
- Explain concepts in everyday terms'''
        },
        'student': {
            'name': 'Student',
            'icon': '🎓',
            'prompt': '''You are teaching a student. Focus on educational value and understanding.
- Explain foundational concepts and definitions
- Provide examples and analogies
- Encourage critical thinking and questions
- Structure information logically
- Connect to broader knowledge areas
- Mention learning resources when helpful'''
        },
        'clinician': {
            'name': 'Clinician',
            'icon': '👨‍⚕️',
            'prompt': '''You are assisting a healthcare professional. Be concise, actionable, and evidence-based.
- Focus on practical implications and decision-making
- Reference guidelines and evidence levels when relevant
- Consider workflow and implementation
- Be precise but efficient with time
- Address risks and benefits clearly
- Maintain professional tone'''
        },
        'doctor': {
            'name': 'Doctor',
            'icon': '⚕️',
            'prompt': '''You are assisting a physician. Use appropriate terminology and clinical reasoning.
- Focus on differential diagnosis, treatment options, and management
- Reference current standards of care and guidelines
- Consider patient factors and comorbidities
- Discuss evidence quality and limitations
- Be thorough but organized
- Maintain clinical accuracy'''
        },
        'researcher': {
            'name': 'Researcher',
            'icon': '🔬',
            'prompt': '''You are assisting a research scientist. Focus on methodology and evidence.
- Discuss study designs, methods, and limitations
- Analyze evidence quality and gaps
- Consider statistical significance and clinical relevance
- Reference current literature and trends
- Discuss implications for future research
- Maintain scientific rigor'''
        },
        'professor': {
            'name': 'Professor',
            'icon': '📚',
            'prompt': '''You are assisting an academic educator. Focus on knowledge synthesis and pedagogy.
- Provide comprehensive overviews with context
- Compare theories, methods, and findings
- Discuss historical development and future directions
- Emphasize critical evaluation and synthesis
- Connect to broader academic discourse
- Support teaching and learning objectives'''
        },
        'pharmacist': {
            'name': 'Pharmacist',
            'icon': '💊',
            'prompt': '''You are assisting a pharmacy professional. Focus on medications and safety.
- Discuss drug mechanisms, interactions, and pharmacokinetics
- Emphasize safety profiles and monitoring
- Consider dosing, administration, and compliance
- Address patient counseling points
- Reference formularies and guidelines
- Maintain focus on medication optimization'''
        },
        'general': {
            'name': 'General User',
            'icon': '👤',
            'prompt': '''You are assisting a general user. Provide balanced, accessible information.
- Adjust complexity based on the query
- Be helpful and informative without overwhelming
- Provide context and practical implications
- Use clear language with minimal jargon
- Consider diverse backgrounds and knowledge levels
- Maintain neutral, objective tone'''
        }
    }

    # Exact phrases treated as greetings / small talk (lower-cased, stripped).
    SIMPLE_QUERIES = ('hi', 'hello', 'hey', 'greetings', 'good morning',
                      'good afternoon', 'good evening', 'how are you',
                      "what's up", 'sup')

    # Greeting specs per role: (persona line, instruction line, example reply).
    # Consolidates the previously duplicated per-role greeting branches.
    _GREETING_SPECS = {
        'patient': (
            'You are helping a patient. Use warm, reassuring tone.',
            'Respond with a friendly greeting and invitation to ask questions. Keep it brief and welcoming.',
            "Hello! I'm here to help answer your health questions in simple, clear terms. What would you like to know?"),
        'student': (
            'You are teaching a student.',
            'Respond with an encouraging greeting that invites learning questions.',
            "Hi there! I'm here to help you learn about medical topics. What are you curious about today?"),
        'clinician': (
            'You are assisting a healthcare professional.',
            'Respond with a professional greeting appropriate for clinical setting.',
            "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?"),
        'doctor': (
            'You are assisting a healthcare professional.',
            'Respond with a professional greeting appropriate for clinical setting.',
            "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?"),
        'researcher': (
            'You are assisting an academic professional.',
            'Respond with a scholarly greeting that invites research questions.',
            "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?"),
        'professor': (
            'You are assisting an academic professional.',
            'Respond with a scholarly greeting that invites research questions.',
            "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?"),
        'pharmacist': (
            'You are assisting a pharmacy professional.',
            'Respond with a professional greeting focused on medication information.',
            "Hello. I can help with medication-related questions and information. How can I assist you today?"),
        'general': (
            'You are assisting a general user.',
            'Respond with a friendly, welcoming greeting.',
            "Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"),
    }

    @staticmethod
    def create_role_prompt(query: str, domain: str, role: str,
                           papers_count: int = 0, guideline_info: Dict = None) -> str:
        """Create a role-appropriate LLM prompt with domain-agnostic focus.

        Args:
            query: The user's question (verbatim).
            domain: Domain hint (e.g. ``"cardiology"``); advisory only.
            role: One of ``ROLE_SYSTEM_PROMPTS`` keys; unknown roles fall
                back to ``'general'``.
            papers_count: Number of retrieved sources to mention, if any.
            guideline_info: Optional dict with ``guidelines_found`` /
                ``critical_missing`` lists from guideline detection.

        Returns:
            A complete prompt string: a short greeting prompt for exact
            greeting phrases, otherwise a full role-framed analysis prompt.

        Bug fix: previously any query of <= 2 words was routed to the
        greeting branch, so substantive short queries such as
        "diabetes treatment" received a greeting prompt. Only exact
        greeting phrases short-circuit now.
        """
        role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
            role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

        query_lower = query.lower().strip()
        if query_lower in RoleBasedReasoning.SIMPLE_QUERIES:
            persona, instruction, example = RoleBasedReasoning._GREETING_SPECS.get(
                role, RoleBasedReasoning._GREETING_SPECS['general'])
            return (f"{persona}\n\n"
                    f"Query: {query}\n\n"
                    f"{instruction}\n"
                    f'Example: "{example}"')

        # Substantive query: build the full role-framed analysis prompt.
        role_prompt = role_info['prompt']
        role_name_lower = role_info['name'].lower()

        domain_agnostic = """DOMAIN-AGNOSTIC APPROACH:
- This system can answer questions from ANY domain (tech, finance, health, education, general)
- Adapt your expertise to the query domain naturally
- Do NOT force medical framing on non-medical questions
- Only emphasize citations/guidelines when the query domain and role demand it
- Use appropriate terminology for the query's domain"""

        # Evidence emphasis only for evidence-centric roles.
        evidence_note = ('Reference evidence/guidelines when domain-appropriate'
                         if role in ('clinician', 'doctor', 'researcher')
                         else 'Mention evidence when helpful, not required')
        safety_note = ('Include appropriate disclaimers'
                       if role == 'patient'
                       else 'Maintain professional standards')

        prompt = f"""ROLE: {role_info['name']} {role_info['icon']}
{role_prompt}

{domain_agnostic}

QUERY: {query}
QUERY DOMAIN CONTEXT: {domain} (adapt your response appropriately)

RESPONSE GUIDELINES:
1. **Role-Appropriate Depth:**
   - {role}: Adjust response complexity for {role_name_lower} needs

2. **Terminology Level:**
   - Use language appropriate for {role_name_lower} understanding

3. **Evidence Awareness:**
   - {evidence_note}

4. **Safety & Practicality:**
   - {safety_note}

5. **Response Structure:**
   - Organize information logically for {role_name_lower} understanding
   - Prioritize most relevant information first
   - Keep response focused and actionable"""

        # Mention retrieved sources so the model grounds its analysis.
        if papers_count > 0:
            prompt += f"\n\nRESEARCH CONTEXT: Analyzing {papers_count} relevant sources"

        # Surface detected guidelines and known gaps, when available.
        if guideline_info:
            if guideline_info.get('guidelines_found'):
                prompt += f"\nGUIDELINES REFERENCED: {', '.join(guideline_info['guidelines_found'])}"
            if guideline_info.get('critical_missing'):
                prompt += f"\nGUIDELINE GAPS: Missing {', '.join(guideline_info['critical_missing'][:2])}"

        prompt += f"\n\nPlease provide a {role_name_lower}-appropriate response to: {query}"

        return prompt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
 
602
  # ============================================================================
 
1315
  print(f"⚠️ LLM not available - using fallback mode: {e}")
1316
  self.llm = None
1317
 
1318
+ self.role_reasoning = RoleBasedReasoning() # NEW: Role-based reasoning
1319
  self.ranker = PaperRanker()
1320
  self.confidence_scorer = ConfidenceScorer()
1321
  self.context_detector = UserContextDetector()
 
1357
  else:
1358
  print(" 📄 Real paper fetching: DISABLED (using demo papers)")
1359
  print(" 📋 Guideline detection: ENABLED")
1360
+ print(" 👤 Role-based responses: ENABLED")
1361
 
1362
  def answer_research_question(self,
1363
  query: str,
 
1366
  use_memory: bool = True,
1367
  user_context: str = "auto",
1368
  use_fallback: bool = False,
1369
+ role: str = "general", # NEW: Explicit role parameter
1370
+ role_system_prompt: str = None, # NEW: Custom role prompt from frontend
1371
  **kwargs) -> Dict[str, Any]:
1372
+ """Answer medical research questions with role-based reasoning"""
1373
 
1374
  start_time = time.time()
1375
  self.metrics['total_queries'] += 1
 
1377
 
1378
  print(f"\n🔍 Processing query: '{query}'")
1379
  print(f" Domain: {domain}")
1380
+ print(f" Role: {role}")
1381
  print(f" Max papers: {max_papers}")
1382
  print(f" Real-time search: {self.use_real_time}")
1383
 
1384
  try:
1385
+ # Auto-detect user context if needed (backward compatibility)
1386
  if user_context == "auto":
1387
  user_context = self.context_detector.detect_context(query, domain)
1388
 
1389
  self.metrics['user_contexts'][user_context] += 1
1390
 
1391
+ # NEW: Check for simple queries first (greetings, basic questions)
1392
+ simple_response = self._handle_simple_query(query, domain, role)
1393
+ if simple_response:
1394
+ return simple_response
1395
+
1396
+ # Check if query requires research analysis
1397
+ requires_research = self._requires_research_analysis(query)
1398
+ if not requires_research:
1399
+ # For non-research queries, provide direct role-appropriate response
1400
+ return self._handle_direct_query(query, domain, role)
1401
+
1402
  # Retrieve papers using MedicalResearchEngine
1403
  print("📚 Retrieving relevant papers...")
1404
  papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
1405
 
1406
  if not papers:
1407
  print("⚠️ No papers found, creating fallback response...")
1408
+ return self._create_no_results_response(query, domain, role)
1409
 
1410
  # Detect guideline citations
1411
  print("📋 Detecting guideline citations...")
 
1422
  })
1423
 
1424
  # Rank papers
1425
+ ranked_papers = self.ranker.rank_papers(papers, query, domain, role)
1426
  print(f"📊 Papers found: {len(ranked_papers)}")
1427
 
1428
  # Track paper sources
 
1442
 
1443
  # Calculate confidence with guideline consideration
1444
  confidence = self.confidence_scorer.calculate_confidence(
1445
+ ranked_papers, query, "summary", role, domain, guideline_info
1446
  )
1447
 
1448
+ # Generate analysis using role-based reasoning
1449
+ print("🧠 Generating role-based analysis...")
1450
+ analysis = self._generate_role_based_analysis(
1451
+ query, domain, role, ranked_papers, guideline_info, role_system_prompt
1452
  )
1453
 
1454
+ # Generate clinical bottom line with role awareness
1455
+ bottom_line = self._generate_role_bottom_line(
1456
+ query, domain, role, len(ranked_papers), real_papers, guideline_info
1457
  )
1458
 
1459
  # Synthesize final answer
1460
+ final_answer = self._synthesize_role_answer(
1461
+ query, domain, role, analysis, ranked_papers,
1462
  bottom_line, confidence, guideline_info
1463
  )
1464
 
1465
  # Update memory
1466
  if use_memory and self.memory:
1467
+ self._update_memory(query, final_answer, domain, role, ranked_papers, guideline_info)
1468
 
1469
  # Update metrics
1470
  response_time = time.time() - start_time
 
1485
  print(f"❌ Error in research analysis: {e}")
1486
  import traceback
1487
  traceback.print_exc()
1488
+ return self._create_error_response(query, domain, role, str(e))
1489
+
1490
def _handle_simple_query(self, query: str, domain: str, role: str) -> Optional[Dict[str, Any]]:
    """Return a canned role-appropriate response for trivial queries, or None.

    Greetings get a greeting response; other very short (<= 2 word) inputs
    that do not look like research questions get a clarification prompt.
    Substantive queries fall through (returns None) to the full pipeline.
    """
    GREETINGS = ('hi', 'hello', 'hey', 'greetings', 'good morning',
                 'good afternoon', 'good evening', 'howdy')
    normalized = query.lower().strip()

    if normalized in GREETINGS:
        print(" 👋 Detected simple greeting")
        return self._create_greeting_response(query, domain, role)

    is_short = len(query.split()) <= 2
    if is_short and not self._looks_like_research_query(query):
        print(" 💬 Detected simple query")
        return self._create_simple_response(query, domain, role)

    return None
1508
+
1509
+ def _looks_like_research_query(self, query: str) -> bool:
1510
+ """Check if query looks like a research question"""
1511
+ query_lower = query.lower()
1512
+
1513
+ # Research question indicators
1514
+ research_indicators = [
1515
+ 'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis',
1516
+ 'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical',
1517
+ 'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis',
1518
+ 'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk',
1519
+ 'benefit', 'recommendation', 'guideline', 'standard', 'protocol'
1520
+ ]
1521
+
1522
+ # Check if query contains research indicators
1523
+ for indicator in research_indicators:
1524
+ if indicator in query_lower:
1525
+ return True
1526
+
1527
+ # Check question words
1528
+ question_words = ['what', 'why', 'how', 'when', 'where', 'which', 'who']
1529
+ if any(query_lower.startswith(word) for word in question_words):
1530
+ # Check if it's a complex question (more than basic)
1531
+ if len(query.split()) > 3:
1532
+ return True
1533
+
1534
+ return False
1535
+
1536
def _requires_research_analysis(self, query: str) -> bool:
    """Decide whether *query* warrants the full paper-retrieval pipeline.

    Greetings/pleasantries and very short non-research fragments are
    answered directly; everything else goes through research analysis.
    """
    normalized = query.lower().strip()

    # Exact-match conversational phrases that never need research.
    chitchat_patterns = (
        r'^hi$', r'^hello$', r'^hey$', r'^greetings$',
        r'^good morning$', r'^good afternoon$', r'^good evening$',
        r'^how are you$', r"^what's up$", r'^sup$',
        r'^thanks$', r'^thank you$', r'^bye$', r'^goodbye$',
    )
    if any(re.match(pattern, normalized) for pattern in chitchat_patterns):
        return False

    # Short fragments without research vocabulary are handled directly too.
    if len(query.split()) <= 2 and not self._looks_like_research_query(query):
        return False

    return True
1557
+
1558
def _create_greeting_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Build a canned, role-appropriate greeting response payload.

    Args:
        query: The greeting as typed by the user.
        domain: Active domain key (used only for display metadata).
        role: Reader role; unknown roles fall back to 'general'.

    Returns:
        A response dict with the same schema as the full research pipeline
        (answer/analysis/confidence/metrics/...), but with no papers.

    Fixes vs. previous version:
    - Bare ``except:`` narrowed to ``except Exception`` so KeyboardInterrupt
      and SystemExit are not swallowed.
    - ``metrics['response_time']`` previously stored ``time.time()`` (an
      absolute epoch timestamp, ~1.7e9) where elapsed seconds are expected;
      a greeting does no retrieval work, so it is reported as 0.0.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Role-specific greeting texts.
    greetings = {
        'patient': "👋 Hello! I'm here to help you understand health topics in simple, clear terms. What would you like to know?",
        'student': "👋 Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
        'clinician': "👋 Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
        'doctor': "👋 Hello, doctor. I'm available to discuss clinical questions and evidence. What would you like to explore?",
        'researcher': "👋 Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
        'professor': "👋 Hello. I can assist with academic discussions and evidence synthesis. What topic interests you?",
        'pharmacist': "👋 Hello. I can help with medication-related questions and information. How can I assist you today?",
        'general': "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"
    }
    greeting = greetings.get(role, greetings['general'])

    # Resolve display metadata for the domain, tolerating config failures.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:  # config helper may raise on unknown domains
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# {greeting}

**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}

Feel free to ask me anything! I'll provide information tailored to your needs as a {role_info['name'].lower()}."""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": greeting,
        "bottom_line": greeting,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 95.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple greeting response'
        },
        "guideline_info": None,
        "reasoning_method": "greeting",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds; a canned greeting involves no retrieval work.
            'response_time': 0.0,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
1628
+
1629
def _create_simple_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Build a role-appropriate clarification response for very short queries.

    Args:
        query: The short (non-greeting) query as typed.
        domain: Active domain key (used only for display metadata).
        role: Reader role; unknown roles fall back to 'general'.

    Returns:
        A response dict with the same schema as the full research pipeline,
        asking the user for more detail instead of running retrieval.

    Fixes vs. previous version:
    - Bare ``except:`` narrowed to ``except Exception``.
    - ``metrics['response_time']`` previously stored ``time.time()`` (an
      absolute epoch timestamp) where elapsed seconds are expected; this
      path does no retrieval work, so it is reported as 0.0.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Role-specific "tell me more" phrasings.
    simple_responses = {
        'patient': f"I'd be happy to help with '{query}'. Could you tell me a bit more about what you're looking for?",
        'student': f"That's an interesting topic! To help you best, could you provide more details about what you'd like to know regarding '{query}'?",
        'clinician': f"Regarding '{query}', I can provide evidence-based information. Please share more specifics about your clinical question.",
        'doctor': f"For '{query}', I can offer medical information. Could you elaborate on the clinical context or specific aspects you're interested in?",
        'researcher': f"On the topic of '{query}', I can discuss research perspectives. What specific aspect would you like to explore?",
        'professor': f"Regarding '{query}', I can provide academic perspectives. What particular angle or detail would you like to discuss?",
        'pharmacist': f"About '{query}', I can offer medication-related information. Could you specify what you'd like to know?",
        'general': f"I can help with information about '{query}'. Could you provide more details about what specifically you're interested in?"
    }
    response = simple_responses.get(role, simple_responses['general'])

    # Resolve display metadata for the domain, tolerating config failures.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:  # config helper may raise on unknown domains
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# 💬 **Response**

**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}

{response}

*Tip: For more detailed information, try asking a more specific question!*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 85.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple query response'
        },
        "guideline_info": None,
        "reasoning_method": "simple_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds; this canned path involves no retrieval work.
            'response_time': 0.0,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
1701
+
1702
def _handle_direct_query(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Answer a non-research query directly (no paper retrieval).

    Uses the LLM with a role-based prompt when available; otherwise falls
    back to a canned clarification message.

    Args:
        query: The user's query.
        domain: Active domain key (used for prompt context and display).
        role: Reader role; unknown roles fall back to 'general'.

    Returns:
        A response dict with the same schema as the full research pipeline,
        marked with ``reasoning_method='direct_response'``.

    Fixes vs. previous version:
    - Bare ``except:`` in the domain-info lookup narrowed to
      ``except Exception``.
    - ``metrics['response_time']`` previously stored ``time.time()`` (an
      absolute epoch timestamp) where elapsed seconds are expected; the
      LLM call is now timed and the elapsed duration reported.
    """
    start_time = time.time()
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Prefer an LLM-generated answer; degrade gracefully if it fails.
    if self.llm:
        try:
            prompt = self.role_reasoning.create_role_prompt(query, domain, role, 0, None)
            response = self.llm.generate(
                prompt,
                system_message=f"You are assisting a {role_info['name'].lower()}. Provide helpful, accurate information.",
                max_tokens=1000
            )
            response = response.strip()
            if not response:
                # Empty generations happen; fall back to a clarification ask.
                response = f"I'd be happy to help with '{query}'. Could you provide more details about what specifically you're looking for?"
        except Exception as e:
            print(f"⚠️ LLM direct response failed: {e}")
            response = f"I can help with information about '{query}'. Please feel free to ask more specific questions!"
    else:
        response = f"I'd be happy to discuss '{query}'. What specific aspect would you like to know more about?"

    # Resolve display metadata for the domain, tolerating config failures.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:  # config helper may raise on unknown domains
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# 💬 **Response**

**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}

{response}

*Note: This is a direct response. For evidence-based research analysis with papers, please ask a more specific research question.*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 80.0,
            'level': 'HIGH 🟢',
            'explanation': 'Direct query response without papers'
        },
        "guideline_info": None,
        "reasoning_method": "direct_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # Elapsed seconds for this direct (LLM-only) response.
            'response_time': time.time() - start_time,
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
1782
 
1783
  def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
1784
  use_fallback: bool = False) -> List[Dict]:
 
1976
 
1977
  return papers
1978
 
1979
def _generate_role_based_analysis(self, query: str, domain: str, role: str,
                                  papers: List[Dict], guideline_info: Dict = None,
                                  custom_role_prompt: str = None) -> str:
    """Produce a role-tailored analysis via the LLM, with template fallback.

    Falls back to the templated analysis when no LLM is configured or the
    generation call raises.
    """
    if not self.llm:
        return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)

    prompt = self.role_reasoning.create_role_prompt(
        query, domain, role, len(papers), guideline_info)

    # Surface the top sources so the model can ground its analysis.
    if papers:
        source_lines = [
            f"{idx + 1}. {paper.get('title', 'Untitled')} ({paper.get('source', 'Unknown')})"
            for idx, paper in enumerate(papers[:3])
        ]
        prompt += "\n\n**Relevant Sources:**\n" + "\n".join(source_lines)

    # Flag illustrative (demo) papers so the model does not over-claim.
    demo_total = sum(1 for paper in papers if paper.get('is_demo', False))
    if demo_total > 0:
        prompt += f"\n\nNote: {demo_total} illustrative examples included for context."

    try:
        # A custom (truthy) role prompt from the frontend overrides the default.
        system_message = custom_role_prompt or f"You are assisting a {role}. Provide helpful, accurate information."
        return self.llm.generate(prompt, system_message=system_message, max_tokens=2000)
    except Exception as e:
        print(f"⚠️ LLM role-based analysis failed: {e}")
        return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
2018
 
2019
+ def _create_fallback_role_analysis(self, query: str, domain: str, role: str,
2020
+ papers: List[Dict], guideline_info: Dict = None) -> str:
2021
  """Create fallback analysis when LLM is unavailable"""
2022
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2023
+
2024
  if CONFIG_AVAILABLE:
2025
  try:
2026
  domain_name = get_domain_display_name(domain)
 
2033
  real_papers = [p for p in papers if not p.get('is_demo', False)]
2034
  demo_papers = [p for p in papers if p.get('is_demo', False)]
2035
 
2036
+ analysis = f"""**{role_info['name']}-Focused Analysis**
 
 
2037
  **Query:** {query}
2038
+ **Domain Context:** {domain_name}
2039
+ **Role Perspective:** {role_info['name']}"""
2040
 
2041
  # Add guideline information
2042
  if guideline_info:
 
2046
  analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
2047
 
2048
  analysis += f"""
2049
+ **Key Information for {role_info['name']}:**
2050
+ Based on analysis of {len(papers)} relevant sources ({len(real_papers)} real, {len(demo_papers)} illustrative):
 
2051
 
2052
+ 1. **{role_info['name']}-Relevant Insights:**
2053
+ - Information tailored to {role_info['name'].lower()} needs and perspective
2054
+ - Practical implications for {role_info['name'].lower()} context
2055
+ - Actionable takeaways appropriate for this role
 
2056
 
2057
+ 2. **Domain Context:**
2058
+ - Considerations specific to {domain_name}
2059
+ - Relevant standards and approaches in this field
2060
+ - Important context for application
2061
+
2062
+ 3. **Evidence Considerations:**
2063
+ - {len(papers)} sources analyzed
2064
+ - Quality and relevance assessed for {role_info['name'].lower()} needs
2065
+ - {"Guideline awareness as noted above" if guideline_info else "Standard evidence considerations"}
2066
+
2067
+ **Recommendations for {role_info['name']}:**
2068
+ - Apply information within {role_info['name'].lower()} role context
2069
+ - Consider individual circumstances and specific needs
2070
+ - {"Consult referenced guidelines as appropriate" if guideline_info and guideline_info.get('guidelines_found') else "Reference standard practices"}
2071
+ - Seek additional information for specific cases
2072
+ - Integrate with professional judgment and experience
2073
+
2074
+ *Note: This analysis is tailored for {role_info['name'].lower()} perspective. For other perspectives, different considerations may apply.*"""
2075
 
2076
  if demo_papers:
2077
+ analysis += f"\n\n*Includes {len(demo_papers)} illustrative examples for comprehensive analysis.*"
2078
 
2079
  return analysis
2080
 
2081
+ def _generate_role_bottom_line(self, query: str, domain: str, role: str,
2082
+ papers_count: int, real_papers_count: int,
2083
+ guideline_info: Dict = None) -> str:
2084
+ """Generate role-appropriate bottom line"""
2085
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2086
+
2087
  if CONFIG_AVAILABLE:
2088
  try:
2089
  domain_name = get_domain_display_name(domain)
 
2092
  else:
2093
  domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
2094
 
2095
+ bottom_line = f"""**Bottom Line for {role_info['name']}:**
2096
+ Based on {papers_count} sources in {domain_name} ({real_papers_count} real sources), here are the key takeaways for {role_info['name'].lower()} perspective."""
2097
 
2098
  # Add guideline-specific bottom line
2099
  if guideline_info:
2100
  if guideline_info.get('guidelines_found'):
2101
+ bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced."
2102
 
2103
  if guideline_info.get('critical_missing'):
2104
+ missing_list = ', '.join(guideline_info['critical_missing'][:2])
2105
+ bottom_line += f"\n**Consider:** Missing explicit guideline citations for {missing_list}."
2106
 
2107
  coverage = guideline_info.get('coverage_percentage', 0)
2108
  if coverage < 50:
2109
+ bottom_line += f"\n**Evidence Note:** Guideline coverage is limited."
2110
 
2111
  bottom_line += f"""
2112
+ **{role_info['name']}-Specific Considerations:**
2113
+ - Information tailored to {role_info['name'].lower()} role and needs
2114
+ - Practical application within {role_info['name'].lower()} context
2115
+ - Integration with {role_info['name'].lower()} knowledge and experience
2116
+ - {"Guideline-aware decision making" if guideline_info else "Evidence-informed approach"}
2117
+ - Consideration of specific circumstances and constraints"""
2118
 
2119
  if papers_count > real_papers_count:
2120
+ bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for context.*"
2121
 
2122
  return bottom_line
2123
 
2124
+ def _synthesize_role_answer(self, query: str, domain: str, role: str,
2125
+ analysis: str, papers: List[Dict],
2126
+ bottom_line: str, confidence: Dict[str, Any],
2127
+ guideline_info: Dict = None) -> Dict[str, Any]:
2128
+ """Synthesize final answer with role information"""
2129
 
2130
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2131
+
2132
  if CONFIG_AVAILABLE:
2133
  try:
2134
  domain_info = {
 
2149
  'description': f'Research in {domain.replace("_", " ")}'
2150
  })
2151
 
 
 
 
 
 
2152
  # Count real vs demo papers
2153
  real_papers = [p for p in papers if not p.get('is_demo', False)]
2154
  demo_papers = [p for p in papers if p.get('is_demo', False)]
2155
 
2156
+ # Format paper citations
2157
  paper_citations = []
2158
+ for i, paper in enumerate(papers[:5], 1):
2159
  title = paper.get('title', 'Untitled')
2160
  authors = paper.get('authors', [])
2161
  year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
 
2164
  is_demo = paper.get('is_demo', False)
2165
  is_preprint = paper.get('is_preprint', False)
2166
 
 
 
 
 
 
2167
  # Format authors
2168
  if authors and isinstance(authors, list) and len(authors) > 0:
2169
  if len(authors) == 1:
 
2180
 
2181
  demo_indicator = "📄 " if is_demo else ""
2182
  preprint_indicator = "⚡ " if is_preprint else ""
 
2183
 
2184
  if author_str and year:
2185
+ citation += f"\n {demo_indicator}{preprint_indicator}*{author_str} ({year})*"
2186
  elif author_str:
2187
+ citation += f"\n {demo_indicator}{preprint_indicator}*{author_str}*"
2188
  else:
2189
+ citation += f"\n {demo_indicator}{preprint_indicator}*Unknown authors*"
2190
 
2191
  if journal:
2192
  citation += f"\n Journal: {journal}"
2193
  elif source and source != 'unknown':
2194
  citation += f"\n Source: {source}"
2195
 
 
 
 
2196
  paper_citations.append(citation)
2197
 
2198
  # Build guideline summary section
 
2203
  if guideline_info.get('guidelines_found'):
2204
  guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
2205
 
 
 
 
 
 
 
 
2206
  if guideline_info.get('critical_missing'):
2207
  missing_list = ', '.join(guideline_info['critical_missing'])
2208
  guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
 
2210
  guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
2211
 
2212
  # Build answer
2213
+ answer = f"""# 🔬 **{role_info['name']}-Focused Analysis**
2214
+ **Role:** {role_info['name']} {role_info['icon']}
2215
  **Domain:** {domain_info['name']} {domain_info.get('icon', '')}
 
2216
  **Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
2217
+ **Sources Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
2218
  ---
2219
  ## 📋 **Executive Summary**
2220
  {bottom_line}
 
2223
  {analysis}
2224
  ---
2225
  ## 📊 **Supporting Evidence**
2226
+ {chr(10).join(paper_citations) if paper_citations else "*No papers cited for this simple query*"}
2227
  ---
2228
+ ## 🎯 **Key Takeaways for {role_info['name']}**
2229
+ 1. Role-appropriate information and insights
2230
  2. Domain-specific considerations for {domain_info['name'].lower()}
2231
+ 3. Practical implications tailored to {role_info['name'].lower()} needs
2232
+ 4. {"Guideline-aware recommendations" if guideline_info else "Evidence-informed approach"}
2233
+ *Analysis performed with {role_info['name'].lower()}-focused reasoning*
2234
  *Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
2235
 
2236
  return {
2237
  "query": query,
2238
  "domain": domain,
2239
  "domain_info": domain_info,
2240
+ "user_context": role,
2241
+ "user_context_info": role_info,
2242
  "answer": answer,
2243
  "analysis": analysis,
2244
  "bottom_line": bottom_line,
 
2247
  "demo_papers_used": len(demo_papers),
2248
  "confidence_score": confidence,
2249
  "guideline_info": guideline_info,
2250
+ "reasoning_method": "role_based",
2251
  "real_time_search": self.use_real_time,
2252
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
2253
  "metrics": {
2254
  'response_time': time.time(),
2255
  'papers_analyzed': len(papers),
2256
  'domain': domain,
2257
+ 'user_context': role
2258
  }
2259
  }
2260
 
2261
  def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
2262
+ role: str, papers: List[Dict], guideline_info: Dict = None):
2263
+ """Update conversation memory with role info"""
2264
  if not self.memory:
2265
  return
2266
 
2267
  memory_data = {
2268
  'query': query,
2269
  'domain': domain,
2270
+ 'role': role,
2271
  'papers_used': len(papers),
2272
  'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
2273
  'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
 
2287
  metadata=memory_data
2288
  )
2289
 
2290
+ def _create_no_results_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
2291
  """Create response when no papers are found"""
2292
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2293
+
2294
  if CONFIG_AVAILABLE:
2295
  try:
2296
  domain_info = {
 
2309
  })
2310
 
2311
  answer = f"""# 🔍 **Limited Research Found**
2312
+
2313
+ **Role:** {role_info['name']} {role_info['icon']}
2314
  **Domain:** {domain_info['name']}
2315
+
2316
+ **Query:** {query}
2317
+
2318
+ **Suggestions for {role_info['name']}:**
2319
  1. Try broadening your search terms
2320
+ 2. Consider related topics in {domain_info['name']}
2321
+ 3. Check spelling of technical terms
2322
  4. Try a more general domain selection
2323
+
2324
+ **For Role-Appropriate Information:**
2325
+ - Ask more general questions about the topic
2326
+ - Request explanations of concepts
2327
+ - Inquire about standard approaches or practices
2328
+ - Seek practical guidance rather than specific research
2329
+
2330
+ **Example {role_info['name'].lower()}-appropriate queries:**
2331
+ - "Basic explanation of [topic] for {role_info['name'].lower()}"
2332
+ - "Standard approaches to [issue]"
2333
+ - "Practical guidance for [situation]"
2334
+ - "Key concepts about [subject]"
2335
+
2336
+ *Note: Some specialized topics may have limited published research. I can still provide general information and guidance tailored to your role.*"""
2337
 
2338
  return {
2339
  "query": query,
2340
  "domain": domain,
2341
  "domain_info": domain_info,
2342
+ "user_context": role,
2343
+ "user_context_info": role_info,
2344
  "answer": answer,
2345
  "papers_used": 0,
2346
  "real_papers_used": 0,
 
2354
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
2355
  }
2356
 
2357
+ def _create_error_response(self, query: str, domain: str, role: str, error: str) -> Dict[str, Any]:
2358
  """Create error response"""
2359
+ role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
2360
+
2361
  if CONFIG_AVAILABLE:
2362
  try:
2363
  domain_info = {
 
2376
  })
2377
 
2378
  answer = f"""# 🚨 **Analysis Error**
2379
+
2380
+ **Role:** {role_info['name']} {role_info['icon']}
2381
  **Domain:** {domain_info['name']}
2382
+
2383
+ **Query:** {query}
2384
  **Error:** {error}
2385
+
2386
+ **Troubleshooting for {role_info['name']}:**
2387
  1. Check your internet connection
2388
+ 2. Try a simpler query or rephrase
2389
+ 3. Verify domain selection is appropriate
2390
  4. Contact support if problem persists
2391
+
2392
+ **For Role-Appropriate Alternatives:**
2393
+ - Ask a simpler version of your question
2394
+ - Request general information instead of specific research
2395
+ - Try breaking complex questions into smaller parts
2396
+ - Use more common terminology
2397
+
2398
+ Please try again or reformulate your question for {role_info['name'].lower()}-appropriate assistance."""
2399
 
2400
  return {
2401
  "query": query,
2402
  "domain": domain,
2403
  "domain_info": domain_info,
2404
+ "user_context": role,
2405
+ "user_context_info": role_info,
2406
  "answer": answer,
2407
  "papers_used": 0,
2408
  "real_papers_used": 0,
 
2496
  }
2497
 
2498
  def get_engine_status(self) -> Dict[str, Any]:
2499
+ """Get engine status with role metrics"""
2500
  # Calculate average guideline coverage
2501
  avg_guideline_coverage = 0
2502
  if self.metrics['guideline_coverage']:
 
2505
 
2506
  return {
2507
  "engine_name": "Medical Research RAG Engine",
2508
+ "version": "2.2.0",
2509
  "model": self.model if hasattr(self, 'model') else "Unknown",
2510
+ "features": ["role_based_reasoning", "real_paper_fetching",
2511
+ "confidence_scoring", "guideline_detection", "simple_query_handling"],
2512
  "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
2513
  "real_time_search": self.use_real_time,
2514
+ "roles_supported": list(RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.keys()),
2515
  "guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
2516
  "metrics": {
2517
  "total_queries": self.metrics['total_queries'],
 
2523
  "demo_papers_used": self.metrics['demo_papers_used']
2524
  },
2525
  "domains_supported": len(DOMAIN_INFO),
2526
+ "simple_query_handling": "ENABLED"
2527
  }
2528
 
2529
  def clear_memory(self):
 
2539
  # TEST FUNCTION
2540
  # ============================================================================
2541
 
2542
+ def test_role_based_rag_engine():
2543
+ """Test the medical RAG engine with role-based responses"""
2544
  print("\n" + "=" * 60)
2545
+ print("🧪 TESTING ROLE-BASED RAG ENGINE")
2546
  print("=" * 60)
2547
 
2548
  try:
2549
  # Initialize engine
2550
  engine = EnhancedRAGEngine(
2551
+ session_id="role_test",
2552
  model="gpt-oss-120b",
2553
+ use_real_time=False # Disable real-time for faster testing
2554
  )
2555
 
2556
+ # Test queries with different roles
2557
  test_cases = [
2558
  {
2559
+ "query": "hi",
2560
+ "domain": "general_medical",
2561
+ "role": "patient"
2562
  },
2563
  {
2564
+ "query": "hello",
2565
+ "domain": "cardiology",
2566
+ "role": "doctor"
2567
+ },
2568
+ {
2569
+ "query": "hey",
2570
  "domain": "endocrinology",
2571
+ "role": "student"
2572
  },
2573
  {
2574
+ "query": "Compare first-line antibiotics for community-acquired pneumonia",
2575
+ "domain": "infectious_disease",
2576
+ "role": "clinician"
2577
  }
2578
  ]
2579
 
2580
+ for i, test_case in enumerate(test_cases, 1):
2581
  print(f"\n📝 Test Case {i}:")
2582
  print(f" Query: '{test_case['query']}'")
2583
  print(f" Domain: {test_case['domain']}")
2584
+ print(f" Role: {test_case['role']}")
2585
 
2586
  # Process query
2587
  response = engine.answer_research_question(
2588
  query=test_case['query'],
2589
  domain=test_case['domain'],
2590
+ max_papers=5,
2591
+ role=test_case['role'],
2592
  use_fallback=True
2593
  )
2594
 
2595
  if response and 'error' not in response:
2596
  print(f"\n✅ Test Successful!")
2597
+ print(f" Response type: {response.get('reasoning_method', 'unknown')}")
2598
  print(f" Papers used: {response.get('papers_used', 0)}")
 
2599
  print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
2600
 
2601
+ # Check if it's a simple response
2602
+ if response.get('reasoning_method') in ['greeting', 'simple_response', 'direct_response']:
2603
+ print(f" ⭐ Simple query handled appropriately!")
 
 
 
 
2604
 
2605
  # Show engine status
2606
  status = engine.get_engine_status()
2607
  print(f"\n🔧 Engine Status:")
2608
+ print(f" Role-based responses: ENABLED")
2609
+ print(f" Simple query handling: ENABLED")
2610
+ print(f" Roles supported: {len(status['roles_supported'])}")
2611
  print(f" Total queries: {status['metrics']['total_queries']}")
2612
 
2613
  return True
 
2621
 
2622
  if __name__ == "__main__":
2623
  # Run test
2624
+ test_result = test_role_based_rag_engine()
2625
 
2626
  if test_result:
2627
  print(f"\n{'=' * 60}")
2628
+ print("🎉 ROLE-BASED RAG ENGINE TEST COMPLETE!")
2629
+ print(" Role-based reasoning: ✓")
2630
+ print(" Simple query handling: ✓")
2631
+ print(" Domain-agnostic approach: ✓")
2632
  print(" Guideline detection: ✓")
 
2633
  print(f"{'=' * 60}")
2634
  else:
2635
  print("\n❌ Engine test failed")