# MedSearchPro / chat / rag_engine.py
# Last change: "Update chat/rag_engine.py" by paulhemb (commit 7b063e5, verified)
"""
rag_engine.py - Production-Ready Medical RAG Engine
Updated with role-based response handling and improved simple query detection
"""
from typing import List, Dict, Any, Optional, Tuple
import re
import json
import time
import random
from datetime import datetime
from collections import Counter
import statistics
# Medical paper templates for different domains.
# Each domain maps to title templates with {placeholder} slots (drug names,
# conditions, interventions, ...) that are filled in elsewhere when example
# paper titles are synthesized.
PAPER_TEMPLATES = {
    'infectious_disease': [
        "Comparison of {drug1} vs {drug2} for {condition}: A randomized controlled trial",
        "Clinical outcomes of {treatment} in patients with {condition}: A multicenter study",
        "Risk factors and management of {condition} in the ICU setting",
        "Antimicrobial resistance patterns in {condition}: A retrospective analysis",
        "Efficacy and safety of {treatment} for {condition}: A systematic review and meta-analysis",
        "Diagnostic approaches for {condition}: Current evidence and guidelines",
        "Prevention strategies for {condition} in hospitalized patients",
        "Economic impact of {treatment} for {condition} in diverse healthcare settings"
    ],
    'cardiology': [
        "Long-term outcomes of {intervention} in {condition}: The {acronym} trial",
        "Novel biomarkers for predicting {outcome} in {condition}",
        "Comparison of invasive vs conservative strategies for {condition}",
        "Role of {medication} in secondary prevention of {condition}",
        "Advances in imaging techniques for {condition} diagnosis",
        "Genetic predictors of treatment response in {condition}",
        "Quality of life outcomes following {procedure} for {condition}"
    ],
    'endocrinology': [
        "Real-world effectiveness of {drug_class} in {condition} management",
        "Impact of {lifestyle_intervention} on glycemic control in {condition}",
        "Novel insulin delivery systems for {condition}: Patient-reported outcomes",
        "Thyroid dysfunction in patients with {comorbidity}: Screening and management",
        "Bone health in patients receiving {treatment} for {condition}",
        "Hormonal therapies for {condition}: Comparative effectiveness analysis"
    ],
    'neurology': [
        "Early diagnosis and intervention in {condition}: Impact on long-term outcomes",
        "Neuroimaging biomarkers for {condition} progression",
        "Novel therapeutic targets in {condition} pathogenesis",
        "Cognitive rehabilitation strategies for {condition}: A randomized trial",
        "Genetic and environmental risk factors for {condition}",
        "Quality of life measures in {condition} clinical trials"
    ],
    'oncology': [
        "Biomarker-driven therapy for {cancer_type}: Current status and future directions",
        "Immunotherapy combinations in {cancer_type}: Efficacy and toxicity profiles",
        "Liquid biopsy applications in {cancer_type} management",
        "Supportive care interventions for {cancer_type} treatment side effects",
        "Cost-effectiveness of targeted therapies in {cancer_type}",
        "Survivorship issues in {cancer_type}: Long-term follow-up data"
    ],
    'internal_medicine': [
        "Management of {condition} in elderly patients with multiple comorbidities",
        "Diagnostic uncertainty in {condition}: A clinical decision-making framework",
        "Transition of care for patients with {condition}: Best practices",
        "Polypharmacy management in patients with {condition}",
        "Telemedicine applications for {condition} follow-up",
        "Patient education strategies for {condition} self-management"
    ]
}
# Medical terms for paper generation (now used only as fallback).
# These vocabularies fill the {placeholder} slots of PAPER_TEMPLATES when no
# real retrieval results are available.
MEDICAL_TERMS = {
    'drugs': ['amoxicillin-clavulanate', 'azithromycin', 'ceftriaxone', 'doxycycline', 'levofloxacin',
              'meropenem', 'vancomycin', 'piperacillin-tazobactam', 'linezolid', 'metronidazole'],
    'conditions': ['community-acquired pneumonia', 'hospital-acquired pneumonia', 'sepsis', 'urinary tract infection',
                   'skin and soft tissue infection', 'intra-abdominal infection', 'meningitis', 'endocarditis'],
    'treatments': ['antibiotic therapy', 'source control', 'resuscitation', 'ventilator management',
                   'infection prevention', 'antimicrobial stewardship'],
    'outcomes': ['clinical cure', 'mortality', 'length of stay', 'readmission', 'antibiotic resistance',
                 'adverse events', 'cost-effectiveness', 'quality of life']
}
# ============================================================================
# GUIDELINE DETECTION SYSTEM
# ============================================================================
class GuidelineDetector:
    """Detect explicit clinical-guideline citations in medical papers.

    Scans paper titles/abstracts for known guideline bodies (ADA, ACC/AHA,
    IDSA, ...) and reports which were cited, which critical ones are missing
    for the query's condition/domain, and an overall coverage level.
    """

    # Known guideline bodies per medical domain. Each key maps a canonical
    # guideline name to the textual patterns that count as a citation.
    GUIDELINE_DATABASES = {
        # Diabetes/Endocrinology
        'endocrinology': {
            'IDF': ['IDF', 'International Diabetes Federation', 'International Diabetes Federation guidelines'],
            'ADA': ['ADA', 'American Diabetes Association', 'American Diabetes Association guidelines',
                    'ADA/EASD', 'ADA Standards of Care'],
            'EASD': ['EASD', 'European Association for the Study of Diabetes'],
            'AACE': ['AACE', 'American Association of Clinical Endocrinologists'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence', 'NICE guidelines'],
            'WHO': ['WHO', 'World Health Organization', 'WHO guidelines for diabetes'],
            'ATP III': ['ATP III', 'Adult Treatment Panel III', 'NCEP ATP III'],
            'KDIGO': ['KDIGO', 'Kidney Disease Improving Global Outcomes'],
            'ESC': ['ESC', 'European Society of Cardiology', 'ESC/EASD'],
            'AHA': ['AHA', 'American Heart Association']
        },
        # Cardiology
        'cardiology': {
            'ACC/AHA': ['ACC/AHA', 'American College of Cardiology/American Heart Association',
                        'ACC/AHA guidelines', 'AHA/ACC'],
            'ESC': ['ESC', 'European Society of Cardiology', 'ESC guidelines'],
            'AHA': ['AHA', 'American Heart Association', 'AHA guidelines'],
            'ACC': ['ACC', 'American College of Cardiology'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
            'WHO': ['WHO', 'World Health Organization'],
            'ATP III': ['ATP III', 'Adult Treatment Panel III', 'NCEP ATP III'],
            'JNC': ['JNC', 'Joint National Committee', 'JNC 8', 'JNC 7'],
            'CHEP': ['CHEP', 'Canadian Hypertension Education Program'],
            'CCS': ['CCS', 'Canadian Cardiovascular Society']
        },
        # Infectious Diseases
        'infectious_disease': {
            'IDSA': ['IDSA', 'Infectious Diseases Society of America', 'IDSA guidelines'],
            'ATS': ['ATS', 'American Thoracic Society', 'ATS/IDSA'],
            'CDC': ['CDC', 'Centers for Disease Control and Prevention', 'CDC guidelines'],
            'WHO': ['WHO', 'World Health Organization', 'WHO guidelines'],
            'ECDC': ['ECDC', 'European Centre for Disease Prevention and Control'],
            'SHEA': ['SHEA', 'Society for Healthcare Epidemiology of America'],
            'ESCMID': ['ESCMID', 'European Society of Clinical Microbiology and Infectious Diseases'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Oncology
        'oncology': {
            'NCCN': ['NCCN', 'National Comprehensive Cancer Network', 'NCCN guidelines'],
            'ASCO': ['ASCO', 'American Society of Clinical Oncology', 'ASCO guidelines'],
            'ESMO': ['ESMO', 'European Society for Medical Oncology', 'ESMO guidelines'],
            'AJCC': ['AJCC', 'American Joint Committee on Cancer'],
            'WHO': ['WHO', 'World Health Organization'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Neurology
        'neurology': {
            'AAN': ['AAN', 'American Academy of Neurology', 'AAN guidelines'],
            'EFNS': ['EFNS', 'European Federation of Neurological Societies'],
            'EAN': ['EAN', 'European Academy of Neurology'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
            'WHO': ['WHO', 'World Health Organization']
        },
        # Internal Medicine (General)
        'internal_medicine': {
            'ACP': ['ACP', 'American College of Physicians', 'ACP guidelines'],
            'ACC/AHA': ['ACC/AHA', 'American College of Cardiology/American Heart Association'],
            'IDSA': ['IDSA', 'Infectious Diseases Society of America'],
            'ATS': ['ATS', 'American Thoracic Society'],
            'ADA': ['ADA', 'American Diabetes Association'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
            'WHO': ['WHO', 'World Health Organization'],
            'USPSTF': ['USPSTF', 'U.S. Preventive Services Task Force']
        },
        # Pulmonology
        'pulmonology': {
            'ATS': ['ATS', 'American Thoracic Society', 'ATS guidelines'],
            'ERS': ['ERS', 'European Respiratory Society'],
            'GOLD': ['GOLD', 'Global Initiative for Chronic Obstructive Lung Disease'],
            'GINA': ['GINA', 'Global Initiative for Asthma'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Gastroenterology
        'gastroenterology': {
            'AGA': ['AGA', 'American Gastroenterological Association', 'AGA guidelines'],
            'ACG': ['ACG', 'American College of Gastroenterology'],
            'UEG': ['UEG', 'United European Gastroenterology'],
            'ESGE': ['ESGE', 'European Society of Gastrointestinal Endoscopy'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Nephrology
        'nephrology': {
            'KDIGO': ['KDIGO', 'Kidney Disease Improving Global Outcomes', 'KDIGO guidelines'],
            'NKF': ['NKF', 'National Kidney Foundation', 'NKF/KDOQI'],
            'KDOQI': ['KDOQI', 'Kidney Disease Outcomes Quality Initiative'],
            'ERA': ['ERA', 'European Renal Association'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Hematology
        'hematology': {
            'ASH': ['ASH', 'American Society of Hematology', 'ASH guidelines'],
            'ESMO': ['ESMO', 'European Society for Medical Oncology'],
            'NCCN': ['NCCN', 'National Comprehensive Cancer Network'],
            'WHO': ['WHO', 'World Health Organization']
        }
    }

    # Domain-specific critical guidelines that should be mentioned
    CRITICAL_GUIDELINES = {
        'endocrinology': ['ADA', 'IDF', 'EASD', 'AACE', 'NICE'],
        'cardiology': ['ACC/AHA', 'ESC', 'AHA', 'NICE'],
        'infectious_disease': ['IDSA', 'ATS', 'CDC', 'WHO'],
        'oncology': ['NCCN', 'ASCO', 'ESMO'],
        'diabetes': ['ADA', 'IDF', 'EASD', 'AACE'],
        'hypertension': ['ACC/AHA', 'ESC', 'JNC', 'NICE'],
        'hyperlipidemia': ['ACC/AHA', 'ESC', 'NICE', 'ATP III'],
        'heart_failure': ['ACC/AHA', 'ESC', 'NICE'],
        'pneumonia': ['IDSA', 'ATS', 'CDC'],
        'sepsis': ['SSC', 'IDSA', 'WHO'],
        'COPD': ['GOLD', 'ATS', 'ERS', 'NICE'],
        'asthma': ['GINA', 'ATS', 'ERS', 'NICE']
    }

    @staticmethod
    def _pattern_matches(pattern: str, text: str, text_lower: str) -> bool:
        """Return True when `pattern` appears as a whole word/phrase in the text.

        FIX: the previous implementation used bare lowercase substring
        containment, so short acronyms false-positived on ordinary words
        ("canada" -> ADA, the pronoun "who" -> WHO, "nice" -> NICE).
        All-caps acronym patterns are now matched case-sensitively with word
        boundaries; longer multi-word names are matched case-insensitively,
        still with word boundaries.
        """
        if pattern.isupper():
            # Acronym: require the exact uppercase token (e.g. 'ADA', 'WHO').
            return re.search(r'\b' + re.escape(pattern) + r'\b', text) is not None
        return re.search(r'\b' + re.escape(pattern.lower()) + r'\b', text_lower) is not None

    @staticmethod
    def detect_guidelines(papers: List[Dict], domain: str, query: str) -> Dict[str, Any]:
        """Detect guideline citations in papers and identify missing critical ones.

        Args:
            papers: paper dicts with optional 'title' / 'abstract' keys.
            domain: medical domain key (e.g. 'endocrinology').
            query: free-text user query; condition keywords in it select
                condition-specific guideline sets.

        Returns:
            Dict with 'guidelines_found', 'critical_missing',
            'guideline_coverage', 'recommendation', 'papers_with_guidelines',
            'total_guidelines_checked' and 'coverage_percentage'.
        """
        if not papers:
            return {
                'guidelines_found': [],
                'critical_missing': [],
                'guideline_coverage': 'NO_PAPERS',
                'recommendation': 'No papers available for guideline analysis'
            }
        # Get relevant guideline database for domain
        domain_guidelines = GuidelineDetector.GUIDELINE_DATABASES.get(domain, {})
        all_guidelines = {}
        if domain in GuidelineDetector.GUIDELINE_DATABASES:
            all_guidelines.update(GuidelineDetector.GUIDELINE_DATABASES[domain])
        # Condition-specific guideline sets inferred from the query text
        query_lower = query.lower()
        condition_guidelines = []
        if 'diabetes' in query_lower or 'glycemic' in query_lower:
            condition_guidelines.extend(['ADA', 'IDF', 'EASD', 'AACE'])
        if 'hypertension' in query_lower or 'blood pressure' in query_lower:
            condition_guidelines.extend(['ACC/AHA', 'ESC', 'JNC', 'NICE'])
        if 'hyperlipidemia' in query_lower or 'cholesterol' in query_lower or 'lipid' in query_lower:
            condition_guidelines.extend(['ACC/AHA', 'ESC', 'ATP III', 'NICE'])
        if 'heart failure' in query_lower:
            condition_guidelines.extend(['ACC/AHA', 'ESC', 'NICE'])
        if 'pneumonia' in query_lower:
            condition_guidelines.extend(['IDSA', 'ATS', 'CDC'])
        if 'sepsis' in query_lower:
            condition_guidelines.extend(['SSC', 'IDSA', 'WHO'])
        if 'copd' in query_lower or 'chronic obstructive' in query_lower:
            condition_guidelines.extend(['GOLD', 'ATS', 'ERS'])
        if 'asthma' in query_lower:
            condition_guidelines.extend(['GINA', 'ATS', 'ERS'])
        # Deduplicate while keeping order (dict.fromkeys instead of set(),
        # so the output does not vary with hash randomization), then merge
        # domain and condition guidelines.
        condition_guidelines = list(dict.fromkeys(condition_guidelines))
        guidelines_to_check = {}
        for guideline in condition_guidelines:
            for key, value in all_guidelines.items():
                if guideline == key or guideline in key:
                    guidelines_to_check[key] = value
        # If no specific condition guidelines matched, use domain guidelines
        if not guidelines_to_check and domain_guidelines:
            guidelines_to_check = domain_guidelines
        # Scan papers for guideline mentions
        found_guidelines = set()
        papers_with_guidelines = []
        for paper in papers:
            raw_text = f"{paper.get('title', '')} {paper.get('abstract', '')}"
            text_lower = raw_text.lower()
            paper_guidelines = []
            for guideline_name, patterns in guidelines_to_check.items():
                if any(GuidelineDetector._pattern_matches(p, raw_text, text_lower)
                       for p in patterns):
                    found_guidelines.add(guideline_name)
                    if guideline_name not in paper_guidelines:
                        paper_guidelines.append(guideline_name)
            if paper_guidelines:
                papers_with_guidelines.append({
                    'title': paper.get('title', 'Untitled')[:100],
                    'guidelines': paper_guidelines
                })
        # Determine critical missing guidelines
        critical_missing = []
        if condition_guidelines:
            # Condition-specific critical guidelines take priority
            for guideline in condition_guidelines:
                if guideline not in found_guidelines:
                    critical_missing.append(guideline)
        elif domain_guidelines:
            critical_for_domain = GuidelineDetector.CRITICAL_GUIDELINES.get(domain, [])
            for guideline in critical_for_domain:
                if guideline not in found_guidelines:
                    critical_missing.append(guideline)
        # Calculate coverage score
        coverage_percentage = 0
        if guidelines_to_check:
            coverage_percentage = (len(found_guidelines) / len(guidelines_to_check)) * 100
            if coverage_percentage >= 75:
                coverage = 'HIGH'
            elif coverage_percentage >= 50:
                coverage = 'MODERATE'
            elif coverage_percentage >= 25:
                coverage = 'LOW'
            else:
                coverage = 'VERY_LOW'
        else:
            coverage = 'UNKNOWN'
        # Generate recommendation (FIX: only append '...' when names were
        # actually truncated, i.e. more than 3 missing — not exactly 3)
        if critical_missing:
            shown = ', '.join(critical_missing[:3])
            suffix = '...' if len(critical_missing) > 3 else ''
            recommendation = f"Missing explicit guideline citations ({shown}{suffix})"
        elif found_guidelines:
            recommendation = f"Guideline coverage: {len(found_guidelines)}/{len(guidelines_to_check) if guidelines_to_check else 'N/A'} major guidelines referenced"
        else:
            recommendation = "No explicit guideline citations detected"
        return {
            'guidelines_found': sorted(list(found_guidelines)),
            'critical_missing': critical_missing,
            'guideline_coverage': coverage,
            'recommendation': recommendation,
            'papers_with_guidelines': papers_with_guidelines[:5],  # Top 5 papers with guidelines
            'total_guidelines_checked': len(guidelines_to_check),
            'coverage_percentage': round(coverage_percentage, 1) if guidelines_to_check else 0
        }
# Path bootstrap — intended to sit at the very top of rag_engine.py so the
# project root is importable before any project-local imports run.
import sys
import os

# Directory containing this file (chat/) and its parent (MedSearchPro/).
current_dir = os.path.abspath(os.path.dirname(__file__))
project_root = os.path.abspath(os.path.join(current_dir, os.pardir))

# Register the project root exactly once, at the front of the search path.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
    print(f"✅ Added project root to sys.path: {project_root}")
# Import existing modules; fall back to a simplified mode when the full
# project stack (vector store, summarizers, memory, LLM provider) is absent.
try:
    from processing.vector_store import VectorStore
    from chat.summarizer import MultiDocumentSummarizer
    from chat.single_paper_summarizer import SinglePaperSummarizer
    from chat.comparator import CrossPaperComparator
    from chat.gap_analyzer import ResearchGapAnalyzer
    from lib.memory_manager import ConversationMemory
    from llm.llm_provider import XAIGrokProvider, GrokLLM
except ImportError as e:
    print(f"⚠️ Some dependencies not found - using simplified mode: {e}")
    # Bind every imported name to None so later `is None` checks work.
    # FIX: XAIGrokProvider was missing from this fallback list even though it
    # is imported above, causing a NameError in simplified mode.
    VectorStore = None
    MultiDocumentSummarizer = None
    SinglePaperSummarizer = None
    CrossPaperComparator = None
    ResearchGapAnalyzer = None
    ConversationMemory = None
    XAIGrokProvider = None
    GrokLLM = None
# Import config for domain mapping; when the config package is absent, fall
# back to the local DOMAIN_INFO table defined below in this module.
try:
    from config.domains import (
        get_domain_display_name, get_domain_description,
        validate_domain, get_all_domains
    )
    CONFIG_AVAILABLE = True  # callers may branch on this flag
except ImportError:
    print("⚠️ config.domains not found - using fallback domain info")
    CONFIG_AVAILABLE = False
# ============================================================================
# ROLE-BASED REASONING FOR MEDICAL RESEARCH
# ============================================================================
class RoleBasedReasoning:
    """Role-based reasoning technique focused on domain-agnostic, role-appropriate responses"""

    # Role definitions with domain-agnostic prompts.
    # Each entry carries a display name, a UI icon, and the system-prompt text
    # injected into the LLM request for that audience.
    ROLE_SYSTEM_PROMPTS = {
        'patient': {
            'name': 'Patient',
            'icon': '🩺',
            'prompt': '''You are helping a patient understand information. Use simple, clear, reassuring language.
- Focus on practical implications and what they need to know
- Avoid complex terminology or jargon
- Emphasize safety and when to seek professional help
- Be compassionate and supportive
- Do not provide diagnoses or specific medical advice
- Explain concepts in everyday terms'''
        },
        'student': {
            'name': 'Student',
            'icon': '🎓',
            'prompt': '''You are teaching a student. Focus on educational value and understanding.
- Explain foundational concepts and definitions
- Provide examples and analogies
- Encourage critical thinking and questions
- Structure information logically
- Connect to broader knowledge areas
- Mention learning resources when helpful'''
        },
        'clinician': {
            'name': 'Clinician',
            'icon': '👨‍⚕️',
            'prompt': '''You are assisting a healthcare professional. Be concise, actionable, and evidence-based.
- Focus on practical implications and decision-making
- Reference guidelines and evidence levels when relevant
- Consider workflow and implementation
- Be precise but efficient with time
- Address risks and benefits clearly
- Maintain professional tone'''
        },
        'doctor': {
            'name': 'Doctor',
            'icon': '⚕️',
            'prompt': '''You are assisting a physician. Use appropriate terminology and clinical reasoning.
- Focus on differential diagnosis, treatment options, and management
- Reference current standards of care and guidelines
- Consider patient factors and comorbidities
- Discuss evidence quality and limitations
- Be thorough but organized
- Maintain clinical accuracy'''
        },
        'researcher': {
            'name': 'Researcher',
            'icon': '🔬',
            'prompt': '''You are assisting a research scientist. Focus on methodology and evidence.
- Discuss study designs, methods, and limitations
- Analyze evidence quality and gaps
- Consider statistical significance and clinical relevance
- Reference current literature and trends
- Discuss implications for future research
- Maintain scientific rigor'''
        },
        'professor': {
            'name': 'Professor',
            'icon': '📚',
            'prompt': '''You are assisting an academic educator. Focus on knowledge synthesis and pedagogy.
- Provide comprehensive overviews with context
- Compare theories, methods, and findings
- Discuss historical development and future directions
- Emphasize critical evaluation and synthesis
- Connect to broader academic discourse
- Support teaching and learning objectives'''
        },
        'pharmacist': {
            'name': 'Pharmacist',
            'icon': '💊',
            'prompt': '''You are assisting a pharmacy professional. Focus on medications and safety.
- Discuss drug mechanisms, interactions, and pharmacokinetics
- Emphasize safety profiles and monitoring
- Consider dosing, administration, and compliance
- Address patient counseling points
- Reference formularies and guidelines
- Maintain focus on medication optimization'''
        },
        'general': {
            'name': 'General User',
            'icon': '👤',
            'prompt': '''You are assisting a general user. Provide balanced, accessible information.
- Adjust complexity based on the query
- Be helpful and informative without overwhelming
- Provide context and practical implications
- Use clear language with minimal jargon
- Consider diverse backgrounds and knowledge levels
- Maintain neutral, objective tone'''
        }
    }

    @staticmethod
    def create_role_prompt(query: str, domain: str, role: str,
                           papers_count: int = 0, guideline_info: Optional[Dict] = None) -> str:
        """Create role-appropriate prompt with domain-agnostic focus.

        Returns either a short greeting prompt (for trivial queries) or a full
        analysis prompt combining the role's system text, the query domain,
        and optional research/guideline context.
        """
        # Unknown role keys fall back to the 'general' profile.
        role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
        # Simple query detection - greetings and basic questions
        simple_queries = ['hi', 'hello', 'hey', 'greetings', 'good morning', 'good afternoon',
                          'good evening', 'how are you', "what's up", 'sup']
        query_lower = query.lower().strip()
        # NOTE(review): any query of two words or fewer takes the greeting
        # path, including substantive ones like "define sepsis" — confirm
        # this is the intended "improved simple query detection".
        if query_lower in simple_queries or len(query.split()) <= 2:
            # Simple greeting or short query: return a role-flavored greeting
            # prompt instead of the full analysis prompt.
            if role == 'patient':
                return f"""You are helping a patient. Use warm, reassuring tone.
Query: {query}
Respond with a friendly greeting and invitation to ask questions. Keep it brief and welcoming.
Example: "Hello! I'm here to help answer your health questions in simple, clear terms. What would you like to know?"""
            elif role == 'student':
                return f"""You are teaching a student.
Query: {query}
Respond with an encouraging greeting that invites learning questions.
Example: "Hi there! I'm here to help you learn about medical topics. What are you curious about today?"""
            elif role in ['clinician', 'doctor']:
                return f"""You are assisting a healthcare professional.
Query: {query}
Respond with a professional greeting appropriate for clinical setting.
Example: "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?"""
            elif role in ['researcher', 'professor']:
                return f"""You are assisting an academic professional.
Query: {query}
Respond with a scholarly greeting that invites research questions.
Example: "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?"""
            elif role == 'pharmacist':
                return f"""You are assisting a pharmacy professional.
Query: {query}
Respond with a professional greeting focused on medication information.
Example: "Hello. I can help with medication-related questions and information. How can I assist you today?"""
            else:  # general
                return f"""You are assisting a general user.
Query: {query}
Respond with a friendly, welcoming greeting.
Example: "Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"""
        # For substantive queries, use role-appropriate analysis
        role_prompt = role_info['prompt']
        # Domain-agnostic instruction
        domain_agnostic = f"""DOMAIN-AGNOSTIC APPROACH:
- This system can answer questions from ANY domain (tech, finance, health, education, general)
- Adapt your expertise to the query domain naturally
- Do NOT force medical framing on non-medical questions
- Only emphasize citations/guidelines when the query domain and role demand it
- Use appropriate terminology for the query's domain"""
        # Build comprehensive prompt
        prompt = f"""ROLE: {role_info['name']} {role_info['icon']}
{role_prompt}
{domain_agnostic}
QUERY: {query}
QUERY DOMAIN CONTEXT: {domain} (adapt your response appropriately)
RESPONSE GUIDELINES:
1. **Role-Appropriate Depth:**
- {role}: Adjust response complexity for {role_info['name'].lower()} needs
2. **Terminology Level:**
- Use language appropriate for {role_info['name'].lower()} understanding
3. **Evidence Awareness:**
- { 'Reference evidence/guidelines when domain-appropriate' if role in ['clinician', 'doctor', 'researcher'] else 'Mention evidence when helpful, not required' }
4. **Safety & Practicality:**
- { 'Include appropriate disclaimers' if role == 'patient' else 'Maintain professional standards' }
5. **Response Structure:**
- Organize information logically for {role_info['name'].lower()} understanding
- Prioritize most relevant information first
- Keep response focused and actionable"""
        # Add research context if we have papers
        if papers_count > 0:
            prompt += f"\n\nRESEARCH CONTEXT: Analyzing {papers_count} relevant sources"
        # Add guideline context if available
        if guideline_info:
            if guideline_info.get('guidelines_found'):
                prompt += f"\nGUIDELINES REFERENCED: {', '.join(guideline_info['guidelines_found'])}"
            if guideline_info.get('critical_missing'):
                prompt += f"\nGUIDELINE GAPS: Missing {', '.join(guideline_info['critical_missing'][:2])}"
        prompt += f"\n\nPlease provide a {role_info['name'].lower()}-appropriate response to: {query}"
        return prompt
# ============================================================================
# DOMAIN CONFIGURATION
# ============================================================================
# Domain descriptions (fallback if config.domains not available).
# Maps each domain key to its UI display name, icon, and short description.
DOMAIN_INFO = {
    'internal_medicine': {
        'name': 'Internal Medicine',
        'icon': '🏥',
        'description': 'General internal medicine and diagnosis'
    },
    'endocrinology': {
        'name': 'Endocrinology',
        'icon': '🧬',
        'description': 'Hormonal and metabolic disorders'
    },
    'cardiology': {
        'name': 'Cardiology',
        'icon': '❤️',
        'description': 'Heart and cardiovascular diseases'
    },
    'neurology': {
        'name': 'Neurology',
        'icon': '🧠',
        'description': 'Brain and nervous system disorders'
    },
    'oncology': {
        'name': 'Oncology',
        'icon': '🦠',
        'description': 'Cancer research and treatment'
    },
    'infectious_disease': {
        'name': 'Infectious Diseases',
        'icon': '🦠',
        'description': 'Infectious diseases and microbiology'
    },
    'clinical_research': {
        'name': 'Clinical Research',
        'icon': '📊',
        'description': 'Clinical trials and evidence-based medicine'
    },
    'general_medical': {
        'name': 'General Medical',
        'icon': '⚕️',
        'description': 'General medical research'
    },
    'pulmonology': {
        'name': 'Pulmonology',
        'icon': '🫁',
        'description': 'Respiratory diseases and lung health'
    },
    'gastroenterology': {
        'name': 'Gastroenterology',
        'icon': '🍽️',
        'description': 'Digestive system disorders'
    },
    'nephrology': {
        'name': 'Nephrology',
        'icon': '🫘',
        'description': 'Kidney diseases and disorders'
    },
    'hematology': {
        'name': 'Hematology',
        'icon': '🩸',
        'description': 'Blood disorders and hematologic diseases'
    },
    'surgery': {
        'name': 'Surgery',
        'icon': '🔪',
        'description': 'Surgical procedures and interventions'
    },
    'orthopedics': {
        'name': 'Orthopedics',
        'icon': '🦴',
        'description': 'Musculoskeletal disorders and injuries'
    },
    'urology': {
        'name': 'Urology',
        'icon': '🚽',
        'description': 'Urinary tract and male reproductive system'
    },
    'ophthalmology': {
        'name': 'Ophthalmology',
        'icon': '👁️',
        'description': 'Eye diseases and vision disorders'
    },
    'dermatology': {
        'name': 'Dermatology',
        'icon': '🦋',
        'description': 'Skin diseases and disorders'
    },
    'psychiatry': {
        'name': 'Psychiatry',
        'icon': '🧘',
        'description': 'Mental health and psychiatric disorders'
    },
    'obstetrics_gynecology': {
        'name': 'Obstetrics & Gynecology',
        'icon': '🤰',
        'description': "Women's health and reproductive medicine"
    },
    'pediatrics': {
        'name': 'Pediatrics',
        'icon': '👶',
        'description': 'Child health and pediatric medicine'
    },
    'emergency_medicine': {
        'name': 'Emergency Medicine',
        'icon': '🚑',
        'description': 'Emergency care and acute medicine'
    },
    'critical_care': {
        'name': 'Critical Care Medicine',
        'icon': '🏥',
        'description': 'Intensive care and critical care medicine'
    },
    'pathology': {
        'name': 'Pathology',
        'icon': '🔬',
        'description': 'Disease diagnosis and laboratory medicine'
    },
    'laboratory_medicine': {
        'name': 'Laboratory Medicine',
        'icon': '🧪',
        'description': 'Clinical laboratory testing and diagnostics'
    },
    'medical_imaging': {
        'name': 'Medical Imaging & Radiology AI',
        'icon': '📷',
        'description': 'Medical imaging and radiological diagnosis'
    },
    'bioinformatics': {
        'name': 'Bioinformatics',
        'icon': '💻',
        'description': 'Computational biology and data analysis'
    },
    'genomics': {
        'name': 'Genomics & Sequencing',
        'icon': '🧬',
        'description': 'Genomic research and sequencing technologies'
    },
    'pharmacology': {
        'name': 'Pharmacology',
        'icon': '💊',
        'description': 'Drug research and pharmacology'
    },
    'public_health': {
        'name': 'Public Health Analytics',
        'icon': '🌍',
        'description': 'Public health and epidemiology'
    },
    'pain_medicine': {
        'name': 'Pain Medicine',
        'icon': '🩹',
        'description': 'Pain management and treatment'
    },
    'nutrition': {
        'name': 'Nutrition',
        'icon': '🍎',
        'description': 'Nutritional science and dietetics'
    },
    'allergy_immunology': {
        'name': 'Allergy & Immunology',
        'icon': '🤧',
        'description': 'Allergies and immune system disorders'
    },
    'rehabilitation_medicine': {
        'name': 'Rehabilitation Medicine',
        'icon': '♿',
        'description': 'Physical medicine and rehabilitation'
    },
    # Sentinel entry: lets the UI offer automatic domain detection.
    'auto': {
        'name': 'Auto-detect',
        'icon': '🔍',
        'description': 'Automatic domain detection'
    }
}
# User context information: display metadata for each supported audience
# (see RoleBasedReasoning.ROLE_SYSTEM_PROMPTS for the matching prompt text).
USER_CONTEXT_INFO = {
    'clinician': {
        'name': 'Clinician',
        'icon': '👨‍⚕️',
        'description': 'Medical doctors, nurses, and healthcare providers'
    },
    'researcher': {
        'name': 'Researcher',
        'icon': '🔬',
        'description': 'Academic researchers and scientists'
    },
    'student': {
        'name': 'Student',
        'icon': '🎓',
        'description': 'Medical students and trainees'
    },
    'patient': {
        'name': 'Patient',
        'icon': '👤',
        'description': 'Patients and general public'
    },
    'general': {
        'name': 'General',
        'icon': '👤',
        'description': 'General audience'
    }
}
# ============================================================================
# CORE COMPONENTS
# ============================================================================
class PaperRanker:
    """Rank papers by relevance to query and domain."""

    # Sources treated as high quality. FIX: the original list contained
    # 'nejm' twice; deduplicated (scores are unchanged — the bonus was
    # applied at most once via any()).
    _HIGH_QUALITY_SOURCES = ('pubmed', 'nejm', 'lancet', 'jama', 'nature', 'science',
                             'circulation', 'jacc', 'jco', 'cell', 'bmj')
    # Substrings that mark a paper as guideline-related (lowercase).
    _GUIDELINE_KEYWORDS = ('guideline', 'recommendation', 'consensus', 'position statement',
                           'ada', 'aha', 'acc', 'esc', 'idsa', 'ats', 'nccn', 'nice', 'who')

    def __init__(self):
        # Retained for interface compatibility; not used internally.
        self.query_cache = {}

    def rank_papers(self, papers: List[Dict], query: str, domain: str = None,
                    user_context: str = "general") -> List[Dict]:
        """Rank papers by relevance with guideline bonus.

        Mutates each paper dict in place: adds 'relevance_score' and, when at
        least one paper scored above zero, 'normalized_score' (0-100 relative
        to the best paper). Returns the papers sorted best-first.
        `user_context` is accepted for interface compatibility but does not
        affect scoring.
        """
        if not papers:
            return []
        query_lower = query.lower()
        query_words = set(query_lower.split())
        scored_papers = []
        for paper in papers:
            score = 0
            title = paper.get('title', '').lower()
            abstract = paper.get('abstract', '').lower()
            # Title relevance: exact phrase match is worth far more than
            # individual word hits.
            if title:
                if query_lower in title:
                    score += 100
                score += sum(10 for word in query_words if word in title)
            # Abstract relevance (word hits only)
            if abstract:
                score += sum(5 for word in query_words if word in abstract)
            # Domain relevance
            if domain and domain in DOMAIN_INFO:
                domain_keywords = [
                    domain.split('_')[0],  # first token of the domain key
                    domain.replace('_', ' ')
                ]
                # NOTE: for single-word domains both keywords are identical,
                # so a match counts twice (+30); preserved for compatibility
                # with existing score expectations.
                for keyword in domain_keywords:
                    if keyword in abstract or keyword in title:
                        score += 15
            # Guideline mention bonus (applied once per paper)
            text = f"{title} {abstract}"
            if any(keyword in text for keyword in self._GUIDELINE_KEYWORDS):
                score += 30
            # Recency bonus
            year = self._extract_year(paper)
            if year:
                age = datetime.now().year - year
                if age <= 2:
                    score += 20
                elif age <= 5:
                    score += 10
            # Source quality bonus
            source = paper.get('source', '').lower()
            if any(hq_source in source for hq_source in self._HIGH_QUALITY_SOURCES):
                score += 15
            paper['relevance_score'] = score
            scored_papers.append((score, paper))
        # Sort by score, best first (stable for ties)
        scored_papers.sort(reverse=True, key=lambda pair: pair[0])
        # Normalize scores against the best paper
        if scored_papers:
            max_score = scored_papers[0][0]
            if max_score > 0:
                for _, paper in scored_papers:
                    paper['normalized_score'] = int((paper['relevance_score'] / max_score) * 100)
        return [paper for _, paper in scored_papers]

    def _extract_year(self, paper: Dict) -> Optional[int]:
        """Extract a 4-digit year (1900-2099) from the paper's date fields.

        Checks 'publication_date' first, then 'date'. FIX: replaced two
        duplicated blocks with bare `except:` clauses — the regex guarantees
        `int()` succeeds, so no exception handling is needed at all.
        """
        for field in ('publication_date', 'date'):
            value = paper.get(field, '')
            if isinstance(value, str):
                match = re.search(r'\b(19|20)\d{2}\b', value)
                if match:
                    return int(match.group())
        return None
class ConfidenceScorer:
    """Calculate confidence scores for medical evidence with guideline consideration.

    Seven evidence factors are each scored independently on a 0-100 scale
    and combined via the weights in ``score_weights`` into an overall
    confidence score, a textual level, and a human-readable explanation.
    """

    def __init__(self):
        # Weights sum to 1.0 so the weighted factor sum stays on a 0-100 scale.
        self.score_weights = {
            'paper_count': 0.15,
            'paper_quality': 0.20,
            'study_design': 0.20,
            'evidence_recency': 0.15,
            'source_diversity': 0.10,
            'domain_relevance': 0.10,
            'guideline_alignment': 0.10  # New: Guideline alignment score
        }

    def calculate_confidence(self, papers: List[Dict], query: str,
                             analysis_type: str = "summary",
                             user_context: str = "general",
                             domain: str = "general_medical",
                             guideline_info: Optional[Dict] = None) -> Dict[str, Any]:
        """Calculate a confidence score for an evidence set.

        Args:
            papers: Ranked supporting paper dicts.
            query: The research question (forwarded to guideline scoring).
            analysis_type: Kept for interface stability (unused here).
            user_context: Kept for interface stability (unused here).
            domain: Medical domain used for relevance/guideline weighting.
            guideline_info: Guideline-detector output, or None.

        Returns:
            Dict with 'overall_score' (0-100), 'level', 'explanation',
            per-factor scores under 'factors', and 'guideline_info'.
        """
        if not papers:
            # No evidence at all: short-circuit with the minimum confidence.
            return {
                'overall_score': 0,
                'level': 'VERY LOW ⚫',
                'explanation': 'No supporting evidence',
                'factors': {}
            }
        # Score each evidence factor independently (each on a 0-100 scale).
        factors = {
            'paper_count': self._score_paper_count(papers),
            'paper_quality': self._score_paper_quality(papers),
            'study_design': self._score_study_design(papers),
            'evidence_recency': self._score_evidence_recency(papers),
            'source_diversity': self._score_source_diversity(papers),
            'domain_relevance': self._score_domain_relevance(papers, domain),
            'guideline_alignment': self._score_guideline_alignment(papers, domain, query, guideline_info)
        }
        # Weighted sum of the individual factor scores.
        overall_score = 0
        for factor, weight in self.score_weights.items():
            if factor in factors:
                overall_score += factors[factor] * weight
        # Generate explanation with guideline context.
        explanation = self._generate_explanation(factors, overall_score, domain, guideline_info)
        return {
            'overall_score': round(min(100, overall_score), 1),
            'level': self._score_to_level(overall_score),
            'explanation': explanation,
            'factors': {k: round(v, 1) for k, v in factors.items()},
            'guideline_info': guideline_info
        }

    def _score_guideline_alignment(self, papers: List[Dict], domain: str,
                                   query: str, guideline_info: Optional[Dict] = None) -> float:
        """Score (0-100) how well the evidence cites relevant guidelines."""
        if not guideline_info:
            return 50  # Neutral if no guideline info
        if guideline_info.get('guideline_coverage') == 'NO_PAPERS':
            return 0
        # Base score on coverage percentage.
        coverage_pct = guideline_info.get('coverage_percentage', 0)
        # Penalize 10 points per missing critical guideline (floored at 0).
        critical_missing = len(guideline_info.get('critical_missing', []))
        if critical_missing > 0:
            penalty = critical_missing * 10
            coverage_pct = max(0, coverage_pct - penalty)
        # Weight coverage by how guideline-driven the domain is.
        if domain in ['endocrinology', 'cardiology', 'infectious_disease']:
            # Guidelines are critical for these domains.
            return min(100, coverage_pct)
        elif domain in ['oncology', 'neurology', 'internal_medicine']:
            # Guidelines are important but not always critical.
            return min(100, coverage_pct * 0.9)
        else:
            # Guidelines are less critical.
            return min(100, coverage_pct * 0.7)

    def _score_paper_count(self, papers: List[Dict]) -> float:
        """Score based on the number of supporting papers (step function)."""
        count = len(papers)
        if count >= 10:
            return 100
        elif count >= 7:
            return 85
        elif count >= 5:
            return 70
        elif count >= 3:
            return 50
        elif count >= 1:
            return 30
        return 0

    def _score_paper_quality(self, papers: List[Dict]) -> float:
        """Score based on source/journal reputation and citation counts.

        Only the top 10 papers contribute; each starts at a baseline of 50.
        """
        if not papers:
            return 0
        scores = []
        for paper in papers[:10]:
            score = 50  # Baseline quality for any paper.
            # Source quality: substring match against reputable outlets.
            source = paper.get('source', '').lower()
            high_quality_sources = ['pubmed', 'nejm', 'lancet', 'jama', 'nature', 'science',
                                    'circulation', 'jacc', 'jco', 'cell', 'bmj', 'springer']
            if any(hq_source in source for hq_source in high_quality_sources):
                score += 20
            # Journal quality (reuses the same reputable-name list).
            journal = paper.get('journal', '').lower()
            if any(hq_journal in journal for hq_journal in high_quality_sources):
                score += 10
            # Citations bonus (if available).
            citations = paper.get('citations', 0)
            if citations > 100:
                score += 15
            elif citations > 20:
                score += 10
            scores.append(min(100, score))
        return statistics.mean(scores) if scores else 50

    def _score_study_design(self, papers: List[Dict]) -> float:
        """Score based on the strongest study design mentioned per paper."""
        if not papers:
            return 0
        # Keyword -> score map; a paper takes the max of all matches
        # (so "systematic review" outranks the embedded "review" hit).
        design_scores = {
            'RCT': 100,
            'randomized controlled trial': 100,
            'prospective cohort': 80,
            'retrospective cohort': 60,
            'case-control': 50,
            'review': 30,
            'meta-analysis': 90,
            'systematic review': 85,
            'case report': 20,
            'case series': 25,
            'guideline': 95,
            'consensus': 90
        }
        scores = []
        for paper in papers[:10]:
            abstract = paper.get('abstract', '').lower()
            title = paper.get('title', '').lower()
            text = abstract + " " + title
            paper_score = 30  # Default when no design keyword appears.
            for design, score in design_scores.items():
                if design in text:
                    paper_score = max(paper_score, score)
            scores.append(paper_score)
        return statistics.mean(scores) if scores else 30

    def _score_evidence_recency(self, papers: List[Dict]) -> float:
        """Score the share of the top 10 papers published within 3 years."""
        if not papers:
            return 0
        current_year = datetime.now().year
        recent_papers = 0
        for paper in papers[:10]:
            year = self._extract_year(paper)
            if year and current_year - year <= 3:
                recent_papers += 1
        percentage = (recent_papers / min(10, len(papers))) * 100
        return min(100, percentage)

    def _score_source_diversity(self, papers: List[Dict]) -> float:
        """Score based on how many distinct sources contributed papers."""
        sources = set()
        for paper in papers:
            source = paper.get('source', '')
            if source:
                sources.add(source.lower())
        unique_sources = len(sources)
        if unique_sources >= 4:
            return 100
        elif unique_sources >= 3:
            return 75
        elif unique_sources >= 2:
            return 50
        elif unique_sources == 1:
            return 25
        return 0

    def _score_domain_relevance(self, papers: List[Dict], domain: str) -> float:
        """Score the share of top papers mentioning the domain's terms."""
        if domain == "general_medical" or domain == "auto":
            return 50  # Neutral when no specific domain was requested.
        relevant_papers = 0
        domain_terms = domain.split('_')
        for paper in papers[:10]:
            abstract = paper.get('abstract', '').lower()
            title = paper.get('title', '').lower()
            text = abstract + " " + title
            # A single domain-term hit marks the paper as relevant.
            matches = sum(1 for term in domain_terms if term in text)
            if matches >= 1:
                relevant_papers += 1
        percentage = (relevant_papers / min(10, len(papers))) * 100
        return min(100, percentage)

    def _extract_year(self, paper: Dict) -> Optional[int]:
        """Extract a 4-digit year from 'publication_date' or 'date'.

        The 'date' fallback is added for consistency with the ranker's
        twin of this helper; the regex guarantees int() cannot fail, so
        the previous bare except was dead code.
        """
        for field in ('publication_date', 'date'):
            value = paper.get(field, '')
            if value and isinstance(value, str):
                match = re.search(r'\b(19|20)\d{2}\b', value)
                if match:
                    return int(match.group())
        return None

    def _score_to_level(self, score: float) -> str:
        """Convert a numeric score to a labeled confidence level."""
        if score >= 80:
            return "HIGH 🟢"
        elif score >= 60:
            return "MODERATE 🟡"
        elif score >= 40:
            return "FAIR 🟠"
        else:
            return "LOW 🔴"

    def _generate_explanation(self, factors: Dict, score: float,
                              domain: str, guideline_info: Optional[Dict] = None) -> str:
        """Generate a one-line explanation of the confidence score."""
        explanations = []
        if factors.get('paper_count', 0) >= 70:
            explanations.append("Strong evidence base")
        elif factors.get('paper_count', 0) <= 30:
            explanations.append("Limited evidence base")
        if factors.get('study_design', 0) >= 70:
            explanations.append("High-quality study designs")
        if factors.get('evidence_recency', 0) >= 70:
            explanations.append("Recent evidence")
        if factors.get('source_diversity', 0) >= 70:
            explanations.append("Diverse sources")
        # Add guideline-specific explanations.
        if guideline_info:
            guideline_score = factors.get('guideline_alignment', 0)
            if guideline_score >= 70:
                if guideline_info.get('guidelines_found'):
                    explanations.append(
                        f"Good guideline coverage ({len(guideline_info['guidelines_found'])} referenced)")
            elif guideline_score <= 30:
                if guideline_info.get('critical_missing'):
                    missing_str = ', '.join(guideline_info['critical_missing'][:3])
                    explanations.append(f"Missing guideline citations ({missing_str})")
        # Resolve a display name for the domain; the previously bare
        # except is narrowed to Exception so system exits aren't swallowed.
        if CONFIG_AVAILABLE:
            try:
                domain_name = get_domain_display_name(domain)
            except Exception:
                domain_name = domain.replace('_', ' ').title()
        else:
            domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
        explanation = f"{domain_name} confidence: {score:.1f}/100"
        if explanations:
            explanation += f". Factors: {'; '.join(explanations)}"
        return explanation
class UserContextDetector:
    """Infer the requesting user's context (clinician/researcher/student/patient)."""

    def __init__(self):
        # Keyword vocabularies used for scoring when no explicit role
        # mention is present in the query.
        self.context_patterns = {
            "clinician": ['patient', 'clinical', 'treatment', 'diagnosis', 'therapy',
                          'management', 'guidelines', 'recommend', 'prescribe'],
            "researcher": ['research', 'study', 'methodology', 'evidence', 'publication',
                           'hypothesis', 'experiment', 'results', 'conclusions'],
            "student": ['learn', 'study', 'exam', 'textbook', 'course', 'education',
                        'explain', 'understand', 'concept', 'basics'],
            "patient": ['i have', 'my symptoms', 'my doctor', 'my treatment', 'pain',
                        'suffering', 'experience', 'diagnosed', 'medication']
        }

    def detect_context(self, query: str, domain: str = None) -> str:
        """Return the best-matching context label for *query*, or "general"."""
        lowered = query.lower()

        # Explicit role mentions win outright, checked in priority order.
        explicit_mentions = (
            ("clinician", ('clinician', 'doctor', 'nurse')),
            ("researcher", ('researcher', 'scientist', 'academic')),
            ("student", ('student', 'trainee')),
        )
        for label, terms in explicit_mentions:
            if any(term in lowered for term in terms):
                return label
        # Patient signals include possessive phrasing near the start.
        if 'patient' in lowered or 'i have' in lowered or 'my ' in lowered[:50]:
            return "patient"

        # Otherwise count keyword hits per context; first highest wins.
        best_label, best_hits = "general", 0
        for label, keywords in self.context_patterns.items():
            hits = sum(keyword in lowered for keyword in keywords)
            if hits > best_hits:
                best_label, best_hits = label, hits
        return best_label
# ============================================================================
# MAIN RAG ENGINE
# ============================================================================
class EnhancedRAGEngine:
"""Production-ready RAG engine for medical research with real paper fetching and guideline detection"""
def __init__(self, vector_store=None, session_id: str = "default",
             model: str = "gpt-oss-120b", use_real_time: bool = True):
    """Initialize the RAG engine and its optional subsystems.

    Every subsystem (research engine, LLM, vector store, summarizer,
    memory) degrades gracefully to None / fallback mode if it fails to
    load, so the engine always constructs successfully.

    Args:
        vector_store: Pre-built vector store; a default VectorStore is
            created when None.
        session_id: Identifier for conversation memory.
        model: Friendly model name; mapped to a provider model id below.
        use_real_time: Whether to fetch papers live.
    """
    print("🚀 Initializing Medical Research RAG Engine...")
    # Model mapping: friendly aliases -> provider model ids; unknown
    # names pass through unchanged.
    model_map = {
        "gpt-oss-120b": "gpt-oss-120b",
        "llama-70b": "llama-3-70b-instruct",
        "llama-8b": "llama-3-8b-instruct",
        "mixtral": "mixtral-8x7b-instruct"
    }
    self.model = model_map.get(model, model)
    print(f"📊 Using model: {self.model}")
    self.use_real_time = use_real_time
    # Initialize MedicalResearchEngine from main.py
    print("📚 Initializing MedicalResearchEngine...")
    try:
        # Import and create MedicalResearchEngine
        from main import MedicalResearchEngine
        self.research_engine = MedicalResearchEngine()
        print("✅ MedicalResearchEngine loaded successfully!")
        self.RESEARCH_ENGINE_AVAILABLE = True
    except ImportError as e:
        print(f"⚠️ MedicalResearchEngine import failed: {e}")
        print("⚠️ Using fallback mode - will generate demo papers")
        self.RESEARCH_ENGINE_AVAILABLE = False
    except Exception as e:
        print(f"⚠️ MedicalResearchEngine initialization failed: {e}")
        print("⚠️ Using fallback mode - will generate demo papers")
        self.RESEARCH_ENGINE_AVAILABLE = False
    # Initialize LLM; on any failure self.llm is set to None so callers
    # can test `if self.llm` before use.
    try:
        # Ensure we can import from llm directory
        import sys
        import os
        # Get the project root (MedSearchPro/
        current_dir = os.path.dirname(os.path.abspath(__file__))  # /MedSearchPro/chat/
        project_root = os.path.dirname(current_dir)
        if project_root not in sys.path:
            sys.path.insert(0, project_root)
        # Now importing GrokLLM
        from llm.llm_provider import GrokLLM
        # Initialize LLM components
        self.llm = GrokLLM(model=self.model)
        print(f"✅ LLM loaded: {self.llm.get_active_provider()}")
    except Exception as e:
        print(f"⚠️ LLM not available - using fallback mode: {e}")
        self.llm = None
    self.role_reasoning = RoleBasedReasoning()  # NEW: Role-based reasoning
    self.ranker = PaperRanker()
    self.confidence_scorer = ConfidenceScorer()
    self.context_detector = UserContextDetector()
    self.guideline_detector = GuidelineDetector()  # New: Guideline detector
    # Initialize RAG components
    try:
        self.vector_store = vector_store or VectorStore()
    except:
        # NOTE(review): bare except silently swallows every error here
        # (including typos inside VectorStore) - consider narrowing.
        self.vector_store = None
        print("⚠️ Vector store not available")
    try:
        # NOTE(review): hasattr(self, 'llm') is always True at this point
        # because self.llm is assigned in both branches above; presumably
        # the intent was `if self.llm is not None` - confirm.
        self.single_paper_summarizer = SinglePaperSummarizer(model=self.model) if hasattr(self, 'llm') else None
    except:
        self.single_paper_summarizer = None
    # Memory
    try:
        self.memory = ConversationMemory(session_id=session_id)
    except:
        print("⚠️ ConversationMemory not available")
        self.memory = None
    # Metrics accumulated across queries for reporting.
    self.metrics = {
        'total_queries': 0,
        'average_confidence': 0,
        'domains_used': Counter(),
        'user_contexts': Counter(),
        'real_papers_fetched': 0,
        'demo_papers_used': 0,
        'guideline_coverage': []  # Track guideline coverage over time
    }
    print(f"✅ Medical Research RAG Engine Ready! Session: {session_id}")
    if self.RESEARCH_ENGINE_AVAILABLE:
        print(" 📄 Real paper fetching: ENABLED")
    else:
        print(" 📄 Real paper fetching: DISABLED (using demo papers)")
    print(" 📋 Guideline detection: ENABLED")
    print(" 👤 Role-based responses: ENABLED")
def answer_research_question(self,
                             query: str,
                             domain: str = "general_medical",
                             max_papers: int = 20,
                             use_memory: bool = True,
                             user_context: str = "auto",
                             use_fallback: bool = False,
                             role: str = "general",  # NEW: Explicit role parameter
                             role_system_prompt: Optional[str] = None,  # NEW: Custom role prompt from frontend
                             **kwargs) -> Dict[str, Any]:
    """Answer medical research questions with role-based reasoning.

    Pipeline: simple-query short-circuit -> direct-query short-circuit ->
    paper retrieval -> guideline detection -> ranking -> confidence
    scoring -> role-based analysis and synthesis -> memory/metrics update.

    Args:
        query: The user's question.
        domain: Medical specialty used for retrieval/ranking/scoring.
        max_papers: Upper bound on papers retrieved.
        use_memory: Whether to record the exchange in conversation memory.
        user_context: "auto" to infer the user's context from the query.
        use_fallback: Forwarded to the retrieval engine.
        role: Explicit user role driving tone/format of the answer.
        role_system_prompt: Optional custom role prompt from the frontend.
        **kwargs: Ignored; accepted for forward compatibility.

    Returns:
        A response dict (answer, papers, confidence, guideline info, ...)
        or an error-response dict if anything raises.
    """
    start_time = time.time()
    self.metrics['total_queries'] += 1
    self.metrics['domains_used'][domain] += 1
    print(f"\n🔍 Processing query: '{query}'")
    print(f" Domain: {domain}")
    print(f" Role: {role}")
    print(f" Max papers: {max_papers}")
    print(f" Real-time search: {self.use_real_time}")
    try:
        # Auto-detect user context if needed (backward compatibility)
        if user_context == "auto":
            user_context = self.context_detector.detect_context(query, domain)
        self.metrics['user_contexts'][user_context] += 1
        # NEW: Check for simple queries first (greetings, basic questions)
        simple_response = self._handle_simple_query(query, domain, role)
        if simple_response:
            return simple_response
        # Check if query requires research analysis
        requires_research = self._requires_research_analysis(query)
        if not requires_research:
            # For non-research queries, provide direct role-appropriate response
            return self._handle_direct_query(query, domain, role)
        # Retrieve papers using MedicalResearchEngine
        print("📚 Retrieving relevant papers...")
        papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
        if not papers:
            print("⚠️ No papers found, creating fallback response...")
            return self._create_no_results_response(query, domain, role)
        # Detect guideline citations
        print("📋 Detecting guideline citations...")
        guideline_info = self.guideline_detector.detect_guidelines(papers, domain, query)
        # Store guideline coverage for metrics
        if guideline_info:
            self.metrics['guideline_coverage'].append({
                'domain': domain,
                'coverage': guideline_info.get('coverage_percentage', 0),
                'guidelines_found': len(guideline_info.get('guidelines_found', [])),
                'critical_missing': len(guideline_info.get('critical_missing', [])),
                'timestamp': datetime.now().isoformat()
            })
        # Rank papers
        ranked_papers = self.ranker.rank_papers(papers, query, domain, role)
        print(f"📊 Papers found: {len(ranked_papers)}")
        # Track paper sources (real vs. generated demo papers).
        real_papers = sum(1 for p in ranked_papers if not p.get('is_demo', False))
        demo_papers = sum(1 for p in ranked_papers if p.get('is_demo', False))
        self.metrics['real_papers_fetched'] += real_papers
        self.metrics['demo_papers_used'] += demo_papers
        if demo_papers > 0:
            print(f" ⚠️ Includes {demo_papers} demo papers (real papers: {real_papers})")
        # Report guideline findings
        if guideline_info.get('guidelines_found'):
            print(f" 📋 Guidelines referenced: {', '.join(guideline_info['guidelines_found'])}")
        if guideline_info.get('critical_missing'):
            print(f" ⚠️ Missing guidelines: {', '.join(guideline_info['critical_missing'][:3])}")
        # Calculate confidence with guideline consideration
        confidence = self.confidence_scorer.calculate_confidence(
            ranked_papers, query, "summary", role, domain, guideline_info
        )
        # Generate analysis using role-based reasoning
        print("🧠 Generating role-based analysis...")
        analysis = self._generate_role_based_analysis(
            query, domain, role, ranked_papers, guideline_info, role_system_prompt
        )
        # Generate clinical bottom line with role awareness
        bottom_line = self._generate_role_bottom_line(
            query, domain, role, len(ranked_papers), real_papers, guideline_info
        )
        # Synthesize final answer
        final_answer = self._synthesize_role_answer(
            query, domain, role, analysis, ranked_papers,
            bottom_line, confidence, guideline_info
        )
        # Update memory
        if use_memory and self.memory:
            self._update_memory(query, final_answer, domain, role, ranked_papers, guideline_info)
        # Update metrics: running average of confidence over all queries.
        response_time = time.time() - start_time
        self.metrics['average_confidence'] = (
            (self.metrics['average_confidence'] * (self.metrics['total_queries'] - 1) +
             confidence['overall_score']) / self.metrics['total_queries']
        )
        print(f"✅ Analysis complete in {response_time:.2f}s")
        print(f" Confidence: {confidence['overall_score']}/100")
        print(f" Papers used: {len(ranked_papers)}")
        print(f" Real papers: {real_papers}, Demo papers: {demo_papers}")
        print(f" Guideline coverage: {guideline_info.get('coverage_percentage', 0)}%")
        return final_answer
    except Exception as e:
        print(f"❌ Error in research analysis: {e}")
        import traceback
        traceback.print_exc()
        return self._create_error_response(query, domain, role, str(e))
def _handle_simple_query(self, query: str, domain: str, role: str) -> Optional[Dict[str, Any]]:
    """Short-circuit handling for greetings and other trivial inputs.

    Returns a ready-made response dict for simple inputs, or None when
    the query should go through the normal research pipeline.
    """
    normalized = query.lower().strip()
    # Exact greeting matches receive a canned, role-aware greeting.
    known_greetings = {'hi', 'hello', 'hey', 'greetings', 'good morning',
                       'good afternoon', 'good evening', 'howdy'}
    if normalized in known_greetings:
        print(" 👋 Detected simple greeting")
        return self._create_greeting_response(query, domain, role)
    # One- or two-word inputs that don't look like research questions
    # also get a lightweight reply instead of a full paper search.
    if len(query.split()) <= 2 and not self._looks_like_research_query(query):
        print(" 💬 Detected simple query")
        return self._create_simple_response(query, domain, role)
    return None
def _looks_like_research_query(self, query: str) -> bool:
"""Check if query looks like a research question"""
query_lower = query.lower()
# Research question indicators
research_indicators = [
'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis',
'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical',
'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis',
'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk',
'benefit', 'recommendation', 'guideline', 'standard', 'protocol'
]
# Check if query contains research indicators
for indicator in research_indicators:
if indicator in query_lower:
return True
# Check question words
question_words = ['what', 'why', 'how', 'when', 'where', 'which', 'who']
if any(query_lower.startswith(word) for word in question_words):
# Check if it's a complex question (more than basic)
if len(query.split()) > 3:
return True
return False
def _requires_research_analysis(self, query: str) -> bool:
"""Determine if query requires full research analysis"""
query_lower = query.lower().strip()
# Definitely simple queries
simple_patterns = [
r'^hi$', r'^hello$', r'^hey$', r'^greetings$',
r'^good morning$', r'^good afternoon$', r'^good evening$',
r'^how are you$', r"^what's up$", r'^sup$',
r'^thanks$', r'^thank you$', r'^bye$', r'^goodbye$'
]
for pattern in simple_patterns:
if re.match(pattern, query_lower):
return False
# Check if it's a substantive question
if len(query.split()) <= 2 and not self._looks_like_research_query(query):
return False
return True
def _create_greeting_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Build a canned, role-appropriate greeting response (no paper search).

    The returned dict mirrors the shape of a full research answer so
    callers can treat greeting and research responses uniformly.
    """
    # Fall back to the generic role profile for unknown roles.
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    # Role-specific greetings
    greetings = {
        'patient': "👋 Hello! I'm here to help you understand health topics in simple, clear terms. What would you like to know?",
        'student': "👋 Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
        'clinician': "👋 Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
        'doctor': "👋 Hello, doctor. I'm available to discuss clinical questions and evidence. What would you like to explore?",
        'researcher': "👋 Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
        'professor': "👋 Hello. I can assist with academic discussions and evidence synthesis. What topic interests you?",
        'pharmacist': "👋 Hello. I can help with medication-related questions and information. How can I assist you today?",
        'general': "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"
    }
    greeting = greetings.get(role, greetings['general'])
    # Resolve display metadata for the domain; fall back to a title-cased
    # name/icon when the config helpers are unavailable or fail.
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except:
            domain_info = DOMAIN_INFO.get(domain, {
                'name': domain.replace('_', ' ').title(),
                'icon': '⚕️'
            })
    else:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })
    answer = f"""# {greeting}
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
Feel free to ask me anything! I'll provide information tailored to your needs as a {role_info['name'].lower()}."""
    # NOTE(review): 'response_time' below stores an absolute timestamp
    # (time.time()), not an elapsed duration - confirm intent.
    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": greeting,
        "bottom_line": greeting,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 95.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple greeting response'
        },
        "guideline_info": None,
        "reasoning_method": "greeting",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            'response_time': time.time(),
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
def _create_simple_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Build a role-appropriate clarification reply for very short queries.

    Used when the input is too brief to run a research pipeline; asks the
    user to elaborate, in a tone matching their role.
    """
    # Fall back to the generic role profile for unknown roles.
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    # Generate simple, direct response
    simple_responses = {
        'patient': f"I'd be happy to help with '{query}'. Could you tell me a bit more about what you're looking for?",
        'student': f"That's an interesting topic! To help you best, could you provide more details about what you'd like to know regarding '{query}'?",
        'clinician': f"Regarding '{query}', I can provide evidence-based information. Please share more specifics about your clinical question.",
        'doctor': f"For '{query}', I can offer medical information. Could you elaborate on the clinical context or specific aspects you're interested in?",
        'researcher': f"On the topic of '{query}', I can discuss research perspectives. What specific aspect would you like to explore?",
        'professor': f"Regarding '{query}', I can provide academic perspectives. What particular angle or detail would you like to discuss?",
        'pharmacist': f"About '{query}', I can offer medication-related information. Could you specify what you'd like to know?",
        'general': f"I can help with information about '{query}'. Could you provide more details about what specifically you're interested in?"
    }
    response = simple_responses.get(role, simple_responses['general'])
    # Resolve display metadata for the domain; fall back to a title-cased
    # name/icon when the config helpers are unavailable or fail.
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except:
            domain_info = DOMAIN_INFO.get(domain, {
                'name': domain.replace('_', ' ').title(),
                'icon': '⚕️'
            })
    else:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })
    answer = f"""# 💬 **Response**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
{response}
*Tip: For more detailed information, try asking a more specific question!*"""
    # NOTE(review): 'response_time' below stores an absolute timestamp
    # (time.time()), not an elapsed duration - confirm intent.
    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 85.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple query response'
        },
        "guideline_info": None,
        "reasoning_method": "simple_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            'response_time': time.time(),
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
def _handle_direct_query(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Answer a non-research query directly via the LLM (no paper search).

    Falls back to a canned clarification prompt when the LLM is missing
    or fails; always returns a response dict shaped like a full answer.
    """
    # Fall back to the generic role profile for unknown roles.
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    # Use LLM for direct response if available
    if self.llm:
        try:
            # Build a role-aware prompt with zero papers attached.
            prompt = self.role_reasoning.create_role_prompt(query, domain, role, 0, None)
            response = self.llm.generate(
                prompt,
                system_message=f"You are assisting a {role_info['name'].lower()}. Provide helpful, accurate information.",
                max_tokens=1000
            )
            # Clean up response
            response = response.strip()
            if not response:
                response = f"I'd be happy to help with '{query}'. Could you provide more details about what specifically you're looking for?"
        except Exception as e:
            print(f"⚠️ LLM direct response failed: {e}")
            response = f"I can help with information about '{query}'. Please feel free to ask more specific questions!"
    else:
        response = f"I'd be happy to discuss '{query}'. What specific aspect would you like to know more about?"
    # Resolve display metadata for the domain; fall back to a title-cased
    # name/icon when the config helpers are unavailable or fail.
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except:
            domain_info = DOMAIN_INFO.get(domain, {
                'name': domain.replace('_', ' ').title(),
                'icon': '⚕️'
            })
    else:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })
    answer = f"""# 💬 **Response**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
{response}
*Note: This is a direct response. For evidence-based research analysis with papers, please ask a more specific research question.*"""
    # NOTE(review): 'response_time' below stores an absolute timestamp
    # (time.time()), not an elapsed duration - confirm intent.
    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 80.0,
            'level': 'HIGH 🟢',
            'explanation': 'Direct query response without papers'
        },
        "guideline_info": None,
        "reasoning_method": "direct_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            'response_time': time.time(),
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
                          use_fallback: bool = False) -> List[Dict]:
    """Retrieve papers for *query*, preferring real sources over demos.

    Retrieval cascade: (1) MedicalResearchEngine live search, (2) local
    vector store when fewer than half of max_papers were found, (3) demo
    papers when fewer than a third were found. Returns at most
    max_papers normalized paper dicts.
    """
    papers = []
    # Use MedicalResearchEngine if available
    if self.RESEARCH_ENGINE_AVAILABLE and self.use_real_time:
        try:
            print(f" 🔍 Using MedicalResearchEngine for real-time search...")
            # Use the research engine to get real papers
            result = self.research_engine.answer_user_query(
                user_query=query,
                domain=domain,
                use_real_time=True,
                use_fallback=use_fallback
            )
            # Extract papers from result
            if result and 'supporting_papers' in result:
                raw_papers = result['supporting_papers']
                for raw_paper in raw_papers[:max_papers]:
                    # Transform to our paper format; id falls back through
                    # paper_id -> doi -> pmid -> hash of the title.
                    paper = {
                        'id': raw_paper.get('paper_id') or
                              raw_paper.get('doi') or
                              raw_paper.get('pmid') or
                              f"{hash(raw_paper.get('title', ''))}",
                        'title': raw_paper.get('title', 'Untitled'),
                        'abstract': raw_paper.get('abstract') or
                                    raw_paper.get('summary') or
                                    raw_paper.get('description', ''),
                        'source': raw_paper.get('source', 'unknown'),
                        'publication_date': raw_paper.get('publication_date') or
                                            raw_paper.get('date') or
                                            raw_paper.get('year', ''),
                        'authors': self._parse_authors(raw_paper.get('authors', '')),
                        'journal': raw_paper.get('journal', ''),
                        'doi': raw_paper.get('doi', ''),
                        'url': raw_paper.get('url') or raw_paper.get('link', ''),
                        'citations': raw_paper.get('citation_count', 0) or
                                     raw_paper.get('citations', 0),
                        'is_preprint': raw_paper.get('is_preprint', False),
                        'is_fallback': raw_paper.get('is_fallback', False),
                        'is_demo': False,  # Real paper!
                        'search_domain': domain
                    }
                    # Clean up the abstract
                    if paper['abstract']:
                        # Remove excessive whitespace
                        paper['abstract'] = ' '.join(paper['abstract'].split())
                        # Limit length
                        if len(paper['abstract']) > 2000:
                            paper['abstract'] = paper['abstract'][:2000] + "..."
                    papers.append(paper)
                print(f" ✅ Retrieved {len(papers)} real papers from MedicalResearchEngine")
            else:
                print(f" ⚠️ No papers returned from MedicalResearchEngine")
        except Exception as e:
            print(f" ⚠️ MedicalResearchEngine failed: {e}")
    # Try vector store as fallback when the live search came up short.
    if len(papers) < max_papers // 2 and self.vector_store:
        try:
            print(f" 🔍 Trying vector store...")
            results = self.vector_store.search(
                query=query,
                domain=domain,
                n_results=max_papers - len(papers)
            )
            # Deduplicate against papers already retrieved above.
            seen_ids = set([p['id'] for p in papers if p.get('id')])
            for result in results:
                paper_id = result['metadata'].get('paper_id')
                if paper_id and paper_id not in seen_ids:
                    paper = {
                        'id': paper_id,
                        'title': result['metadata'].get('paper_title', ''),
                        'abstract': result['text'],
                        'source': result['metadata'].get('source', 'vector_store'),
                        'publication_date': result['metadata'].get('publication_date', ''),
                        'authors': result['metadata'].get('authors', '').split(',')
                        if result['metadata'].get('authors') else [],
                        'citations': result['metadata'].get('citations', 0),
                        'is_demo': False
                    }
                    papers.append(paper)
                    seen_ids.add(paper_id)
            # NOTE(review): this message reports len(results), which can
            # overcount since duplicates are skipped above - confirm.
            print(f" ✅ Added {len(results)} papers from vector store")
        except Exception as e:
            print(f" ⚠️ Vector store search failed: {e}")
    # Generate demo papers only if we have very few real papers
    if len(papers) < max_papers // 3:
        needed = max_papers - len(papers)
        demo_papers = self._create_demo_papers(query, domain, needed)
        papers.extend(demo_papers)
        print(f" 📄 Added {len(demo_papers)} demo papers for illustration")
    return papers[:max_papers]
def _parse_authors(self, authors_input) -> List[str]:
"""Parse authors from various input formats"""
if not authors_input:
return []
if isinstance(authors_input, list):
return authors_input
if isinstance(authors_input, str):
# Try to split by common separators
if ';' in authors_input:
return [a.strip() for a in authors_input.split(';') if a.strip()]
elif ',' in authors_input:
# Check if it's "Last, First" format or just comma-separated names
parts = [p.strip() for p in authors_input.split(',')]
if len(parts) > 2: # Probably comma-separated names
return parts
else:
# Might be "Last, First" format - return as is
return [authors_input]
else:
return [authors_input]
return []
def _create_demo_papers(self, query: str, domain: str, count: int) -> List[Dict]:
"""Create demo papers for illustration only"""
papers = []
current_year = datetime.now().year
# Common medical journal sources
journal_sources = {
'infectious_disease': ['New England Journal of Medicine', 'The Lancet Infectious Diseases',
'Clinical Infectious Diseases', 'Journal of Antimicrobial Chemotherapy'],
'cardiology': ['New England Journal of Medicine', 'Journal of the American College of Cardiology',
'Circulation', 'European Heart Journal'],
'endocrinology': ['Diabetes Care', 'The Lancet Diabetes & Endocrinology',
'Journal of Clinical Endocrinology & Metabolism'],
'neurology': ['Neurology', 'The Lancet Neurology', 'JAMA Neurology', 'Brain'],
'oncology': ['Journal of Clinical Oncology', 'The Lancet Oncology', 'JAMA Oncology',
'Annals of Oncology'],
'internal_medicine': ['New England Journal of Medicine', 'The Lancet', 'JAMA',
'Annals of Internal Medicine']
}
sources = journal_sources.get(domain, ['PubMed', 'Medical Research Database'])
for i in range(min(count, 5)): # Limit demo papers
# Generate title based on query
query_terms = [word for word in query.lower().split() if len(word) > 4]
if query_terms:
base_term = random.choice(query_terms).title()
title = f"Recent Advances in {base_term}: A {random.choice(['Systematic Review', 'Meta-analysis', 'Clinical Trial'])}"
else:
title = f"Current Research in {domain.replace('_', ' ').title()}"
# Generate abstract
abstract = f"This study examines {query.lower()}. Results demonstrate significant findings relevant to clinical practice. Further research is warranted to confirm these observations."
# Generate authors
first_names = ['James', 'Mary', 'Robert', 'Patricia', 'John', 'Jennifer']
last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia']
authors = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(random.randint(2, 5))]
# Publication date
year_offset = i % 4
pub_year = current_year - year_offset
month = random.randint(1, 12)
pub_date = f"{pub_year}-{month:02d}"
paper = {
'id': f"demo_{domain}_{i}_{int(time.time())}",
'title': title,
'abstract': abstract,
'source': random.choice(sources),
'publication_date': pub_date,
'authors': authors,
'citations': random.randint(0, 50),
'is_demo': True,
'is_preprint': random.random() > 0.7,
'journal': random.choice(sources)
}
papers.append(paper)
return papers
def _generate_role_based_analysis(self, query: str, domain: str, role: str,
                                  papers: List[Dict], guideline_info: Dict = None,
                                  custom_role_prompt: str = None) -> str:
    """Generate role-based analysis using LLM if available.

    Builds a role-tailored prompt (optionally listing the top sources and a
    demo-paper disclaimer) and runs it through the LLM; falls back to the
    template-based analysis when no LLM is configured or the call fails.
    """
    if not self.llm:
        # No LLM configured: use the deterministic fallback.
        return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
    # Role-specific prompt scaffold.
    prompt = self.role_reasoning.create_role_prompt(
        query, domain, role, len(papers), guideline_info
    )
    # Attach the top three sources for research context.
    if papers:
        listed = "\n".join(
            f"{idx}. {paper.get('title', 'Untitled')} ({paper.get('source', 'Unknown')})"
            for idx, paper in enumerate(papers[:3], start=1)
        )
        prompt += f"\n\n**Relevant Sources:**\n{listed}"
    # Disclose any illustrative (demo) papers included in the context.
    illustrative = sum(1 for paper in papers if paper.get('is_demo', False))
    if illustrative > 0:
        prompt += f"\n\nNote: {illustrative} illustrative examples included for context."
    try:
        # Custom system prompt wins over the generic role message.
        system_message = custom_role_prompt or f"You are assisting a {role}. Provide helpful, accurate information."
        return self.llm.generate(
            prompt,
            system_message=system_message,
            max_tokens=2000
        )
    except Exception as e:
        print(f"⚠️ LLM role-based analysis failed: {e}")
        return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
def _create_fallback_role_analysis(self, query: str, domain: str, role: str,
                                   papers: List[Dict], guideline_info: Dict = None) -> str:
    """Create fallback analysis when LLM is unavailable.

    Builds a templated, role-tailored markdown analysis from the paper list
    and optional guideline-detection results. Purely string-based; no I/O.
    """
    # Resolve the role profile, defaulting to the generic 'general' profile.
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    if CONFIG_AVAILABLE:
        try:
            domain_name = get_domain_display_name(domain)
        # Narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate.
        except Exception:
            domain_name = domain.replace('_', ' ').title()
    else:
        domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
    # Count real vs demo papers
    real_papers = [p for p in papers if not p.get('is_demo', False)]
    demo_papers = [p for p in papers if p.get('is_demo', False)]
    analysis = f"""**{role_info['name']}-Focused Analysis**
**Query:** {query}
**Domain Context:** {domain_name}
**Role Perspective:** {role_info['name']}"""
    # Add guideline information when detection results were supplied.
    if guideline_info:
        if guideline_info.get('guidelines_found'):
            analysis += f"\n**Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}"
        if guideline_info.get('critical_missing'):
            analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
    analysis += f"""
**Key Information for {role_info['name']}:**
Based on analysis of {len(papers)} relevant sources ({len(real_papers)} real, {len(demo_papers)} illustrative):
1. **{role_info['name']}-Relevant Insights:**
- Information tailored to {role_info['name'].lower()} needs and perspective
- Practical implications for {role_info['name'].lower()} context
- Actionable takeaways appropriate for this role
2. **Domain Context:**
- Considerations specific to {domain_name}
- Relevant standards and approaches in this field
- Important context for application
3. **Evidence Considerations:**
- {len(papers)} sources analyzed
- Quality and relevance assessed for {role_info['name'].lower()} needs
- {"Guideline awareness as noted above" if guideline_info else "Standard evidence considerations"}
**Recommendations for {role_info['name']}:**
- Apply information within {role_info['name'].lower()} role context
- Consider individual circumstances and specific needs
- {"Consult referenced guidelines as appropriate" if guideline_info and guideline_info.get('guidelines_found') else "Reference standard practices"}
- Seek additional information for specific cases
- Integrate with professional judgment and experience
*Note: This analysis is tailored for {role_info['name'].lower()} perspective. For other perspectives, different considerations may apply.*"""
    if demo_papers:
        analysis += f"\n\n*Includes {len(demo_papers)} illustrative examples for comprehensive analysis.*"
    return analysis
def _generate_role_bottom_line(self, query: str, domain: str, role: str,
                               papers_count: int, real_papers_count: int,
                               guideline_info: Dict = None) -> str:
    """Generate role-appropriate bottom line.

    Produces a short markdown summary for the given role, optionally
    annotated with guideline coverage notes from `guideline_info`.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    if CONFIG_AVAILABLE:
        try:
            domain_name = get_domain_display_name(domain)
        # Narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate.
        except Exception:
            domain_name = domain.replace('_', ' ').title()
    else:
        domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
    bottom_line = f"""**Bottom Line for {role_info['name']}:**
Based on {papers_count} sources in {domain_name} ({real_papers_count} real sources), here are the key takeaways for {role_info['name'].lower()} perspective."""
    # Add guideline-specific bottom line
    if guideline_info:
        if guideline_info.get('guidelines_found'):
            bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced."
        if guideline_info.get('critical_missing'):
            missing_list = ', '.join(guideline_info['critical_missing'][:2])
            bottom_line += f"\n**Consider:** Missing explicit guideline citations for {missing_list}."
        # Flag weak guideline coverage (below 50%).
        coverage = guideline_info.get('coverage_percentage', 0)
        if coverage < 50:
            bottom_line += f"\n**Evidence Note:** Guideline coverage is limited."
    bottom_line += f"""
**{role_info['name']}-Specific Considerations:**
- Information tailored to {role_info['name'].lower()} role and needs
- Practical application within {role_info['name'].lower()} context
- Integration with {role_info['name'].lower()} knowledge and experience
- {"Guideline-aware decision making" if guideline_info else "Evidence-informed approach"}
- Consideration of specific circumstances and constraints"""
    # Disclose any illustrative (non-real) papers included in the counts.
    if papers_count > real_papers_count:
        bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for context.*"
    return bottom_line
def _synthesize_role_answer(self, query: str, domain: str, role: str,
                            analysis: str, papers: List[Dict],
                            bottom_line: str, confidence: Dict[str, Any],
                            guideline_info: Dict = None) -> Dict[str, Any]:
    """Synthesize final answer with role information.

    Assembles the full markdown answer (header, executive summary, guideline
    section, detailed analysis, citations, takeaways) and returns the
    structured response payload consumed by callers/UI.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️'),
                'description': get_domain_description(domain)
            }
        # Narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate.
        except Exception:
            domain_info = DOMAIN_INFO.get(domain, {
                'name': domain.replace('_', ' ').title(),
                'icon': '⚕️',
                'description': f'Research in {domain.replace("_", " ")}'
            })
    else:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️',
            'description': f'Research in {domain.replace("_", " ")}'
        })
    # Count real vs demo papers
    real_papers = [p for p in papers if not p.get('is_demo', False)]
    demo_papers = [p for p in papers if p.get('is_demo', False)]
    # Format paper citations (top 5 only)
    paper_citations = []
    for i, paper in enumerate(papers[:5], 1):
        title = paper.get('title', 'Untitled')
        authors = paper.get('authors', [])
        year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
        source = paper.get('source', 'Unknown')
        journal = paper.get('journal', '')
        is_demo = paper.get('is_demo', False)
        is_preprint = paper.get('is_preprint', False)
        # Format authors: single name, "A and B", or "A et al."
        if authors and isinstance(authors, list) and len(authors) > 0:
            if len(authors) == 1:
                author_str = authors[0]
            elif len(authors) == 2:
                author_str = f"{authors[0]} and {authors[1]}"
            else:
                author_str = f"{authors[0]} et al."
        else:
            author_str = "Authors not specified"
        # Build citation; demo/preprint markers prefix the author line.
        citation = f"{i}. **{title}**"
        demo_indicator = "📄 " if is_demo else ""
        preprint_indicator = "⚡ " if is_preprint else ""
        if author_str and year:
            citation += f"\n {demo_indicator}{preprint_indicator}*{author_str} ({year})*"
        elif author_str:
            citation += f"\n {demo_indicator}{preprint_indicator}*{author_str}*"
        else:
            citation += f"\n {demo_indicator}{preprint_indicator}*Unknown authors*"
        # Prefer journal over the generic source label.
        if journal:
            citation += f"\n Journal: {journal}"
        elif source and source != 'unknown':
            citation += f"\n Source: {source}"
        paper_citations.append(citation)
    # Build guideline summary section (empty string when no guideline info).
    guideline_summary = ""
    if guideline_info:
        guideline_summary = "## 📋 **Guideline Assessment**\n\n"
        if guideline_info.get('guidelines_found'):
            guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
        if guideline_info.get('critical_missing'):
            missing_list = ', '.join(guideline_info['critical_missing'])
            guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
        guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
    # Build answer
    answer = f"""# 🔬 **{role_info['name']}-Focused Analysis**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
**Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
**Sources Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
---
## 📋 **Executive Summary**
{bottom_line}
---
{guideline_summary}## 🔍 **Detailed Analysis**
{analysis}
---
## 📊 **Supporting Evidence**
{chr(10).join(paper_citations) if paper_citations else "*No papers cited for this simple query*"}
---
## 🎯 **Key Takeaways for {role_info['name']}**
1. Role-appropriate information and insights
2. Domain-specific considerations for {domain_info['name'].lower()}
3. Practical implications tailored to {role_info['name'].lower()} needs
4. {"Guideline-aware recommendations" if guideline_info else "Evidence-informed approach"}
*Analysis performed with {role_info['name'].lower()}-focused reasoning*
*Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": analysis,
        "bottom_line": bottom_line,
        "papers_used": len(papers),
        "real_papers_used": len(real_papers),
        "demo_papers_used": len(demo_papers),
        "confidence_score": confidence,
        "guideline_info": guideline_info,
        "reasoning_method": "role_based",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # NOTE(review): this stores an absolute timestamp, not an elapsed
            # duration — confirm whether consumers expect a delta here.
            'response_time': time.time(),
            'papers_analyzed': len(papers),
            'domain': domain,
            'user_context': role
        }
    }
def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
role: str, papers: List[Dict], guideline_info: Dict = None):
"""Update conversation memory with role info"""
if not self.memory:
return
memory_data = {
'query': query,
'domain': domain,
'role': role,
'papers_used': len(papers),
'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
'confidence_score': response.get('confidence_score', {}).get('overall_score', 0),
'timestamp': datetime.now().isoformat()
}
# Add guideline info if available
if guideline_info:
memory_data['guidelines_found'] = guideline_info.get('guidelines_found', [])
memory_data['critical_missing'] = guideline_info.get('critical_missing', [])
memory_data['guideline_coverage'] = guideline_info.get('coverage_percentage', 0)
self.memory.add_interaction(
user_message=query,
ai_response=response.get('answer', '')[:1000],
metadata=memory_data
)
def _create_no_results_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Create response when no papers are found.

    Returns a structured payload with role-tailored search suggestions and a
    VERY LOW confidence score; `error` is set to "no_results".
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        # Narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate.
        except Exception:
            domain_info = DOMAIN_INFO.get(domain, {
                'name': domain.replace('_', ' ').title(),
                'icon': '⚕️'
            })
    else:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })
    answer = f"""# 🔍 **Limited Research Found**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']}
**Query:** {query}
**Suggestions for {role_info['name']}:**
1. Try broadening your search terms
2. Consider related topics in {domain_info['name']}
3. Check spelling of technical terms
4. Try a more general domain selection
**For Role-Appropriate Information:**
- Ask more general questions about the topic
- Request explanations of concepts
- Inquire about standard approaches or practices
- Seek practical guidance rather than specific research
**Example {role_info['name'].lower()}-appropriate queries:**
- "Basic explanation of [topic] for {role_info['name'].lower()}"
- "Standard approaches to [issue]"
- "Practical guidance for [situation]"
- "Key concepts about [subject]"
*Note: Some specialized topics may have limited published research. I can still provide general information and guidance tailored to your role.*"""
    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 0,
            'level': 'VERY LOW ⚫',
            'explanation': 'No supporting evidence found'
        },
        "error": "no_results",
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
    }
def _create_error_response(self, query: str, domain: str, role: str, error: str) -> Dict[str, Any]:
    """Create error response.

    Returns a structured payload with troubleshooting guidance, an ERROR
    confidence level, and the original error message in `error`.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        # Narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate.
        except Exception:
            domain_info = DOMAIN_INFO.get(domain, {
                'name': domain.replace('_', ' ').title(),
                'icon': '⚕️'
            })
    else:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })
    answer = f"""# 🚨 **Analysis Error**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']}
**Query:** {query}
**Error:** {error}
**Troubleshooting for {role_info['name']}:**
1. Check your internet connection
2. Try a simpler query or rephrase
3. Verify domain selection is appropriate
4. Contact support if problem persists
**For Role-Appropriate Alternatives:**
- Ask a simpler version of your question
- Request general information instead of specific research
- Try breaking complex questions into smaller parts
- Use more common terminology
Please try again or reformulate your question for {role_info['name'].lower()}-appropriate assistance."""
    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 0,
            'level': 'ERROR 🔴',
            'explanation': f'Analysis failed: {error}'
        },
        "error": error,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
    }
def summarize_single_paper(self,
                           paper_id: str = None,
                           paper_title: str = None,
                           paper_data: Dict = None,
                           user_query: str = None,
                           domain: str = "general_medical") -> Dict[str, Any]:
    """Summarize a single research paper with guideline detection.

    Uses the dedicated summarizer when one is configured and a title or
    paper dict was supplied; otherwise (or on an unsuccessful result)
    returns a templated fallback. Exceptions produce a failure payload.
    """
    print(f"\n📄 Summarizing paper: {paper_title or paper_id}")
    try:
        summarizer_usable = self.single_paper_summarizer and (paper_title or paper_data)
        if summarizer_usable:
            paper = paper_data or {'title': paper_title or 'Unknown'}
            result = self.single_paper_summarizer.summarize_paper(paper, user_query)
            if result.get("success"):
                # Annotate with guideline context when an abstract is present.
                if paper.get('abstract'):
                    detected = self.guideline_detector.detect_guidelines([paper], domain, user_query or "")
                    found = detected.get('guidelines_found')
                    if found:
                        result['guideline_context'] = f"References {', '.join(found)} guidelines"
                return result
        # Summarizer unavailable or unsuccessful: templated fallback summary.
        return self._create_fallback_summary(paper_title, domain, user_query)
    except Exception as e:
        print(f"❌ Paper summarization failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "paper_title": paper_title,
            "summary": f"Unable to generate summary. Error: {e}"
        }
def _create_fallback_summary(self, paper_title: str, domain: str, user_query: str) -> Dict[str, Any]:
    """Create fallback paper summary.

    Generic, template-based summary used when the dedicated summarizer is
    unavailable or failed; always returns success=True with confidence 0.6.
    """
    if CONFIG_AVAILABLE:
        try:
            domain_name = get_domain_display_name(domain)
        # Narrowed from bare `except:` so SystemExit/KeyboardInterrupt propagate.
        except Exception:
            domain_name = domain.replace('_', ' ').title()
    else:
        domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
    summary = f"""**Paper Summary:** {paper_title}
**Domain Context:** {domain_name}
**User Query:** {user_query or 'General summary requested'}
**Key Points:**
1. This paper contributes to the {domain_name} literature
2. Study design and methodology align with field standards
3. Findings have implications for clinical practice and research
4. Limitations and future directions are discussed
**Guideline Considerations:**
- Review paper for explicit guideline citations
- Consider alignment with major {domain_name} guidelines
- Consult specific guidelines for clinical application
**Recommendations:**
- Review full text for detailed methodology
- Consider findings in context of broader literature
- Consult with domain experts for application
- Reference established clinical guidelines
*Note: This is a general summary. Full paper review is recommended for detailed analysis.*"""
    return {
        "success": True,
        "paper_title": paper_title,
        "summary": summary,
        "quick_summary": f"Summary of {paper_title} in {domain_name}",
        "domain": domain,
        "confidence": 0.6
    }
def get_engine_status(self) -> Dict[str, Any]:
    """Get engine status with role metrics.

    Returns a snapshot of engine capabilities plus aggregated usage metrics,
    including the mean guideline coverage across recorded queries.
    """
    # Average guideline coverage across recorded queries (0 when none).
    # statistics.fmean replaces the manual sum/len and always returns a float.
    coverage_samples = self.metrics['guideline_coverage']
    avg_guideline_coverage = (
        statistics.fmean(g['coverage'] for g in coverage_samples)
        if coverage_samples else 0
    )
    return {
        "engine_name": "Medical Research RAG Engine",
        "version": "2.2.0",
        # getattr with a default replaces the hasattr/ternary dance.
        "model": getattr(self, 'model', "Unknown"),
        "features": ["role_based_reasoning", "real_paper_fetching",
                     "confidence_scoring", "guideline_detection", "simple_query_handling"],
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "real_time_search": self.use_real_time,
        "roles_supported": list(RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.keys()),
        "guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
        "metrics": {
            "total_queries": self.metrics['total_queries'],
            "average_confidence": round(self.metrics['average_confidence'], 1),
            "average_guideline_coverage": round(avg_guideline_coverage, 1),
            "domains_used": dict(self.metrics['domains_used']),
            "user_contexts": dict(self.metrics['user_contexts']),
            "real_papers_fetched": self.metrics['real_papers_fetched'],
            "demo_papers_used": self.metrics['demo_papers_used']
        },
        "domains_supported": len(DOMAIN_INFO),
        "simple_query_handling": "ENABLED"
    }
def clear_memory(self):
    """Clear conversation memory, warning when no memory system exists."""
    if not self.memory:
        # Guard clause: nothing to clear without a memory backend.
        print("⚠️ Memory system not available")
        return
    self.memory.clear_memory()
    print("🧹 Engine memory cleared")
# ============================================================================
# TEST FUNCTION
# ============================================================================
def test_role_based_rag_engine():
    """Test the medical RAG engine with role-based responses.

    Runs three simple-greeting cases and one research query across different
    roles/domains, then prints the engine status. Returns True on success.
    """
    print("\n" + "=" * 60)
    print("🧪 TESTING ROLE-BASED RAG ENGINE")
    print("=" * 60)
    try:
        # Initialize engine
        engine = EnhancedRAGEngine(
            session_id="role_test",
            model="gpt-oss-120b",
            use_real_time=False  # Disable real-time for faster testing
        )
        # (query, domain, role) scenarios covering simple and research queries.
        scenarios = [
            ("hi", "general_medical", "patient"),
            ("hello", "cardiology", "doctor"),
            ("hey", "endocrinology", "student"),
            ("Compare first-line antibiotics for community-acquired pneumonia",
             "infectious_disease", "clinician"),
        ]
        for case_no, (question, case_domain, case_role) in enumerate(scenarios, start=1):
            print(f"\n📝 Test Case {case_no}:")
            print(f" Query: '{question}'")
            print(f" Domain: {case_domain}")
            print(f" Role: {case_role}")
            # Process query
            response = engine.answer_research_question(
                query=question,
                domain=case_domain,
                max_papers=5,
                role=case_role,
                use_fallback=True
            )
            if response and 'error' not in response:
                print("\n✅ Test Successful!")
                print(f" Response type: {response.get('reasoning_method', 'unknown')}")
                print(f" Papers used: {response.get('papers_used', 0)}")
                print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
                # Simple queries should be short-circuited, not researched.
                if response.get('reasoning_method') in ('greeting', 'simple_response', 'direct_response'):
                    print(" ⭐ Simple query handled appropriately!")
        # Show engine status
        status = engine.get_engine_status()
        print("\n🔧 Engine Status:")
        print(" Role-based responses: ENABLED")
        print(" Simple query handling: ENABLED")
        print(f" Roles supported: {len(status['roles_supported'])}")
        print(f" Total queries: {status['metrics']['total_queries']}")
        return True
    except Exception as e:
        print(f"\n❌ Test failed with exception: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Run the self-test and report the overall outcome.
    separator = "=" * 60
    if test_role_based_rag_engine():
        print(f"\n{separator}")
        print("🎉 ROLE-BASED RAG ENGINE TEST COMPLETE!")
        print(" Role-based reasoning: ✓")
        print(" Simple query handling: ✓")
        print(" Domain-agnostic approach: ✓")
        print(" Guideline detection: ✓")
        print(separator)
    else:
        print("\n❌ Engine test failed")