Spaces:
Running
Running
| """ | |
| rag_engine.py - Production-Ready Medical RAG Engine | |
| Updated with role-based response handling and improved simple query detection | |
| """ | |
| from typing import List, Dict, Any, Optional, Tuple | |
| import re | |
| import json | |
| import time | |
| import random | |
| from datetime import datetime | |
| from collections import Counter | |
| import statistics | |
# Title templates used to synthesize paper titles, keyed by medical domain.
# Each template contains ``str.format``-style placeholders such as
# ``{condition}`` or ``{treatment}``.
PAPER_TEMPLATES = dict(
    infectious_disease=[
        "Comparison of {drug1} vs {drug2} for {condition}: A randomized controlled trial",
        "Clinical outcomes of {treatment} in patients with {condition}: A multicenter study",
        "Risk factors and management of {condition} in the ICU setting",
        "Antimicrobial resistance patterns in {condition}: A retrospective analysis",
        "Efficacy and safety of {treatment} for {condition}: A systematic review and meta-analysis",
        "Diagnostic approaches for {condition}: Current evidence and guidelines",
        "Prevention strategies for {condition} in hospitalized patients",
        "Economic impact of {treatment} for {condition} in diverse healthcare settings",
    ],
    cardiology=[
        "Long-term outcomes of {intervention} in {condition}: The {acronym} trial",
        "Novel biomarkers for predicting {outcome} in {condition}",
        "Comparison of invasive vs conservative strategies for {condition}",
        "Role of {medication} in secondary prevention of {condition}",
        "Advances in imaging techniques for {condition} diagnosis",
        "Genetic predictors of treatment response in {condition}",
        "Quality of life outcomes following {procedure} for {condition}",
    ],
    endocrinology=[
        "Real-world effectiveness of {drug_class} in {condition} management",
        "Impact of {lifestyle_intervention} on glycemic control in {condition}",
        "Novel insulin delivery systems for {condition}: Patient-reported outcomes",
        "Thyroid dysfunction in patients with {comorbidity}: Screening and management",
        "Bone health in patients receiving {treatment} for {condition}",
        "Hormonal therapies for {condition}: Comparative effectiveness analysis",
    ],
    neurology=[
        "Early diagnosis and intervention in {condition}: Impact on long-term outcomes",
        "Neuroimaging biomarkers for {condition} progression",
        "Novel therapeutic targets in {condition} pathogenesis",
        "Cognitive rehabilitation strategies for {condition}: A randomized trial",
        "Genetic and environmental risk factors for {condition}",
        "Quality of life measures in {condition} clinical trials",
    ],
    oncology=[
        "Biomarker-driven therapy for {cancer_type}: Current status and future directions",
        "Immunotherapy combinations in {cancer_type}: Efficacy and toxicity profiles",
        "Liquid biopsy applications in {cancer_type} management",
        "Supportive care interventions for {cancer_type} treatment side effects",
        "Cost-effectiveness of targeted therapies in {cancer_type}",
        "Survivorship issues in {cancer_type}: Long-term follow-up data",
    ],
    internal_medicine=[
        "Management of {condition} in elderly patients with multiple comorbidities",
        "Diagnostic uncertainty in {condition}: A clinical decision-making framework",
        "Transition of care for patients with {condition}: Best practices",
        "Polypharmacy management in patients with {condition}",
        "Telemedicine applications for {condition} follow-up",
        "Patient education strategies for {condition} self-management",
    ],
)
# Vocabulary for filling paper-title templates; per the original note this is
# now only a fallback source of terms.
MEDICAL_TERMS = dict(
    drugs=[
        'amoxicillin-clavulanate',
        'azithromycin',
        'ceftriaxone',
        'doxycycline',
        'levofloxacin',
        'meropenem',
        'vancomycin',
        'piperacillin-tazobactam',
        'linezolid',
        'metronidazole',
    ],
    conditions=[
        'community-acquired pneumonia',
        'hospital-acquired pneumonia',
        'sepsis',
        'urinary tract infection',
        'skin and soft tissue infection',
        'intra-abdominal infection',
        'meningitis',
        'endocarditis',
    ],
    treatments=[
        'antibiotic therapy',
        'source control',
        'resuscitation',
        'ventilator management',
        'infection prevention',
        'antimicrobial stewardship',
    ],
    outcomes=[
        'clinical cure',
        'mortality',
        'length of stay',
        'readmission',
        'antibiotic resistance',
        'adverse events',
        'cost-effectiveness',
        'quality of life',
    ],
)
| # ============================================================================ | |
| # GUIDELINE DETECTION SYSTEM | |
| # ============================================================================ | |
class GuidelineDetector:
    """Detect explicit clinical-guideline citations in paper titles/abstracts.

    The class is a namespace: it holds the guideline pattern tables and the
    ``detect_guidelines`` static method, and keeps no instance state.
    """

    # Guideline name -> text patterns that count as a citation, per domain.
    GUIDELINE_DATABASES = {
        # Diabetes/Endocrinology
        'endocrinology': {
            'IDF': ['IDF', 'International Diabetes Federation', 'International Diabetes Federation guidelines'],
            'ADA': ['ADA', 'American Diabetes Association', 'American Diabetes Association guidelines',
                    'ADA/EASD', 'ADA Standards of Care'],
            'EASD': ['EASD', 'European Association for the Study of Diabetes'],
            'AACE': ['AACE', 'American Association of Clinical Endocrinologists'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence', 'NICE guidelines'],
            'WHO': ['WHO', 'World Health Organization', 'WHO guidelines for diabetes'],
            'ATP III': ['ATP III', 'Adult Treatment Panel III', 'NCEP ATP III'],
            'KDIGO': ['KDIGO', 'Kidney Disease Improving Global Outcomes'],
            'ESC': ['ESC', 'European Society of Cardiology', 'ESC/EASD'],
            'AHA': ['AHA', 'American Heart Association']
        },
        # Cardiology
        'cardiology': {
            'ACC/AHA': ['ACC/AHA', 'American College of Cardiology/American Heart Association',
                        'ACC/AHA guidelines', 'AHA/ACC'],
            'ESC': ['ESC', 'European Society of Cardiology', 'ESC guidelines'],
            'AHA': ['AHA', 'American Heart Association', 'AHA guidelines'],
            'ACC': ['ACC', 'American College of Cardiology'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
            'WHO': ['WHO', 'World Health Organization'],
            'ATP III': ['ATP III', 'Adult Treatment Panel III', 'NCEP ATP III'],
            'JNC': ['JNC', 'Joint National Committee', 'JNC 8', 'JNC 7'],
            'CHEP': ['CHEP', 'Canadian Hypertension Education Program'],
            'CCS': ['CCS', 'Canadian Cardiovascular Society']
        },
        # Infectious Diseases
        'infectious_disease': {
            'IDSA': ['IDSA', 'Infectious Diseases Society of America', 'IDSA guidelines'],
            'ATS': ['ATS', 'American Thoracic Society', 'ATS/IDSA'],
            'CDC': ['CDC', 'Centers for Disease Control and Prevention', 'CDC guidelines'],
            'WHO': ['WHO', 'World Health Organization', 'WHO guidelines'],
            'ECDC': ['ECDC', 'European Centre for Disease Prevention and Control'],
            'SHEA': ['SHEA', 'Society for Healthcare Epidemiology of America'],
            'ESCMID': ['ESCMID', 'European Society of Clinical Microbiology and Infectious Diseases'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Oncology
        'oncology': {
            'NCCN': ['NCCN', 'National Comprehensive Cancer Network', 'NCCN guidelines'],
            'ASCO': ['ASCO', 'American Society of Clinical Oncology', 'ASCO guidelines'],
            'ESMO': ['ESMO', 'European Society for Medical Oncology', 'ESMO guidelines'],
            'AJCC': ['AJCC', 'American Joint Committee on Cancer'],
            'WHO': ['WHO', 'World Health Organization'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Neurology
        'neurology': {
            'AAN': ['AAN', 'American Academy of Neurology', 'AAN guidelines'],
            'EFNS': ['EFNS', 'European Federation of Neurological Societies'],
            'EAN': ['EAN', 'European Academy of Neurology'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
            'WHO': ['WHO', 'World Health Organization']
        },
        # Internal Medicine (General)
        'internal_medicine': {
            'ACP': ['ACP', 'American College of Physicians', 'ACP guidelines'],
            'ACC/AHA': ['ACC/AHA', 'American College of Cardiology/American Heart Association'],
            'IDSA': ['IDSA', 'Infectious Diseases Society of America'],
            'ATS': ['ATS', 'American Thoracic Society'],
            'ADA': ['ADA', 'American Diabetes Association'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
            'WHO': ['WHO', 'World Health Organization'],
            'USPSTF': ['USPSTF', 'U.S. Preventive Services Task Force']
        },
        # Pulmonology
        'pulmonology': {
            'ATS': ['ATS', 'American Thoracic Society', 'ATS guidelines'],
            'ERS': ['ERS', 'European Respiratory Society'],
            'GOLD': ['GOLD', 'Global Initiative for Chronic Obstructive Lung Disease'],
            'GINA': ['GINA', 'Global Initiative for Asthma'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Gastroenterology
        'gastroenterology': {
            'AGA': ['AGA', 'American Gastroenterological Association', 'AGA guidelines'],
            'ACG': ['ACG', 'American College of Gastroenterology'],
            'UEG': ['UEG', 'United European Gastroenterology'],
            'ESGE': ['ESGE', 'European Society of Gastrointestinal Endoscopy'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Nephrology
        'nephrology': {
            'KDIGO': ['KDIGO', 'Kidney Disease Improving Global Outcomes', 'KDIGO guidelines'],
            'NKF': ['NKF', 'National Kidney Foundation', 'NKF/KDOQI'],
            'KDOQI': ['KDOQI', 'Kidney Disease Outcomes Quality Initiative'],
            'ERA': ['ERA', 'European Renal Association'],
            'NICE': ['NICE', 'National Institute for Health and Care Excellence']
        },
        # Hematology
        'hematology': {
            'ASH': ['ASH', 'American Society of Hematology', 'ASH guidelines'],
            'ESMO': ['ESMO', 'European Society for Medical Oncology'],
            'NCCN': ['NCCN', 'National Comprehensive Cancer Network'],
            'WHO': ['WHO', 'World Health Organization']
        }
    }

    # Guidelines that should be mentioned, keyed by domain or condition.
    CRITICAL_GUIDELINES = {
        'endocrinology': ['ADA', 'IDF', 'EASD', 'AACE', 'NICE'],
        'cardiology': ['ACC/AHA', 'ESC', 'AHA', 'NICE'],
        'infectious_disease': ['IDSA', 'ATS', 'CDC', 'WHO'],
        'oncology': ['NCCN', 'ASCO', 'ESMO'],
        'diabetes': ['ADA', 'IDF', 'EASD', 'AACE'],
        'hypertension': ['ACC/AHA', 'ESC', 'JNC', 'NICE'],
        'hyperlipidemia': ['ACC/AHA', 'ESC', 'NICE', 'ATP III'],
        'heart_failure': ['ACC/AHA', 'ESC', 'NICE'],
        'pneumonia': ['IDSA', 'ATS', 'CDC'],
        'sepsis': ['SSC', 'IDSA', 'WHO'],
        'COPD': ['GOLD', 'ATS', 'ERS', 'NICE'],
        'asthma': ['GINA', 'ATS', 'ERS', 'NICE']
    }

    # (keywords looked for in the lowercased query, guidelines expected when
    # any keyword is present). Kept separate from CRITICAL_GUIDELINES because
    # the two lists differ slightly (e.g. NICE for COPD/asthma).
    _CONDITION_RULES = (
        (('diabetes', 'glycemic'), ('ADA', 'IDF', 'EASD', 'AACE')),
        (('hypertension', 'blood pressure'), ('ACC/AHA', 'ESC', 'JNC', 'NICE')),
        (('hyperlipidemia', 'cholesterol', 'lipid'), ('ACC/AHA', 'ESC', 'ATP III', 'NICE')),
        (('heart failure',), ('ACC/AHA', 'ESC', 'NICE')),
        (('pneumonia',), ('IDSA', 'ATS', 'CDC')),
        (('sepsis',), ('SSC', 'IDSA', 'WHO')),
        (('copd', 'chronic obstructive'), ('GOLD', 'ATS', 'ERS')),
        (('asthma',), ('GINA', 'ATS', 'ERS')),
    )

    @staticmethod
    def _condition_guidelines(query: str) -> List[str]:
        """Return the de-duplicated, ordered guidelines implied by the query text."""
        query_lower = query.lower()
        expected = []
        for keywords, guidelines in GuidelineDetector._CONDITION_RULES:
            if any(keyword in query_lower for keyword in keywords):
                expected.extend(guidelines)
        # dict.fromkeys drops duplicates (overlapping conditions) but keeps order.
        return list(dict.fromkeys(expected))

    @staticmethod
    def _patterns_for(name: str) -> List[str]:
        """Collect every citation pattern known for a guideline across all domains.

        Falls back to the guideline name itself when no database lists it
        (e.g. 'SSC'), so such a guideline can still be detected.
        """
        merged = {}
        for database in GuidelineDetector.GUIDELINE_DATABASES.values():
            for pattern in database.get(name, ()):
                merged.setdefault(pattern, None)
        return list(merged) or [name]

    @staticmethod
    def _compile_patterns(patterns: List[str]) -> list:
        """Compile patterns into whole-token regexes.

        All-uppercase patterns (acronyms such as WHO, NICE, ADA) are matched
        case-sensitively so ordinary words like "who" or "nice" do not count
        as citations; spelled-out names are matched case-insensitively.
        """
        compiled = []
        for pattern in patterns:
            flags = 0 if pattern.isupper() else re.IGNORECASE
            compiled.append(re.compile(
                r'(?<![A-Za-z0-9])' + re.escape(pattern) + r'(?![A-Za-z0-9])',
                flags,
            ))
        return compiled

    @staticmethod
    def detect_guidelines(papers: List[Dict], domain: str, query: str) -> Dict[str, Any]:
        """Detect guideline citations in papers and identify missing critical ones.

        Args:
            papers: Paper dicts; only the 'title' and 'abstract' keys are read
                and either may be missing or None.
            domain: Key into GUIDELINE_DATABASES / CRITICAL_GUIDELINES.
            query: User query; condition keywords in it select the guidelines
                that are expected to be cited.

        Returns:
            Dict with keys 'guidelines_found' (sorted names),
            'critical_missing', 'guideline_coverage' ('HIGH', 'MODERATE',
            'LOW', 'VERY_LOW', 'UNKNOWN' or 'NO_PAPERS'), 'recommendation',
            'papers_with_guidelines' (at most 5 entries),
            'total_guidelines_checked' and 'coverage_percentage'.
        """
        if not papers:
            return {
                'guidelines_found': [],
                'critical_missing': [],
                'guideline_coverage': 'NO_PAPERS',
                'recommendation': 'No papers available for guideline analysis',
                'papers_with_guidelines': [],
                'total_guidelines_checked': 0,
                'coverage_percentage': 0
            }

        domain_guidelines = GuidelineDetector.GUIDELINE_DATABASES.get(domain, {})
        condition_guidelines = GuidelineDetector._condition_guidelines(query)

        # Condition-specific guidelines take precedence; their patterns are
        # resolved across all domains so a guideline outside the selected
        # domain is still detectable instead of always being "missing".
        if condition_guidelines:
            guidelines_to_check = {
                name: GuidelineDetector._patterns_for(name)
                for name in condition_guidelines
            }
        else:
            guidelines_to_check = dict(domain_guidelines)

        # Compile once, outside the per-paper loop.
        matchers = {
            name: GuidelineDetector._compile_patterns(patterns)
            for name, patterns in guidelines_to_check.items()
        }

        found_guidelines = set()
        papers_with_guidelines = []
        for paper in papers:
            title = paper.get('title') or ''
            abstract = paper.get('abstract') or ''
            text = f"{title} {abstract}"
            paper_guidelines = [
                name for name, regexes in matchers.items()
                if any(regex.search(text) for regex in regexes)
            ]
            if paper_guidelines:
                found_guidelines.update(paper_guidelines)
                papers_with_guidelines.append({
                    'title': (title or 'Untitled')[:100],
                    'guidelines': paper_guidelines
                })

        # Determine which expected guidelines were never cited.
        if condition_guidelines:
            critical = condition_guidelines
        elif domain_guidelines:
            critical = GuidelineDetector.CRITICAL_GUIDELINES.get(domain, [])
        else:
            critical = []
        critical_missing = [name for name in critical if name not in found_guidelines]

        # Coverage score over the guidelines that were actually checked.
        coverage_percentage = 0
        if guidelines_to_check:
            coverage_percentage = (len(found_guidelines) / len(guidelines_to_check)) * 100
            if coverage_percentage >= 75:
                coverage = 'HIGH'
            elif coverage_percentage >= 50:
                coverage = 'MODERATE'
            elif coverage_percentage >= 25:
                coverage = 'LOW'
            else:
                coverage = 'VERY_LOW'
        else:
            coverage = 'UNKNOWN'

        if critical_missing:
            shown = ', '.join(critical_missing[:3])
            # Only add an ellipsis when the list was actually truncated.
            suffix = '...' if len(critical_missing) > 3 else ''
            recommendation = f"Missing explicit guideline citations ({shown}{suffix})"
        elif found_guidelines:
            recommendation = (
                f"Guideline coverage: {len(found_guidelines)}/{len(guidelines_to_check)} "
                f"major guidelines referenced"
            )
        else:
            recommendation = "No explicit guideline citations detected"

        return {
            'guidelines_found': sorted(found_guidelines),
            'critical_missing': critical_missing,
            'guideline_coverage': coverage,
            'recommendation': recommendation,
            'papers_with_guidelines': papers_with_guidelines[:5],  # first 5 papers citing guidelines
            'total_guidelines_checked': len(guidelines_to_check),
            'coverage_percentage': round(coverage_percentage, 1) if guidelines_to_check else 0
        }
# Make the project root importable so the package-style imports below
# (processing.*, chat.*, lib.*, llm.*, config.*) can resolve.
import sys
import os

current_dir = os.path.dirname(os.path.abspath(__file__))
# Parent of this file's directory (per the original note: chat/ -> MedSearchPro/).
project_root = os.path.dirname(current_dir)
if project_root not in sys.path:
    sys.path.insert(0, project_root)
    print(f"✅ Added project root to sys.path: {project_root}")

# Optional project modules. If any of them is unavailable, every name is
# bound to None so later code can test for availability instead of failing
# with NameError.
try:
    from processing.vector_store import VectorStore
    from chat.summarizer import MultiDocumentSummarizer
    from chat.single_paper_summarizer import SinglePaperSummarizer
    from chat.comparator import CrossPaperComparator
    from chat.gap_analyzer import ResearchGapAnalyzer
    from lib.memory_manager import ConversationMemory
    from llm.llm_provider import XAIGrokProvider, GrokLLM
except ImportError as e:
    print(f"⚠️ Some dependencies not found - using simplified mode: {e}")
    # Set fallback values for all imported modules
    VectorStore = None
    MultiDocumentSummarizer = None
    SinglePaperSummarizer = None
    CrossPaperComparator = None
    ResearchGapAnalyzer = None
    ConversationMemory = None
    XAIGrokProvider = None  # was missing from the fallback list
    GrokLLM = None

# Domain-mapping helpers; CONFIG_AVAILABLE records whether they were imported.
try:
    from config.domains import (
        get_domain_display_name, get_domain_description,
        validate_domain, get_all_domains
    )
    CONFIG_AVAILABLE = True
except ImportError:
    print("⚠️ config.domains not found - using fallback domain info")
    CONFIG_AVAILABLE = False
| # ============================================================================ | |
| # ROLE-BASED REASONING FOR MEDICAL RESEARCH | |
| # ============================================================================ | |
class RoleBasedReasoning:
    """Build LLM prompts tailored to the user's role (patient, clinician, ...).

    The class is a namespace: it holds the role prompt tables and the
    ``create_role_prompt`` static method, and keeps no instance state.
    """

    # Role definitions with domain-agnostic prompts
    ROLE_SYSTEM_PROMPTS = {
        'patient': {
            'name': 'Patient',
            'icon': '🩺',
            'prompt': '''You are helping a patient understand information. Use simple, clear, reassuring language.
- Focus on practical implications and what they need to know
- Avoid complex terminology or jargon
- Emphasize safety and when to seek professional help
- Be compassionate and supportive
- Do not provide diagnoses or specific medical advice
- Explain concepts in everyday terms'''
        },
        'student': {
            'name': 'Student',
            'icon': '🎓',
            'prompt': '''You are teaching a student. Focus on educational value and understanding.
- Explain foundational concepts and definitions
- Provide examples and analogies
- Encourage critical thinking and questions
- Structure information logically
- Connect to broader knowledge areas
- Mention learning resources when helpful'''
        },
        'clinician': {
            'name': 'Clinician',
            'icon': '👨⚕️',
            'prompt': '''You are assisting a healthcare professional. Be concise, actionable, and evidence-based.
- Focus on practical implications and decision-making
- Reference guidelines and evidence levels when relevant
- Consider workflow and implementation
- Be precise but efficient with time
- Address risks and benefits clearly
- Maintain professional tone'''
        },
        'doctor': {
            'name': 'Doctor',
            'icon': '⚕️',
            'prompt': '''You are assisting a physician. Use appropriate terminology and clinical reasoning.
- Focus on differential diagnosis, treatment options, and management
- Reference current standards of care and guidelines
- Consider patient factors and comorbidities
- Discuss evidence quality and limitations
- Be thorough but organized
- Maintain clinical accuracy'''
        },
        'researcher': {
            'name': 'Researcher',
            'icon': '🔬',
            'prompt': '''You are assisting a research scientist. Focus on methodology and evidence.
- Discuss study designs, methods, and limitations
- Analyze evidence quality and gaps
- Consider statistical significance and clinical relevance
- Reference current literature and trends
- Discuss implications for future research
- Maintain scientific rigor'''
        },
        'professor': {
            'name': 'Professor',
            'icon': '📚',
            'prompt': '''You are assisting an academic educator. Focus on knowledge synthesis and pedagogy.
- Provide comprehensive overviews with context
- Compare theories, methods, and findings
- Discuss historical development and future directions
- Emphasize critical evaluation and synthesis
- Connect to broader academic discourse
- Support teaching and learning objectives'''
        },
        'pharmacist': {
            'name': 'Pharmacist',
            'icon': '💊',
            'prompt': '''You are assisting a pharmacy professional. Focus on medications and safety.
- Discuss drug mechanisms, interactions, and pharmacokinetics
- Emphasize safety profiles and monitoring
- Consider dosing, administration, and compliance
- Address patient counseling points
- Reference formularies and guidelines
- Maintain focus on medication optimization'''
        },
        'general': {
            'name': 'General User',
            'icon': '👤',
            'prompt': '''You are assisting a general user. Provide balanced, accessible information.
- Adjust complexity based on the query
- Be helpful and informative without overwhelming
- Provide context and practical implications
- Use clear language with minimal jargon
- Consider diverse backgrounds and knowledge levels
- Maintain neutral, objective tone'''
        }
    }

    # Queries that, after normalization, are treated as plain greetings.
    _GREETINGS = frozenset([
        'hi', 'hello', 'hey', 'greetings', 'good morning', 'good afternoon',
        'good evening', 'how are you', "what's up", 'sup',
    ])

    # role -> (persona line, instruction line, example greeting)
    _GREETING_PROMPTS = {
        'patient': (
            "You are helping a patient. Use warm, reassuring tone.",
            "Respond with a friendly greeting and invitation to ask questions. Keep it brief and welcoming.",
            "Hello! I'm here to help answer your health questions in simple, clear terms. What would you like to know?",
        ),
        'student': (
            "You are teaching a student.",
            "Respond with an encouraging greeting that invites learning questions.",
            "Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
        ),
        'clinician': (
            "You are assisting a healthcare professional.",
            "Respond with a professional greeting appropriate for clinical setting.",
            "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
        ),
        'researcher': (
            "You are assisting an academic professional.",
            "Respond with a scholarly greeting that invites research questions.",
            "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
        ),
        'pharmacist': (
            "You are assisting a pharmacy professional.",
            "Respond with a professional greeting focused on medication information.",
            "Hello. I can help with medication-related questions and information. How can I assist you today?",
        ),
        'general': (
            "You are assisting a general user.",
            "Respond with a friendly, welcoming greeting.",
            "Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?",
        ),
    }
    # Roles that share another role's greeting.
    _GREETING_PROMPTS['doctor'] = _GREETING_PROMPTS['clinician']
    _GREETING_PROMPTS['professor'] = _GREETING_PROMPTS['researcher']

    _DOMAIN_AGNOSTIC = "\n".join([
        "DOMAIN-AGNOSTIC APPROACH:",
        "- This system can answer questions from ANY domain (tech, finance, health, education, general)",
        "- Adapt your expertise to the query domain naturally",
        "- Do NOT force medical framing on non-medical questions",
        "- Only emphasize citations/guidelines when the query domain and role demand it",
        "- Use appropriate terminology for the query's domain",
    ])

    @staticmethod
    def _is_greeting(query: str) -> bool:
        """Return True for an empty query or a known greeting.

        Punctuation (except apostrophes) and extra whitespace are ignored, so
        "Hello!" and "  hi " count as greetings. Query length alone is not
        used: short real questions such as "sepsis treatment" must not be
        answered with a greeting.
        """
        without_punctuation = re.sub(r"[^\w\s']", ' ', query.lower())
        normalized = ' '.join(without_punctuation.split())
        return not normalized or normalized in RoleBasedReasoning._GREETINGS

    @staticmethod
    def create_role_prompt(query: str, domain: str, role: str,
                           papers_count: int = 0,
                           guideline_info: Optional[Dict] = None) -> str:
        """Create a role-appropriate prompt with a domain-agnostic focus.

        Args:
            query: The user's question or greeting.
            domain: Domain label inserted into the prompt as context.
            role: Key of ROLE_SYSTEM_PROMPTS; unknown roles use 'general'.
            papers_count: Number of retrieved sources; mentioned when > 0.
            guideline_info: Optional dict; its 'guidelines_found' and
                'critical_missing' lists (if present and non-empty) are
                summarized in the prompt.

        Returns:
            A short greeting prompt for greetings/empty queries, otherwise the
            full role-based analysis prompt.
        """
        roles = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS
        role_info = roles.get(role, roles['general'])

        if RoleBasedReasoning._is_greeting(query):
            greetings = RoleBasedReasoning._GREETING_PROMPTS
            persona, instruction, example = greetings.get(role, greetings['general'])
            return f'{persona}\nQuery: {query}\n{instruction}\nExample: "{example}"'

        # Substantive query: build the full role-based analysis prompt.
        audience = role_info['name'].lower()
        if role in ('clinician', 'doctor', 'researcher'):
            evidence_line = 'Reference evidence/guidelines when domain-appropriate'
        else:
            evidence_line = 'Mention evidence when helpful, not required'
        if role == 'patient':
            safety_line = 'Include appropriate disclaimers'
        else:
            safety_line = 'Maintain professional standards'

        prompt = "\n".join([
            f"ROLE: {role_info['name']} {role_info['icon']}",
            role_info['prompt'],
            RoleBasedReasoning._DOMAIN_AGNOSTIC,
            f"QUERY: {query}",
            f"QUERY DOMAIN CONTEXT: {domain} (adapt your response appropriately)",
            "RESPONSE GUIDELINES:",
            "1. **Role-Appropriate Depth:**",
            f"- {role}: Adjust response complexity for {audience} needs",
            "2. **Terminology Level:**",
            f"- Use language appropriate for {audience} understanding",
            "3. **Evidence Awareness:**",
            f"- {evidence_line}",
            "4. **Safety & Practicality:**",
            f"- {safety_line}",
            "5. **Response Structure:**",
            f"- Organize information logically for {audience} understanding",
            "- Prioritize most relevant information first",
            "- Keep response focused and actionable",
        ])

        # Add research context if we have papers
        if papers_count > 0:
            prompt += f"\n\nRESEARCH CONTEXT: Analyzing {papers_count} relevant sources"

        # Add guideline context if available
        if guideline_info:
            if guideline_info.get('guidelines_found'):
                prompt += f"\nGUIDELINES REFERENCED: {', '.join(guideline_info['guidelines_found'])}"
            if guideline_info.get('critical_missing'):
                # Only the first two gaps are listed to keep the prompt short.
                prompt += f"\nGUIDELINE GAPS: Missing {', '.join(guideline_info['critical_missing'][:2])}"

        prompt += f"\n\nPlease provide a {audience}-appropriate response to: {query}"
        return prompt
| # ============================================================================ | |
| # DOMAIN CONFIGURATION | |
| # ============================================================================ | |
# Display metadata per domain key (name, icon, description); used as the
# fallback when config.domains cannot be imported.
DOMAIN_INFO = {
    domain_key: {'name': display_name, 'icon': icon, 'description': description}
    for domain_key, display_name, icon, description in (
        ('internal_medicine', 'Internal Medicine', '🏥', 'General internal medicine and diagnosis'),
        ('endocrinology', 'Endocrinology', '🧬', 'Hormonal and metabolic disorders'),
        ('cardiology', 'Cardiology', '❤️', 'Heart and cardiovascular diseases'),
        ('neurology', 'Neurology', '🧠', 'Brain and nervous system disorders'),
        ('oncology', 'Oncology', '🦠', 'Cancer research and treatment'),
        ('infectious_disease', 'Infectious Diseases', '🦠', 'Infectious diseases and microbiology'),
        ('clinical_research', 'Clinical Research', '📊', 'Clinical trials and evidence-based medicine'),
        ('general_medical', 'General Medical', '⚕️', 'General medical research'),
        ('pulmonology', 'Pulmonology', '🫁', 'Respiratory diseases and lung health'),
        ('gastroenterology', 'Gastroenterology', '🍽️', 'Digestive system disorders'),
        ('nephrology', 'Nephrology', '🫘', 'Kidney diseases and disorders'),
        ('hematology', 'Hematology', '🩸', 'Blood disorders and hematologic diseases'),
        ('surgery', 'Surgery', '🔪', 'Surgical procedures and interventions'),
        ('orthopedics', 'Orthopedics', '🦴', 'Musculoskeletal disorders and injuries'),
        ('urology', 'Urology', '🚽', 'Urinary tract and male reproductive system'),
        ('ophthalmology', 'Ophthalmology', '👁️', 'Eye diseases and vision disorders'),
        ('dermatology', 'Dermatology', '🦋', 'Skin diseases and disorders'),
        ('psychiatry', 'Psychiatry', '🧘', 'Mental health and psychiatric disorders'),
        ('obstetrics_gynecology', 'Obstetrics & Gynecology', '🤰', "Women's health and reproductive medicine"),
        ('pediatrics', 'Pediatrics', '👶', 'Child health and pediatric medicine'),
        ('emergency_medicine', 'Emergency Medicine', '🚑', 'Emergency care and acute medicine'),
        ('critical_care', 'Critical Care Medicine', '🏥', 'Intensive care and critical care medicine'),
        ('pathology', 'Pathology', '🔬', 'Disease diagnosis and laboratory medicine'),
        ('laboratory_medicine', 'Laboratory Medicine', '🧪', 'Clinical laboratory testing and diagnostics'),
        ('medical_imaging', 'Medical Imaging & Radiology AI', '📷', 'Medical imaging and radiological diagnosis'),
        ('bioinformatics', 'Bioinformatics', '💻', 'Computational biology and data analysis'),
        ('genomics', 'Genomics & Sequencing', '🧬', 'Genomic research and sequencing technologies'),
        ('pharmacology', 'Pharmacology', '💊', 'Drug research and pharmacology'),
        ('public_health', 'Public Health Analytics', '🌍', 'Public health and epidemiology'),
        ('pain_medicine', 'Pain Medicine', '🩹', 'Pain management and treatment'),
        ('nutrition', 'Nutrition', '🍎', 'Nutritional science and dietetics'),
        ('allergy_immunology', 'Allergy & Immunology', '🤧', 'Allergies and immune system disorders'),
        ('rehabilitation_medicine', 'Rehabilitation Medicine', '♿', 'Physical medicine and rehabilitation'),
        ('auto', 'Auto-detect', '🔍', 'Automatic domain detection'),
    )
}
# Display metadata (name, icon, description) for each supported user context.
USER_CONTEXT_INFO = {
    context_key: {'name': display_name, 'icon': icon, 'description': description}
    for context_key, display_name, icon, description in (
        ('clinician', 'Clinician', '👨⚕️', 'Medical doctors, nurses, and healthcare providers'),
        ('researcher', 'Researcher', '🔬', 'Academic researchers and scientists'),
        ('student', 'Student', '🎓', 'Medical students and trainees'),
        ('patient', 'Patient', '👤', 'Patients and general public'),
        ('general', 'General', '👤', 'General audience'),
    )
}
| # ============================================================================ | |
| # CORE COMPONENTS | |
| # ============================================================================ | |
class PaperRanker:
    """Rank papers by relevance to a query and, optionally, a medical domain.

    The score is an additive heuristic built from title/abstract keyword
    overlap, domain keywords, guideline mentions, recency and source name.
    """

    # Lowercase phrases that signal guideline content. Matched as substrings
    # so plurals such as "guidelines" also count.
    _GUIDELINE_PHRASES = ('guideline', 'recommendation', 'consensus', 'position statement')
    # Society/agency acronyms. Matched case-sensitively as whole words against
    # the original text: a lowercase substring test would fire on ordinary
    # words such as "adaptive", "according", "describe" or "who".
    _GUIDELINE_ACRONYMS = re.compile(r'\b(?:ADA|AHA|ACC|ESC|IDSA|ATS|NCCN|NICE|WHO)\b')
    _HIGH_QUALITY_SOURCES = ('pubmed', 'nejm', 'lancet', 'jama', 'nature', 'science',
                             'circulation', 'jacc', 'jco', 'cell', 'bmj')
    _YEAR_PATTERN = re.compile(r'\b(?:19|20)\d{2}\b')
    # Fields searched for a publication year, in priority order.
    _DATE_FIELDS = ('publication_date', 'date')

    def __init__(self):
        # Kept for interface compatibility; nothing in this class reads it.
        self.query_cache = {}

    def rank_papers(self, papers: List[Dict], query: str, domain: str = None,
                    user_context: str = "general") -> List[Dict]:
        """Rank papers by relevance with guideline bonus.

        Args:
            papers: Paper dicts. Each one is annotated in place with
                ``relevance_score`` and ``normalized_score`` (0-100).
            query: Free-text user query.
            domain: Optional domain key; only used when present in DOMAIN_INFO.
            user_context: Accepted for interface compatibility; not used
                by the scoring.

        Returns:
            The same paper dicts, sorted by descending relevance score
            (ties keep their input order).
        """
        if not papers:
            return []

        query_lower = query.lower()
        query_words = set(query_lower.split())
        current_year = datetime.now().year
        scored_papers = []

        for paper in papers:
            # `or ''` also covers fields that are present but set to None.
            raw_title = paper.get('title') or ''
            raw_abstract = paper.get('abstract') or ''
            title = raw_title.lower()
            abstract = raw_abstract.lower()
            score = 0

            # Title relevance: whole-query hit plus per-word hits
            if title:
                if query_lower in title:
                    score += 100
                score += sum(10 for word in query_words if word in title)

            # Abstract relevance
            if abstract:
                score += sum(5 for word in query_words if word in abstract)

            # Domain relevance
            if domain and domain in DOMAIN_INFO:
                domain_keywords = (
                    domain.split('_')[0],  # First part of domain name
                    domain.replace('_', ' '),
                )
                for keyword in domain_keywords:
                    if keyword in abstract or keyword in title:
                        score += 15

            # Guideline mention bonus
            if self._mentions_guideline(raw_title, raw_abstract):
                score += 30

            # Recency bonus
            year = self._extract_year(paper)
            if year:
                age = current_year - year
                if age <= 2:
                    score += 20
                elif age <= 5:
                    score += 10

            # Source quality
            source = (paper.get('source') or '').lower()
            if any(hq_source in source for hq_source in self._HIGH_QUALITY_SOURCES):
                score += 15

            paper['relevance_score'] = score
            scored_papers.append((score, paper))

        # Sort on the score only so equal scores keep input order and the
        # paper dicts themselves are never compared.
        scored_papers.sort(key=lambda item: item[0], reverse=True)

        # Normalize against the best score; always set the key so consumers
        # can rely on it even when nothing matched.
        max_score = scored_papers[0][0]
        for score, paper in scored_papers:
            paper['normalized_score'] = int((score / max_score) * 100) if max_score > 0 else 0

        return [paper for _, paper in scored_papers]

    def _mentions_guideline(self, title: str, abstract: str) -> bool:
        """Return True when the original-case title/abstract mention a guideline."""
        text = f"{title} {abstract}"
        lowered = text.lower()
        if any(phrase in lowered for phrase in self._GUIDELINE_PHRASES):
            return True
        return self._GUIDELINE_ACRONYMS.search(text) is not None

    def _extract_year(self, paper: Dict) -> Optional[int]:
        """Extract a four-digit year (19xx/20xx) from the paper's date fields.

        Returns None when no string date field contains a year.
        """
        for field in self._DATE_FIELDS:
            value = paper.get(field, '')
            if value and isinstance(value, str):
                match = self._YEAR_PATTERN.search(value)
                if match:
                    return int(match.group())
        return None
class ConfidenceScorer:
    """Calculate confidence scores for medical evidence with guideline consideration.

    Each factor is scored on a 0-100 scale and combined using the weights
    in ``self.score_weights``.
    """

    _HIGH_QUALITY_SOURCES = ('pubmed', 'nejm', 'lancet', 'jama', 'nature', 'science',
                             'circulation', 'jacc', 'jco', 'cell', 'bmj', 'springer')
    # Study-design phrases (lowercase, substring match) and their scores.
    _DESIGN_SCORES = {
        'randomized controlled trial': 100,
        'randomised controlled trial': 100,
        'prospective cohort': 80,
        'retrospective cohort': 60,
        'case-control': 50,
        'review': 30,
        'meta-analysis': 90,
        'systematic review': 85,
        'case report': 20,
        'case series': 25,
        'guideline': 95,
        'consensus': 90,
    }
    # The abbreviation is matched as a whole word on lowercased text; the
    # bare substring "rct" would also hit words such as "arctic".
    _RCT_ABBREVIATION = re.compile(r'\brcts?\b')
    _YEAR_PATTERN = re.compile(r'\b(?:19|20)\d{2}\b')
    # Fields searched for a publication year, in priority order.
    _DATE_FIELDS = ('publication_date', 'date')

    def __init__(self):
        self.score_weights = {
            'paper_count': 0.15,
            'paper_quality': 0.20,
            'study_design': 0.20,
            'evidence_recency': 0.15,
            'source_diversity': 0.10,
            'domain_relevance': 0.10,
            'guideline_alignment': 0.10  # New: Guideline alignment score
        }

    def calculate_confidence(self, papers: List[Dict], query: str,
                             analysis_type: str = "summary",
                             user_context: str = "general",
                             domain: str = "general_medical",
                             guideline_info: Dict = None) -> Dict[str, Any]:
        """Calculate confidence score with guideline consideration.

        Args:
            papers: Paper dicts used as evidence.
            query: User query; forwarded to the guideline factor.
            analysis_type: Accepted for interface compatibility; not used here.
            user_context: Accepted for interface compatibility; not used here.
            domain: Domain key used by the domain and guideline factors.
            guideline_info: Optional guideline-detection result.

        Returns:
            Dict with ``overall_score`` (capped at 100), ``level``,
            ``explanation`` and ``factors``; non-empty input additionally
            echoes ``guideline_info``.
        """
        if not papers:
            return {
                'overall_score': 0,
                'level': 'VERY LOW ⚫',
                'explanation': 'No supporting evidence',
                'factors': {}
            }

        # Calculate factors
        factors = {
            'paper_count': self._score_paper_count(papers),
            'paper_quality': self._score_paper_quality(papers),
            'study_design': self._score_study_design(papers),
            'evidence_recency': self._score_evidence_recency(papers),
            'source_diversity': self._score_source_diversity(papers),
            'domain_relevance': self._score_domain_relevance(papers, domain),
            'guideline_alignment': self._score_guideline_alignment(papers, domain, query, guideline_info)
        }

        # Weighted sum over the factors that have a configured weight
        overall_score = sum(
            factors[factor] * weight
            for factor, weight in self.score_weights.items()
            if factor in factors
        )

        # Generate explanation with guideline context
        explanation = self._generate_explanation(factors, overall_score, domain, guideline_info)

        return {
            'overall_score': round(min(100, overall_score), 1),
            'level': self._score_to_level(overall_score),
            'explanation': explanation,
            'factors': {k: round(v, 1) for k, v in factors.items()},
            'guideline_info': guideline_info
        }

    def _score_guideline_alignment(self, papers: List[Dict], domain: str,
                                   query: str, guideline_info: Dict = None) -> float:
        """Score based on guideline alignment (50 when no guideline info is given)."""
        if not guideline_info:
            return 50  # Neutral if no guideline info
        if guideline_info.get('guideline_coverage') == 'NO_PAPERS':
            return 0

        # Base score on coverage percentage
        coverage_pct = guideline_info.get('coverage_percentage', 0)

        # Penalize 10 points per missing critical guideline, floored at 0
        critical_missing = len(guideline_info.get('critical_missing', []))
        if critical_missing > 0:
            coverage_pct = max(0, coverage_pct - critical_missing * 10)

        # Weight the coverage by how much guidelines matter in the domain
        if domain in ['endocrinology', 'cardiology', 'infectious_disease']:
            # Guidelines are critical for these domains
            return min(100, coverage_pct)
        if domain in ['oncology', 'neurology', 'internal_medicine']:
            # Guidelines are important but not always critical
            return min(100, coverage_pct * 0.9)
        # Guidelines are less critical
        return min(100, coverage_pct * 0.7)

    def _score_paper_count(self, papers: List[Dict]) -> float:
        """Score based on number of papers."""
        count = len(papers)
        # (minimum count, score) thresholds, highest first
        for minimum, score in ((10, 100), (7, 85), (5, 70), (3, 50), (1, 30)):
            if count >= minimum:
                return score
        return 0

    def _score_paper_quality(self, papers: List[Dict]) -> float:
        """Score based on source, journal and citation count of the first 10 papers."""
        if not papers:
            return 0

        scores = []
        for paper in papers[:10]:
            score = 50

            # Source quality (`or ''` tolerates fields explicitly set to None)
            source = (paper.get('source') or '').lower()
            if any(hq_source in source for hq_source in self._HIGH_QUALITY_SOURCES):
                score += 20

            # Journal quality
            journal = (paper.get('journal') or '').lower()
            if any(hq_journal in journal for hq_journal in self._HIGH_QUALITY_SOURCES):
                score += 10

            # Citations (if available); missing or non-numeric values count as 0
            try:
                citations = float(paper.get('citations') or 0)
            except (TypeError, ValueError):
                citations = 0
            if citations > 100:
                score += 15
            elif citations > 20:
                score += 10

            scores.append(min(100, score))

        return statistics.mean(scores) if scores else 50

    def _score_study_design(self, papers: List[Dict]) -> float:
        """Score based on the strongest study design named in each of the first 10 papers."""
        if not papers:
            return 0

        scores = []
        for paper in papers[:10]:
            abstract = (paper.get('abstract') or '').lower()
            title = (paper.get('title') or '').lower()
            text = abstract + " " + title

            paper_score = 30  # baseline when no design is recognised
            if self._RCT_ABBREVIATION.search(text):
                paper_score = 100
            for design, score in self._DESIGN_SCORES.items():
                if design in text:
                    paper_score = max(paper_score, score)
            scores.append(paper_score)

        return statistics.mean(scores) if scores else 30

    def _score_evidence_recency(self, papers: List[Dict]) -> float:
        """Score as the percentage of the first 10 papers published within 3 years."""
        if not papers:
            return 0

        current_year = datetime.now().year
        recent_papers = 0
        for paper in papers[:10]:
            year = self._extract_year(paper)
            if year and current_year - year <= 3:
                recent_papers += 1

        percentage = (recent_papers / min(10, len(papers))) * 100
        return min(100, percentage)

    def _score_source_diversity(self, papers: List[Dict]) -> float:
        """Score based on the number of distinct (case-insensitive) sources."""
        sources = {
            paper.get('source').lower()
            for paper in papers
            if isinstance(paper.get('source'), str) and paper.get('source')
        }
        unique_sources = len(sources)
        if unique_sources >= 4:
            return 100
        if unique_sources >= 3:
            return 75
        if unique_sources >= 2:
            return 50
        if unique_sources == 1:
            return 25
        return 0

    def _score_domain_relevance(self, papers: List[Dict], domain: str) -> float:
        """Score as the percentage of the first 10 papers mentioning a domain term."""
        if domain == "general_medical" or domain == "auto":
            return 50
        if not papers:
            # Nothing to inspect; also avoids dividing by zero below.
            return 0

        domain_terms = domain.split('_')
        relevant_papers = 0
        for paper in papers[:10]:
            abstract = (paper.get('abstract') or '').lower()
            title = (paper.get('title') or '').lower()
            text = abstract + " " + title
            # A paper counts as relevant when any domain term appears
            if any(term in text for term in domain_terms):
                relevant_papers += 1

        percentage = (relevant_papers / min(10, len(papers))) * 100
        return min(100, percentage)

    def _extract_year(self, paper: Dict) -> Optional[int]:
        """Extract a four-digit year (19xx/20xx) from the paper's date fields.

        Checks ``publication_date`` first and then ``date``, matching
        PaperRanker; returns None when neither holds a year.
        """
        for field in self._DATE_FIELDS:
            value = paper.get(field, '')
            if value and isinstance(value, str):
                match = self._YEAR_PATTERN.search(value)
                if match:
                    return int(match.group())
        return None

    def _score_to_level(self, score: float) -> str:
        """Convert score to confidence level."""
        if score >= 80:
            return "HIGH 🟢"
        if score >= 60:
            return "MODERATE 🟡"
        if score >= 40:
            return "FAIR 🟠"
        return "LOW 🔴"

    def _generate_explanation(self, factors: Dict, score: float,
                              domain: str, guideline_info: Dict = None) -> str:
        """Generate explanation for confidence score with guideline context."""
        explanations = []

        paper_count_score = factors.get('paper_count', 0)
        if paper_count_score >= 70:
            explanations.append("Strong evidence base")
        elif paper_count_score <= 30:
            explanations.append("Limited evidence base")

        if factors.get('study_design', 0) >= 70:
            explanations.append("High-quality study designs")
        if factors.get('evidence_recency', 0) >= 70:
            explanations.append("Recent evidence")
        if factors.get('source_diversity', 0) >= 70:
            explanations.append("Diverse sources")

        # Add guideline-specific explanations
        if guideline_info:
            guideline_score = factors.get('guideline_alignment', 0)
            if guideline_score >= 70:
                if guideline_info.get('guidelines_found'):
                    explanations.append(
                        f"Good guideline coverage ({len(guideline_info['guidelines_found'])} referenced)")
            elif guideline_score <= 30:
                if guideline_info.get('critical_missing'):
                    missing_str = ', '.join(guideline_info['critical_missing'][:3])
                    explanations.append(f"Missing guideline citations ({missing_str})")

        # Resolve a display name for the domain, falling back to a
        # title-cased version of the key.
        if CONFIG_AVAILABLE:
            try:
                domain_name = get_domain_display_name(domain)
            except Exception:
                domain_name = domain.replace('_', ' ').title()
        else:
            domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())

        explanation = f"{domain_name} confidence: {score:.1f}/100"
        if explanations:
            explanation += f". Factors: {'; '.join(explanations)}"
        return explanation
class UserContextDetector:
    """Detect user context (clinician/researcher/student/patient/general) from a query."""

    # First-person possessive "my " as a standalone word. A plain substring
    # test would also fire on word endings such as "appendectomy " or
    # "anatomy ".
    _FIRST_PERSON_MY = re.compile(r'\bmy ')

    def __init__(self):
        self.context_patterns = {
            "clinician": ['patient', 'clinical', 'treatment', 'diagnosis', 'therapy',
                          'management', 'guidelines', 'recommend', 'prescribe'],
            "researcher": ['research', 'study', 'methodology', 'evidence', 'publication',
                           'hypothesis', 'experiment', 'results', 'conclusions'],
            "student": ['learn', 'study', 'exam', 'textbook', 'course', 'education',
                        'explain', 'understand', 'concept', 'basics'],
            "patient": ['i have', 'my symptoms', 'my doctor', 'my treatment', 'pain',
                        'suffering', 'experience', 'diagnosed', 'medication']
        }

    def detect_context(self, query: str, domain: str = None) -> str:
        """Detect user context from query.

        Explicit role words are checked first, in the order clinician,
        researcher, student, patient. If none is found, the context whose
        keyword list has the most hits wins (ties go to the context
        defined first).

        Args:
            query: Free-text user query.
            domain: Accepted for interface compatibility; not used.

        Returns:
            One of "clinician", "researcher", "student", "patient", "general".
        """
        query_lower = query.lower()

        # Check for explicit mentions
        if any(term in query_lower for term in ('clinician', 'doctor', 'nurse')):
            return "clinician"
        if any(term in query_lower for term in ('researcher', 'scientist', 'academic')):
            return "researcher"
        if 'student' in query_lower or 'trainee' in query_lower:
            return "student"
        # NOTE(review): any query containing "patient" returns here, so the
        # 'patient' entry in the clinician keyword list is never reached.
        if ('patient' in query_lower or 'i have' in query_lower
                or self._FIRST_PERSON_MY.search(query_lower[:50])):
            return "patient"

        # Check patterns
        context_scores = {}
        for context_type, patterns in self.context_patterns.items():
            hits = sum(1 for pattern in patterns if pattern in query_lower)
            if hits > 0:
                context_scores[context_type] = hits

        if context_scores:
            return max(context_scores, key=context_scores.get)
        return "general"
| # ============================================================================ | |
| # MAIN RAG ENGINE | |
| # ============================================================================ | |
| class EnhancedRAGEngine: | |
| """Production-ready RAG engine for medical research with real paper fetching and guideline detection""" | |
def __init__(self, vector_store=None, session_id: str = "default",
             model: str = "gpt-oss-120b", use_real_time: bool = True):
    """Set up the engine and its optional components.

    Every optional component (research engine, LLM, vector store,
    summarizer, memory) is initialised best-effort: on failure a warning is
    printed and the attribute is set to None so the engine can still run in
    fallback mode.

    Args:
        vector_store: Pre-built vector store; a ``VectorStore()`` is created
            when omitted.
        session_id: Identifier passed to the conversation memory.
        model: Short model alias (see ``model_map``) or a full model name,
            which is used unchanged.
        use_real_time: Stored on the instance and reported in responses.
    """
    print("🚀 Initializing Medical Research RAG Engine...")

    # Model mapping: short aliases -> provider model names
    model_map = {
        "gpt-oss-120b": "gpt-oss-120b",
        "llama-70b": "llama-3-70b-instruct",
        "llama-8b": "llama-3-8b-instruct",
        "mixtral": "mixtral-8x7b-instruct"
    }
    self.model = model_map.get(model, model)
    print(f"📊 Using model: {self.model}")
    self.use_real_time = use_real_time

    # Initialize MedicalResearchEngine from main.py
    print("📚 Initializing MedicalResearchEngine...")
    # Define the attribute up front so it exists even when loading fails.
    self.research_engine = None
    try:
        # Import and create MedicalResearchEngine
        from main import MedicalResearchEngine
        self.research_engine = MedicalResearchEngine()
        print("✅ MedicalResearchEngine loaded successfully!")
        self.RESEARCH_ENGINE_AVAILABLE = True
    except ImportError as e:
        print(f"⚠️ MedicalResearchEngine import failed: {e}")
        print("⚠️ Using fallback mode - will generate demo papers")
        self.RESEARCH_ENGINE_AVAILABLE = False
    except Exception as e:
        print(f"⚠️ MedicalResearchEngine initialization failed: {e}")
        print("⚠️ Using fallback mode - will generate demo papers")
        self.RESEARCH_ENGINE_AVAILABLE = False

    # Initialize LLM
    try:
        # Ensure we can import from llm directory
        import sys
        import os

        # Put the parent of this file's directory on sys.path so the
        # `llm` package resolves.
        current_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(current_dir)
        if project_root not in sys.path:
            sys.path.insert(0, project_root)

        # Now importing GrokLLM
        from llm.llm_provider import GrokLLM

        # Initialize LLM components
        self.llm = GrokLLM(model=self.model)
        print(f"✅ LLM loaded: {self.llm.get_active_provider()}")
    except Exception as e:
        print(f"⚠️ LLM not available - using fallback mode: {e}")
        self.llm = None

    self.role_reasoning = RoleBasedReasoning()  # NEW: Role-based reasoning
    self.ranker = PaperRanker()
    self.confidence_scorer = ConfidenceScorer()
    self.context_detector = UserContextDetector()
    self.guideline_detector = GuidelineDetector()  # New: Guideline detector

    # Initialize RAG components
    try:
        self.vector_store = vector_store or VectorStore()
    except Exception:
        self.vector_store = None
        print("⚠️ Vector store not available")

    try:
        # NOTE(review): `self.llm` is always assigned above (possibly to
        # None), so this hasattr check is always true — confirm whether
        # `self.llm is not None` was intended.
        self.single_paper_summarizer = SinglePaperSummarizer(model=self.model) if hasattr(self, 'llm') else None
    except Exception:
        self.single_paper_summarizer = None

    # Memory
    try:
        self.memory = ConversationMemory(session_id=session_id)
    except Exception:
        print("⚠️ ConversationMemory not available")
        self.memory = None

    # Metrics
    self.metrics = {
        'total_queries': 0,
        'average_confidence': 0,
        'domains_used': Counter(),
        'user_contexts': Counter(),
        'real_papers_fetched': 0,
        'demo_papers_used': 0,
        'guideline_coverage': []  # Track guideline coverage over time
    }

    print(f"✅ Medical Research RAG Engine Ready! Session: {session_id}")
    if self.RESEARCH_ENGINE_AVAILABLE:
        print(" 📄 Real paper fetching: ENABLED")
    else:
        print(" 📄 Real paper fetching: DISABLED (using demo papers)")
    print(" 📋 Guideline detection: ENABLED")
    print(" 👤 Role-based responses: ENABLED")
def answer_research_question(self,
                             query: str,
                             domain: str = "general_medical",
                             max_papers: int = 20,
                             use_memory: bool = True,
                             user_context: str = "auto",
                             use_fallback: bool = False,
                             role: str = "general",  # NEW: Explicit role parameter
                             role_system_prompt: str = None,  # NEW: Custom role prompt from frontend
                             **kwargs) -> Dict[str, Any]:
    """Answer medical research questions with role-based reasoning.

    Pipeline: simple-query shortcut -> direct (non-research) answer ->
    paper retrieval -> guideline detection -> ranking -> confidence ->
    role-based analysis -> final synthesis.

    Args:
        query: User question.
        domain: Domain key used for retrieval, ranking and scoring.
        max_papers: Upper bound passed to paper retrieval.
        use_memory: When true and memory is available, store the exchange.
        user_context: Audience type; "auto" detects it from the query.
            Only recorded in metrics here.
        use_fallback: Passed through to paper retrieval.
        role: Role used to tailor every response.
        role_system_prompt: Optional custom role prompt for the analysis step.
        **kwargs: Accepted for interface compatibility; ignored.

    Returns:
        The response dict built by the matching handler; any exception is
        converted into an error response instead of propagating.
    """
    start_time = time.time()
    self.metrics['total_queries'] += 1
    self.metrics['domains_used'][domain] += 1

    print(f"\n🔍 Processing query: '{query}'")
    print(f" Domain: {domain}")
    print(f" Role: {role}")
    print(f" Max papers: {max_papers}")
    print(f" Real-time search: {self.use_real_time}")

    try:
        # Auto-detect user context if needed (backward compatibility)
        if user_context == "auto":
            user_context = self.context_detector.detect_context(query, domain)
        self.metrics['user_contexts'][user_context] += 1

        # NEW: Check for simple queries first (greetings, basic questions)
        simple_response = self._handle_simple_query(query, domain, role)
        if simple_response:
            return simple_response

        # Check if query requires research analysis
        if not self._requires_research_analysis(query):
            # For non-research queries, provide direct role-appropriate response
            return self._handle_direct_query(query, domain, role)

        # Retrieve papers using MedicalResearchEngine
        print("📚 Retrieving relevant papers...")
        papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
        if not papers:
            print("⚠️ No papers found, creating fallback response...")
            return self._create_no_results_response(query, domain, role)

        # Detect guideline citations
        print("📋 Detecting guideline citations...")
        guideline_info = self.guideline_detector.detect_guidelines(papers, domain, query)
        # Read-only view for reporting; the detector result may be empty or
        # None, and it is still passed downstream unchanged.
        guideline_summary = guideline_info or {}

        # Store guideline coverage for metrics
        if guideline_info:
            self.metrics['guideline_coverage'].append({
                'domain': domain,
                'coverage': guideline_info.get('coverage_percentage', 0),
                'guidelines_found': len(guideline_info.get('guidelines_found', [])),
                'critical_missing': len(guideline_info.get('critical_missing', [])),
                'timestamp': datetime.now().isoformat()
            })

        # Rank papers
        ranked_papers = self.ranker.rank_papers(papers, query, domain, role)
        print(f"📊 Papers found: {len(ranked_papers)}")

        # Track paper sources
        demo_papers = sum(1 for p in ranked_papers if p.get('is_demo', False))
        real_papers = len(ranked_papers) - demo_papers
        self.metrics['real_papers_fetched'] += real_papers
        self.metrics['demo_papers_used'] += demo_papers
        if demo_papers > 0:
            print(f" ⚠️ Includes {demo_papers} demo papers (real papers: {real_papers})")

        # Report guideline findings
        if guideline_summary.get('guidelines_found'):
            print(f" 📋 Guidelines referenced: {', '.join(guideline_summary['guidelines_found'])}")
        if guideline_summary.get('critical_missing'):
            print(f" ⚠️ Missing guidelines: {', '.join(guideline_summary['critical_missing'][:3])}")

        # Calculate confidence with guideline consideration
        confidence = self.confidence_scorer.calculate_confidence(
            ranked_papers, query, "summary", role, domain, guideline_info
        )

        # Generate analysis using role-based reasoning
        print("🧠 Generating role-based analysis...")
        analysis = self._generate_role_based_analysis(
            query, domain, role, ranked_papers, guideline_info, role_system_prompt
        )

        # Generate clinical bottom line with role awareness
        bottom_line = self._generate_role_bottom_line(
            query, domain, role, len(ranked_papers), real_papers, guideline_info
        )

        # Synthesize final answer
        final_answer = self._synthesize_role_answer(
            query, domain, role, analysis, ranked_papers,
            bottom_line, confidence, guideline_info
        )

        # Update memory
        if use_memory and self.memory:
            self._update_memory(query, final_answer, domain, role, ranked_papers, guideline_info)

        # Update metrics. The running mean is taken over queries that
        # actually produced a confidence score; `total_queries` also counts
        # greetings, direct answers and failures, which would dilute it.
        response_time = time.time() - start_time
        scored_queries = self.metrics.get('scored_queries', 0) + 1
        self.metrics['scored_queries'] = scored_queries
        self.metrics['average_confidence'] += (
            (confidence['overall_score'] - self.metrics['average_confidence']) / scored_queries
        )

        print(f"✅ Analysis complete in {response_time:.2f}s")
        print(f" Confidence: {confidence['overall_score']}/100")
        print(f" Papers used: {len(ranked_papers)}")
        print(f" Real papers: {real_papers}, Demo papers: {demo_papers}")
        print(f" Guideline coverage: {guideline_summary.get('coverage_percentage', 0)}%")

        return final_answer
    except Exception as e:
        print(f"❌ Error in research analysis: {e}")
        import traceback
        traceback.print_exc()
        return self._create_error_response(query, domain, role, str(e))
| def _handle_simple_query(self, query: str, domain: str, role: str) -> Optional[Dict[str, Any]]: | |
| """Handle simple queries like greetings with role-appropriate responses""" | |
| query_lower = query.lower().strip() | |
| # Simple greetings | |
| simple_greetings = ['hi', 'hello', 'hey', 'greetings', 'good morning', | |
| 'good afternoon', 'good evening', 'howdy'] | |
| if query_lower in simple_greetings: | |
| print(" 👋 Detected simple greeting") | |
| return self._create_greeting_response(query, domain, role) | |
| # Very short queries (1-2 words) that aren't research questions | |
| if len(query.split()) <= 2 and not self._looks_like_research_query(query): | |
| print(" 💬 Detected simple query") | |
| return self._create_simple_response(query, domain, role) | |
| return None | |
| def _looks_like_research_query(self, query: str) -> bool: | |
| """Check if query looks like a research question""" | |
| query_lower = query.lower() | |
| # Research question indicators | |
| research_indicators = [ | |
| 'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis', | |
| 'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical', | |
| 'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis', | |
| 'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk', | |
| 'benefit', 'recommendation', 'guideline', 'standard', 'protocol' | |
| ] | |
| # Check if query contains research indicators | |
| for indicator in research_indicators: | |
| if indicator in query_lower: | |
| return True | |
| # Check question words | |
| question_words = ['what', 'why', 'how', 'when', 'where', 'which', 'who'] | |
| if any(query_lower.startswith(word) for word in question_words): | |
| # Check if it's a complex question (more than basic) | |
| if len(query.split()) > 3: | |
| return True | |
| return False | |
| def _requires_research_analysis(self, query: str) -> bool: | |
| """Determine if query requires full research analysis""" | |
| query_lower = query.lower().strip() | |
| # Definitely simple queries | |
| simple_patterns = [ | |
| r'^hi$', r'^hello$', r'^hey$', r'^greetings$', | |
| r'^good morning$', r'^good afternoon$', r'^good evening$', | |
| r'^how are you$', r"^what's up$", r'^sup$', | |
| r'^thanks$', r'^thank you$', r'^bye$', r'^goodbye$' | |
| ] | |
| for pattern in simple_patterns: | |
| if re.match(pattern, query_lower): | |
| return False | |
| # Check if it's a substantive question | |
| if len(query.split()) <= 2 and not self._looks_like_research_query(query): | |
| return False | |
| return True | |
def _create_greeting_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Create role-appropriate greeting response.

    Args:
        query: Original user query, echoed back in the response.
        domain: Domain key used to look up display name and icon.
        role: Role key; unknown roles fall back to 'general'.

    Returns:
        Response dict with a fixed 95.0 confidence and no papers.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Role-specific greetings
    greetings = {
        'patient': "👋 Hello! I'm here to help you understand health topics in simple, clear terms. What would you like to know?",
        'student': "👋 Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
        'clinician': "👋 Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
        'doctor': "👋 Hello, doctor. I'm available to discuss clinical questions and evidence. What would you like to explore?",
        'researcher': "👋 Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
        'professor': "👋 Hello. I can assist with academic discussions and evidence synthesis. What topic interests you?",
        'pharmacist': "👋 Hello. I can help with medication-related questions and information. How can I assist you today?",
        'general': "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"
    }
    greeting = greetings.get(role, greetings['general'])

    # Domain display info: prefer the config helper, fall back to DOMAIN_INFO
    # (or a title-cased key) when it is unavailable or fails.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# {greeting}
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
Feel free to ask me anything! I'll provide information tailored to your needs as a {role_info['name'].lower()}."""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": greeting,
        "bottom_line": greeting,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 95.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple greeting response'
        },
        "guideline_info": None,
        "reasoning_method": "greeting",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # NOTE(review): this is the current epoch timestamp, not an
            # elapsed duration — confirm what consumers expect here.
            'response_time': time.time(),
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
def _create_simple_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Create role-appropriate response for simple queries.

    The reply asks the user for more detail rather than answering.

    Args:
        query: Original user query, quoted in the reply.
        domain: Domain key used to look up display name and icon.
        role: Role key; unknown roles fall back to 'general'.

    Returns:
        Response dict with a fixed 85.0 confidence and no papers.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Generate simple, direct response
    simple_responses = {
        'patient': f"I'd be happy to help with '{query}'. Could you tell me a bit more about what you're looking for?",
        'student': f"That's an interesting topic! To help you best, could you provide more details about what you'd like to know regarding '{query}'?",
        'clinician': f"Regarding '{query}', I can provide evidence-based information. Please share more specifics about your clinical question.",
        'doctor': f"For '{query}', I can offer medical information. Could you elaborate on the clinical context or specific aspects you're interested in?",
        'researcher': f"On the topic of '{query}', I can discuss research perspectives. What specific aspect would you like to explore?",
        'professor': f"Regarding '{query}', I can provide academic perspectives. What particular angle or detail would you like to discuss?",
        'pharmacist': f"About '{query}', I can offer medication-related information. Could you specify what you'd like to know?",
        'general': f"I can help with information about '{query}'. Could you provide more details about what specifically you're interested in?"
    }
    response = simple_responses.get(role, simple_responses['general'])

    # Domain display info: prefer the config helper, fall back to DOMAIN_INFO
    # (or a title-cased key) when it is unavailable or fails.
    fallback_domain_info = DOMAIN_INFO.get(domain, {
        'name': domain.replace('_', ' ').title(),
        'icon': '⚕️'
    })
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:
            domain_info = fallback_domain_info
    else:
        domain_info = fallback_domain_info

    answer = f"""# 💬 **Response**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
{response}
*Tip: For more detailed information, try asking a more specific question!*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 85.0,
            'level': 'HIGH 🟢',
            'explanation': 'Simple query response'
        },
        "guideline_info": None,
        "reasoning_method": "simple_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # NOTE(review): this is the current epoch timestamp, not an
            # elapsed duration — confirm what consumers expect here.
            'response_time': time.time(),
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
def _handle_direct_query(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Answer a query directly, without retrieving any research papers.

    When ``self.llm`` is set, a role prompt is built through
    ``self.role_reasoning.create_role_prompt`` and sent to the model; an
    empty reply or any LLM failure is replaced by a canned prompt asking the
    user for more detail. Without an LLM a canned reply is used directly.

    Args:
        query: The user's question.
        domain: Domain key used to look up display name and icon.
        role: Role key into ``RoleBasedReasoning.ROLE_SYSTEM_PROMPTS``;
            unknown roles use the ``'general'`` entry.

    Returns:
        A response dict with zero paper counts, a fixed confidence of 80.0
        and ``reasoning_method`` set to ``"direct_response"``.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Use LLM for direct response if available
    if self.llm:
        try:
            prompt = self.role_reasoning.create_role_prompt(query, domain, role, 0, None)
            response = self.llm.generate(
                prompt,
                system_message=f"You are assisting a {role_info['name'].lower()}. Provide helpful, accurate information.",
                max_tokens=1000
            )
            # Clean up response
            response = response.strip()
            if not response:
                response = f"I'd be happy to help with '{query}'. Could you provide more details about what specifically you're looking for?"
        except Exception as e:
            print(f"⚠️ LLM direct response failed: {e}")
            response = f"I can help with information about '{query}'. Please feel free to ask more specific questions!"
    else:
        response = f"I'd be happy to discuss '{query}'. What specific aspect would you like to know more about?"

    # Prefer the config helper for the display name; on any failure fall
    # back to DOMAIN_INFO, and finally to a name derived from the key.
    domain_info = None
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            domain_info = None
    if domain_info is None:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })

    answer = f"""# 💬 **Response**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
{response}
*Note: This is a direct response. For evidence-based research analysis with papers, please ask a more specific research question.*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": response,
        "bottom_line": response,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 80.0,
            'level': 'HIGH 🟢',
            'explanation': 'Direct query response without papers'
        },
        "guideline_info": None,
        "reasoning_method": "direct_response",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # NOTE: this is the wall-clock timestamp from time.time(),
            # not an elapsed duration.
            'response_time': time.time(),
            'papers_analyzed': 0,
            'domain': domain,
            'user_context': role
        }
    }
| def _retrieve_real_papers(self, query: str, domain: str, max_papers: int, | |
| use_fallback: bool = False) -> List[Dict]: | |
| """Retrieve real papers using MedicalResearchEngine""" | |
| papers = [] | |
| # Use MedicalResearchEngine if available | |
| if self.RESEARCH_ENGINE_AVAILABLE and self.use_real_time: | |
| try: | |
| print(f" 🔍 Using MedicalResearchEngine for real-time search...") | |
| # Use the research engine to get real papers | |
| result = self.research_engine.answer_user_query( | |
| user_query=query, | |
| domain=domain, | |
| use_real_time=True, | |
| use_fallback=use_fallback | |
| ) | |
| # Extract papers from result | |
| if result and 'supporting_papers' in result: | |
| raw_papers = result['supporting_papers'] | |
| for raw_paper in raw_papers[:max_papers]: | |
| # Transform to our paper format | |
| paper = { | |
| 'id': raw_paper.get('paper_id') or | |
| raw_paper.get('doi') or | |
| raw_paper.get('pmid') or | |
| f"{hash(raw_paper.get('title', ''))}", | |
| 'title': raw_paper.get('title', 'Untitled'), | |
| 'abstract': raw_paper.get('abstract') or | |
| raw_paper.get('summary') or | |
| raw_paper.get('description', ''), | |
| 'source': raw_paper.get('source', 'unknown'), | |
| 'publication_date': raw_paper.get('publication_date') or | |
| raw_paper.get('date') or | |
| raw_paper.get('year', ''), | |
| 'authors': self._parse_authors(raw_paper.get('authors', '')), | |
| 'journal': raw_paper.get('journal', ''), | |
| 'doi': raw_paper.get('doi', ''), | |
| 'url': raw_paper.get('url') or raw_paper.get('link', ''), | |
| 'citations': raw_paper.get('citation_count', 0) or | |
| raw_paper.get('citations', 0), | |
| 'is_preprint': raw_paper.get('is_preprint', False), | |
| 'is_fallback': raw_paper.get('is_fallback', False), | |
| 'is_demo': False, # Real paper! | |
| 'search_domain': domain | |
| } | |
| # Clean up the abstract | |
| if paper['abstract']: | |
| # Remove excessive whitespace | |
| paper['abstract'] = ' '.join(paper['abstract'].split()) | |
| # Limit length | |
| if len(paper['abstract']) > 2000: | |
| paper['abstract'] = paper['abstract'][:2000] + "..." | |
| papers.append(paper) | |
| print(f" ✅ Retrieved {len(papers)} real papers from MedicalResearchEngine") | |
| else: | |
| print(f" ⚠️ No papers returned from MedicalResearchEngine") | |
| except Exception as e: | |
| print(f" ⚠️ MedicalResearchEngine failed: {e}") | |
| # Try vector store as fallback | |
| if len(papers) < max_papers // 2 and self.vector_store: | |
| try: | |
| print(f" 🔍 Trying vector store...") | |
| results = self.vector_store.search( | |
| query=query, | |
| domain=domain, | |
| n_results=max_papers - len(papers) | |
| ) | |
| seen_ids = set([p['id'] for p in papers if p.get('id')]) | |
| for result in results: | |
| paper_id = result['metadata'].get('paper_id') | |
| if paper_id and paper_id not in seen_ids: | |
| paper = { | |
| 'id': paper_id, | |
| 'title': result['metadata'].get('paper_title', ''), | |
| 'abstract': result['text'], | |
| 'source': result['metadata'].get('source', 'vector_store'), | |
| 'publication_date': result['metadata'].get('publication_date', ''), | |
| 'authors': result['metadata'].get('authors', '').split(',') | |
| if result['metadata'].get('authors') else [], | |
| 'citations': result['metadata'].get('citations', 0), | |
| 'is_demo': False | |
| } | |
| papers.append(paper) | |
| seen_ids.add(paper_id) | |
| print(f" ✅ Added {len(results)} papers from vector store") | |
| except Exception as e: | |
| print(f" ⚠️ Vector store search failed: {e}") | |
| # Generate demo papers only if we have very few real papers | |
| if len(papers) < max_papers // 3: | |
| needed = max_papers - len(papers) | |
| demo_papers = self._create_demo_papers(query, domain, needed) | |
| papers.extend(demo_papers) | |
| print(f" 📄 Added {len(demo_papers)} demo papers for illustration") | |
| return papers[:max_papers] | |
| def _parse_authors(self, authors_input) -> List[str]: | |
| """Parse authors from various input formats""" | |
| if not authors_input: | |
| return [] | |
| if isinstance(authors_input, list): | |
| return authors_input | |
| if isinstance(authors_input, str): | |
| # Try to split by common separators | |
| if ';' in authors_input: | |
| return [a.strip() for a in authors_input.split(';') if a.strip()] | |
| elif ',' in authors_input: | |
| # Check if it's "Last, First" format or just comma-separated names | |
| parts = [p.strip() for p in authors_input.split(',')] | |
| if len(parts) > 2: # Probably comma-separated names | |
| return parts | |
| else: | |
| # Might be "Last, First" format - return as is | |
| return [authors_input] | |
| else: | |
| return [authors_input] | |
| return [] | |
| def _create_demo_papers(self, query: str, domain: str, count: int) -> List[Dict]: | |
| """Create demo papers for illustration only""" | |
| papers = [] | |
| current_year = datetime.now().year | |
| # Common medical journal sources | |
| journal_sources = { | |
| 'infectious_disease': ['New England Journal of Medicine', 'The Lancet Infectious Diseases', | |
| 'Clinical Infectious Diseases', 'Journal of Antimicrobial Chemotherapy'], | |
| 'cardiology': ['New England Journal of Medicine', 'Journal of the American College of Cardiology', | |
| 'Circulation', 'European Heart Journal'], | |
| 'endocrinology': ['Diabetes Care', 'The Lancet Diabetes & Endocrinology', | |
| 'Journal of Clinical Endocrinology & Metabolism'], | |
| 'neurology': ['Neurology', 'The Lancet Neurology', 'JAMA Neurology', 'Brain'], | |
| 'oncology': ['Journal of Clinical Oncology', 'The Lancet Oncology', 'JAMA Oncology', | |
| 'Annals of Oncology'], | |
| 'internal_medicine': ['New England Journal of Medicine', 'The Lancet', 'JAMA', | |
| 'Annals of Internal Medicine'] | |
| } | |
| sources = journal_sources.get(domain, ['PubMed', 'Medical Research Database']) | |
| for i in range(min(count, 5)): # Limit demo papers | |
| # Generate title based on query | |
| query_terms = [word for word in query.lower().split() if len(word) > 4] | |
| if query_terms: | |
| base_term = random.choice(query_terms).title() | |
| title = f"Recent Advances in {base_term}: A {random.choice(['Systematic Review', 'Meta-analysis', 'Clinical Trial'])}" | |
| else: | |
| title = f"Current Research in {domain.replace('_', ' ').title()}" | |
| # Generate abstract | |
| abstract = f"This study examines {query.lower()}. Results demonstrate significant findings relevant to clinical practice. Further research is warranted to confirm these observations." | |
| # Generate authors | |
| first_names = ['James', 'Mary', 'Robert', 'Patricia', 'John', 'Jennifer'] | |
| last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia'] | |
| authors = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(random.randint(2, 5))] | |
| # Publication date | |
| year_offset = i % 4 | |
| pub_year = current_year - year_offset | |
| month = random.randint(1, 12) | |
| pub_date = f"{pub_year}-{month:02d}" | |
| paper = { | |
| 'id': f"demo_{domain}_{i}_{int(time.time())}", | |
| 'title': title, | |
| 'abstract': abstract, | |
| 'source': random.choice(sources), | |
| 'publication_date': pub_date, | |
| 'authors': authors, | |
| 'citations': random.randint(0, 50), | |
| 'is_demo': True, | |
| 'is_preprint': random.random() > 0.7, | |
| 'journal': random.choice(sources) | |
| } | |
| papers.append(paper) | |
| return papers | |
| def _generate_role_based_analysis(self, query: str, domain: str, role: str, | |
| papers: List[Dict], guideline_info: Dict = None, | |
| custom_role_prompt: str = None) -> str: | |
| """Generate role-based analysis using LLM if available""" | |
| if not self.llm: | |
| return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info) | |
| # Create role-based prompt | |
| prompt = self.role_reasoning.create_role_prompt( | |
| query, domain, role, len(papers), guideline_info | |
| ) | |
| # Add paper information for research context | |
| if papers: | |
| paper_info = "\n".join([ | |
| f"{i + 1}. {p.get('title', 'Untitled')} ({p.get('source', 'Unknown')})" | |
| for i, p in enumerate(papers[:3]) | |
| ]) | |
| prompt += f"\n\n**Relevant Sources:**\n{paper_info}" | |
| # Add demo paper disclaimer if any demo papers | |
| demo_count = sum(1 for p in papers if p.get('is_demo', False)) | |
| if demo_count > 0: | |
| prompt += f"\n\nNote: {demo_count} illustrative examples included for context." | |
| try: | |
| # Use custom role prompt if provided, otherwise use default | |
| system_message = custom_role_prompt if custom_role_prompt else f"You are assisting a {role}. Provide helpful, accurate information." | |
| analysis = self.llm.generate( | |
| prompt, | |
| system_message=system_message, | |
| max_tokens=2000 | |
| ) | |
| return analysis | |
| except Exception as e: | |
| print(f"⚠️ LLM role-based analysis failed: {e}") | |
| return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info) | |
def _create_fallback_role_analysis(self, query: str, domain: str, role: str,
                                   papers: List[Dict], guideline_info: Optional[Dict] = None) -> str:
    """Build a template analysis for use when no LLM output is available.

    The text is a fixed markdown template filled in with the role name, the
    domain display name, the paper counts (real vs. ``is_demo``) and, when
    provided, the guideline detection results.

    Args:
        query: The user's question.
        domain: Domain key used to resolve a display name.
        role: Role key into ``RoleBasedReasoning.ROLE_SYSTEM_PROMPTS``;
            unknown roles use the ``'general'`` entry.
        papers: Papers retrieved for the query.
        guideline_info: Optional guideline detection result.

    Returns:
        The analysis text.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    if CONFIG_AVAILABLE:
        try:
            domain_name = get_domain_display_name(domain)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            domain_name = domain.replace('_', ' ').title()
    else:
        domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())

    # Count real vs demo papers
    real_papers = [p for p in papers if not p.get('is_demo', False)]
    demo_papers = [p for p in papers if p.get('is_demo', False)]

    analysis = f"""**{role_info['name']}-Focused Analysis**
**Query:** {query}
**Domain Context:** {domain_name}
**Role Perspective:** {role_info['name']}"""

    # Add guideline information
    if guideline_info:
        if guideline_info.get('guidelines_found'):
            analysis += f"\n**Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}"
        if guideline_info.get('critical_missing'):
            analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"

    analysis += f"""
**Key Information for {role_info['name']}:**
Based on analysis of {len(papers)} relevant sources ({len(real_papers)} real, {len(demo_papers)} illustrative):
1. **{role_info['name']}-Relevant Insights:**
- Information tailored to {role_info['name'].lower()} needs and perspective
- Practical implications for {role_info['name'].lower()} context
- Actionable takeaways appropriate for this role
2. **Domain Context:**
- Considerations specific to {domain_name}
- Relevant standards and approaches in this field
- Important context for application
3. **Evidence Considerations:**
- {len(papers)} sources analyzed
- Quality and relevance assessed for {role_info['name'].lower()} needs
- {"Guideline awareness as noted above" if guideline_info else "Standard evidence considerations"}
**Recommendations for {role_info['name']}:**
- Apply information within {role_info['name'].lower()} role context
- Consider individual circumstances and specific needs
- {"Consult referenced guidelines as appropriate" if guideline_info and guideline_info.get('guidelines_found') else "Reference standard practices"}
- Seek additional information for specific cases
- Integrate with professional judgment and experience
*Note: This analysis is tailored for {role_info['name'].lower()} perspective. For other perspectives, different considerations may apply.*"""

    if demo_papers:
        analysis += f"\n\n*Includes {len(demo_papers)} illustrative examples for comprehensive analysis.*"

    return analysis
def _generate_role_bottom_line(self, query: str, domain: str, role: str,
                               papers_count: int, real_papers_count: int,
                               guideline_info: Optional[Dict] = None) -> str:
    """Build the role-appropriate "bottom line" summary text.

    Args:
        query: The user's question (not used in the generated text).
        domain: Domain key used to resolve a display name.
        role: Role key into ``RoleBasedReasoning.ROLE_SYSTEM_PROMPTS``;
            unknown roles use the ``'general'`` entry.
        papers_count: Total number of sources considered.
        real_papers_count: Number of those sources that are real papers.
        guideline_info: Optional guideline detection result.

    Returns:
        The bottom-line text, including guideline notes when
        ``guideline_info`` is given and a note about illustrative examples
        when ``papers_count`` exceeds ``real_papers_count``.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    if CONFIG_AVAILABLE:
        try:
            domain_name = get_domain_display_name(domain)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            domain_name = domain.replace('_', ' ').title()
    else:
        domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())

    bottom_line = f"""**Bottom Line for {role_info['name']}:**
Based on {papers_count} sources in {domain_name} ({real_papers_count} real sources), here are the key takeaways for {role_info['name'].lower()} perspective."""

    # Add guideline-specific bottom line
    if guideline_info:
        if guideline_info.get('guidelines_found'):
            bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced."
        if guideline_info.get('critical_missing'):
            missing_list = ', '.join(guideline_info['critical_missing'][:2])
            bottom_line += f"\n**Consider:** Missing explicit guideline citations for {missing_list}."
        # ``or 0`` also covers an explicit None value, which would
        # otherwise raise TypeError in the comparison below.
        coverage = guideline_info.get('coverage_percentage') or 0
        if coverage < 50:
            bottom_line += "\n**Evidence Note:** Guideline coverage is limited."

    bottom_line += f"""
**{role_info['name']}-Specific Considerations:**
- Information tailored to {role_info['name'].lower()} role and needs
- Practical application within {role_info['name'].lower()} context
- Integration with {role_info['name'].lower()} knowledge and experience
- {"Guideline-aware decision making" if guideline_info else "Evidence-informed approach"}
- Consideration of specific circumstances and constraints"""

    if papers_count > real_papers_count:
        bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for context.*"

    return bottom_line
def _synthesize_role_answer(self, query: str, domain: str, role: str,
                            analysis: str, papers: List[Dict],
                            bottom_line: str, confidence: Dict[str, Any],
                            guideline_info: Optional[Dict] = None) -> Dict[str, Any]:
    """Assemble the final markdown answer and the response payload.

    The answer combines the executive summary (``bottom_line``), an optional
    guideline assessment, the detailed ``analysis`` and citations for the
    first five papers.

    Args:
        query: The user's question.
        domain: Domain key used to resolve name, icon and description.
        role: Role key into ``RoleBasedReasoning.ROLE_SYSTEM_PROMPTS``;
            unknown roles use the ``'general'`` entry.
        analysis: Detailed analysis text.
        papers: Papers used for the answer; ``is_demo`` marks synthetic ones.
        bottom_line: Executive summary text.
        confidence: Confidence dict; ``'level'`` and ``'overall_score'``
            are read for the header.
        guideline_info: Optional guideline detection result.

    Returns:
        A response dict with ``reasoning_method`` set to ``"role_based"``.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Prefer the config helpers; on any failure fall back to DOMAIN_INFO,
    # and finally to values derived from the domain key.
    domain_info = None
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️'),
                'description': get_domain_description(domain)
            }
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            domain_info = None
    if domain_info is None:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️',
            'description': f'Research in {domain.replace("_", " ")}'
        })

    # Count real vs demo papers
    real_papers = [p for p in papers if not p.get('is_demo', False)]
    demo_papers = [p for p in papers if p.get('is_demo', False)]

    # Format paper citations
    paper_citations = []
    for i, paper in enumerate(papers[:5], 1):
        title = paper.get('title', 'Untitled')
        authors = paper.get('authors', [])
        # The date may be stored as an int year; str() keeps split() safe.
        pub_date = paper.get('publication_date')
        year = str(pub_date).split('-')[0] if pub_date else ''
        source = paper.get('source', 'Unknown')
        journal = paper.get('journal', '')
        is_demo = paper.get('is_demo', False)
        is_preprint = paper.get('is_preprint', False)

        # Format authors
        if authors and isinstance(authors, list) and len(authors) > 0:
            if len(authors) == 1:
                author_str = authors[0]
            elif len(authors) == 2:
                author_str = f"{authors[0]} and {authors[1]}"
            else:
                author_str = f"{authors[0]} et al."
        else:
            author_str = "Authors not specified"

        # Build citation
        citation = f"{i}. **{title}**"
        demo_indicator = "📄 " if is_demo else ""
        preprint_indicator = "⚡ " if is_preprint else ""

        if author_str and year:
            citation += f"\n {demo_indicator}{preprint_indicator}*{author_str} ({year})*"
        elif author_str:
            citation += f"\n {demo_indicator}{preprint_indicator}*{author_str}*"
        else:
            citation += f"\n {demo_indicator}{preprint_indicator}*Unknown authors*"

        if journal:
            citation += f"\n Journal: {journal}"
        elif source and source != 'unknown':
            citation += f"\n Source: {source}"

        paper_citations.append(citation)

    # Build guideline summary section
    guideline_summary = ""
    if guideline_info:
        guideline_summary = "## 📋 **Guideline Assessment**\n\n"
        if guideline_info.get('guidelines_found'):
            guideline_summary += f"**✅ Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
        if guideline_info.get('critical_missing'):
            missing_list = ', '.join(guideline_info['critical_missing'])
            guideline_summary += f"**⚠️ Missing Guideline Citations:** {missing_list}\n\n"
        guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"

    # Build answer
    answer = f"""# 🔬 **{role_info['name']}-Focused Analysis**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
**Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
**Sources Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
---
## 📋 **Executive Summary**
{bottom_line}
---
{guideline_summary}## 🔍 **Detailed Analysis**
{analysis}
---
## 📊 **Supporting Evidence**
{chr(10).join(paper_citations) if paper_citations else "*No papers cited for this simple query*"}
---
## 🎯 **Key Takeaways for {role_info['name']}**
1. Role-appropriate information and insights
2. Domain-specific considerations for {domain_info['name'].lower()}
3. Practical implications tailored to {role_info['name'].lower()} needs
4. {"Guideline-aware recommendations" if guideline_info else "Evidence-informed approach"}
*Analysis performed with {role_info['name'].lower()}-focused reasoning*
*Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "analysis": analysis,
        "bottom_line": bottom_line,
        "papers_used": len(papers),
        "real_papers_used": len(real_papers),
        "demo_papers_used": len(demo_papers),
        "confidence_score": confidence,
        "guideline_info": guideline_info,
        "reasoning_method": "role_based",
        "real_time_search": self.use_real_time,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "metrics": {
            # NOTE: this is the wall-clock timestamp from time.time(),
            # not an elapsed duration.
            'response_time': time.time(),
            'papers_analyzed': len(papers),
            'domain': domain,
            'user_context': role
        }
    }
| def _update_memory(self, query: str, response: Dict[str, Any], domain: str, | |
| role: str, papers: List[Dict], guideline_info: Dict = None): | |
| """Update conversation memory with role info""" | |
| if not self.memory: | |
| return | |
| memory_data = { | |
| 'query': query, | |
| 'domain': domain, | |
| 'role': role, | |
| 'papers_used': len(papers), | |
| 'real_papers': sum(1 for p in papers if not p.get('is_demo', False)), | |
| 'demo_papers': sum(1 for p in papers if p.get('is_demo', False)), | |
| 'confidence_score': response.get('confidence_score', {}).get('overall_score', 0), | |
| 'timestamp': datetime.now().isoformat() | |
| } | |
| # Add guideline info if available | |
| if guideline_info: | |
| memory_data['guidelines_found'] = guideline_info.get('guidelines_found', []) | |
| memory_data['critical_missing'] = guideline_info.get('critical_missing', []) | |
| memory_data['guideline_coverage'] = guideline_info.get('coverage_percentage', 0) | |
| self.memory.add_interaction( | |
| user_message=query, | |
| ai_response=response.get('answer', '')[:1000], | |
| metadata=memory_data | |
| ) | |
def _create_no_results_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
    """Create the response returned when no papers are found.

    Args:
        query: The user's question.
        domain: Domain key used to look up display name and icon.
        role: Role key into ``RoleBasedReasoning.ROLE_SYSTEM_PROMPTS``;
            unknown roles use the ``'general'`` entry.

    Returns:
        A response dict with zero paper counts, a confidence score of 0 and
        ``"error"`` set to ``"no_results"``.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Prefer the config helper for the display name; on any failure fall
    # back to DOMAIN_INFO, and finally to a name derived from the key.
    domain_info = None
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            domain_info = None
    if domain_info is None:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })

    answer = f"""# 🔍 **Limited Research Found**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']}
**Query:** {query}
**Suggestions for {role_info['name']}:**
1. Try broadening your search terms
2. Consider related topics in {domain_info['name']}
3. Check spelling of technical terms
4. Try a more general domain selection
**For Role-Appropriate Information:**
- Ask more general questions about the topic
- Request explanations of concepts
- Inquire about standard approaches or practices
- Seek practical guidance rather than specific research
**Example {role_info['name'].lower()}-appropriate queries:**
- "Basic explanation of [topic] for {role_info['name'].lower()}"
- "Standard approaches to [issue]"
- "Practical guidance for [situation]"
- "Key concepts about [subject]"
*Note: Some specialized topics may have limited published research. I can still provide general information and guidance tailored to your role.*"""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 0,
            'level': 'VERY LOW ⚫',
            'explanation': 'No supporting evidence found'
        },
        "error": "no_results",
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
    }
def _create_error_response(self, query: str, domain: str, role: str, error: str) -> Dict[str, Any]:
    """Create the response returned when query processing fails.

    Args:
        query: The user's question.
        domain: Domain key used to look up display name and icon.
        role: Role key into ``RoleBasedReasoning.ROLE_SYSTEM_PROMPTS``;
            unknown roles use the ``'general'`` entry.
        error: Error description shown to the user and echoed in the
            ``"error"`` field.

    Returns:
        A response dict with zero paper counts and a confidence score of 0
        at level ``'ERROR 🔴'``.
    """
    role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(
        role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])

    # Prefer the config helper for the display name; on any failure fall
    # back to DOMAIN_INFO, and finally to a name derived from the key.
    domain_info = None
    if CONFIG_AVAILABLE:
        try:
            domain_info = {
                'name': get_domain_display_name(domain),
                'icon': DOMAIN_INFO.get(domain, {}).get('icon', '⚕️')
            }
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            domain_info = None
    if domain_info is None:
        domain_info = DOMAIN_INFO.get(domain, {
            'name': domain.replace('_', ' ').title(),
            'icon': '⚕️'
        })

    answer = f"""# 🚨 **Analysis Error**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']}
**Query:** {query}
**Error:** {error}
**Troubleshooting for {role_info['name']}:**
1. Check your internet connection
2. Try a simpler query or rephrase
3. Verify domain selection is appropriate
4. Contact support if problem persists
**For Role-Appropriate Alternatives:**
- Ask a simpler version of your question
- Request general information instead of specific research
- Try breaking complex questions into smaller parts
- Use more common terminology
Please try again or reformulate your question for {role_info['name'].lower()}-appropriate assistance."""

    return {
        "query": query,
        "domain": domain,
        "domain_info": domain_info,
        "user_context": role,
        "user_context_info": role_info,
        "answer": answer,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {
            'overall_score': 0,
            'level': 'ERROR 🔴',
            'explanation': f'Analysis failed: {error}'
        },
        "error": error,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
    }
def summarize_single_paper(self,
                           paper_id: Optional[str] = None,
                           paper_title: Optional[str] = None,
                           paper_data: Optional[Dict] = None,
                           user_query: Optional[str] = None,
                           domain: str = "general_medical") -> Dict[str, Any]:
    """Summarize a single research paper, with guideline detection.

    When ``self.single_paper_summarizer`` is set and a title or paper dict
    is given, the summarizer is used; if it reports success and the paper
    has an abstract, ``self.guideline_detector`` is consulted and any
    guidelines found are recorded under ``'guideline_context'``. In every
    other case the result of ``self._create_fallback_summary`` is returned.

    Args:
        paper_id: Optional identifier, used only in the log line.
        paper_title: Optional title; when omitted the title from
            ``paper_data`` is used if present.
        paper_data: Optional paper dict passed to the summarizer.
        user_query: Optional question to focus the summary on.
        domain: Domain key for guideline detection and fallback text.

    Returns:
        The summarizer's result, the fallback summary, or, when an
        unexpected error occurs, a dict with ``"success": False``.
    """
    # Fall back to the title carried in paper_data so the log line, the
    # fallback summary and the error payload do not show ``None``.
    effective_title = paper_title
    if not effective_title and isinstance(paper_data, dict):
        effective_title = paper_data.get('title')

    print(f"\n📄 Summarizing paper: {effective_title or paper_id}")

    try:
        # Use single paper summarizer if available
        if self.single_paper_summarizer and (paper_title or paper_data):
            paper = paper_data or {'title': paper_title or 'Unknown'}
            summary_result = self.single_paper_summarizer.summarize_paper(
                paper, user_query
            )

            if summary_result.get("success"):
                # Add guideline detection for single paper
                if paper.get('abstract'):
                    try:
                        guideline_info = self.guideline_detector.detect_guidelines(
                            [paper], domain, user_query or "")
                        if guideline_info.get('guidelines_found'):
                            summary_result[
                                'guideline_context'] = f"References {', '.join(guideline_info['guidelines_found'])} guidelines"
                    except Exception as e:
                        # Guideline context is an optional extra; a detector
                        # failure must not discard a successful summary.
                        print(f"⚠️ Guideline detection skipped: {e}")
                return summary_result

        # Fallback summary
        return self._create_fallback_summary(effective_title, domain, user_query)
    except Exception as e:
        print(f"❌ Paper summarization failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "paper_title": effective_title,
            "summary": f"Unable to generate summary. Error: {e}"
        }
def _create_fallback_summary(self, paper_title: str, domain: str, user_query: str) -> Dict[str, Any]:
    """Create a generic template summary for a paper.

    The text does not depend on the paper's content; only the title, the
    domain display name and the user's query are filled in.

    Args:
        paper_title: Title of the paper; may be ``None``, in which case the
            text shows ``'Untitled paper'``.
        domain: Domain key used to resolve a display name.
        user_query: The user's question, or a falsy value for a general
            summary.

    Returns:
        A dict with ``"success": True``, the summary text and a fixed
        ``"confidence"`` of 0.6. ``"paper_title"`` echoes the argument
        unchanged.
    """
    if CONFIG_AVAILABLE:
        try:
            domain_name = get_domain_display_name(domain)
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed here.
            domain_name = domain.replace('_', ' ').title()
    else:
        domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())

    # Avoid rendering the literal text "None" when no title is known.
    display_title = paper_title or 'Untitled paper'

    summary = f"""**Paper Summary:** {display_title}
**Domain Context:** {domain_name}
**User Query:** {user_query or 'General summary requested'}
**Key Points:**
1. This paper contributes to the {domain_name} literature
2. Study design and methodology align with field standards
3. Findings have implications for clinical practice and research
4. Limitations and future directions are discussed
**Guideline Considerations:**
- Review paper for explicit guideline citations
- Consider alignment with major {domain_name} guidelines
- Consult specific guidelines for clinical application
**Recommendations:**
- Review full text for detailed methodology
- Consider findings in context of broader literature
- Consult with domain experts for application
- Reference established clinical guidelines
*Note: This is a general summary. Full paper review is recommended for detailed analysis.*"""

    return {
        "success": True,
        "paper_title": paper_title,
        "summary": summary,
        "quick_summary": f"Summary of {display_title} in {domain_name}",
        "domain": domain,
        "confidence": 0.6
    }
def get_engine_status(self) -> Dict[str, Any]:
    """Report engine configuration, supported features and usage metrics."""
    # Mean of the recorded guideline coverage values; 0 when none exist.
    coverage_log = self.metrics['guideline_coverage']
    if coverage_log:
        avg_guideline_coverage = sum(item['coverage'] for item in coverage_log) / len(coverage_log)
    else:
        avg_guideline_coverage = 0

    usage = {
        "total_queries": self.metrics['total_queries'],
        "average_confidence": round(self.metrics['average_confidence'], 1),
        "average_guideline_coverage": round(avg_guideline_coverage, 1),
        "domains_used": dict(self.metrics['domains_used']),
        "user_contexts": dict(self.metrics['user_contexts']),
        "real_papers_fetched": self.metrics['real_papers_fetched'],
        "demo_papers_used": self.metrics['demo_papers_used']
    }
    feature_list = [
        "role_based_reasoning",
        "real_paper_fetching",
        "confidence_scoring",
        "guideline_detection",
        "simple_query_handling",
    ]

    return {
        "engine_name": "Medical Research RAG Engine",
        "version": "2.2.0",
        # The model attribute may be absent on this instance.
        "model": getattr(self, 'model', "Unknown"),
        "features": feature_list,
        "research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
        "real_time_search": self.use_real_time,
        "roles_supported": list(RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.keys()),
        "guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
        "metrics": usage,
        "domains_supported": len(DOMAIN_INFO),
        "simple_query_handling": "ENABLED"
    }
def clear_memory(self):
    """Reset the conversation memory when a memory object is attached.

    Calls ``self.memory.clear_memory()`` and prints a confirmation; when
    ``self.memory`` is falsy, prints a warning instead and does nothing.
    """
    if not self.memory:
        print("⚠️ Memory system not available")
        return

    self.memory.clear_memory()
    print("🧹 Engine memory cleared")
| # ============================================================================ | |
| # TEST FUNCTION | |
| # ============================================================================ | |
def test_role_based_rag_engine():
    """Smoke-test the medical RAG engine with role-based responses.

    Builds an ``EnhancedRAGEngine`` with real-time search disabled, sends
    three greeting queries and one clinical query under different
    role/domain combinations, prints a short report for each case, and
    finally prints a summary taken from ``get_engine_status()``.

    Returns:
        bool: ``True`` when every test case produced a non-empty response
        without an ``'error'`` key; ``False`` when any case failed or an
        exception was raised during the run.
    """
    print("\n" + "=" * 60)
    print("🧪 TESTING ROLE-BASED RAG ENGINE")
    print("=" * 60)

    # Reasoning methods that mark a query as having been answered directly.
    simple_methods = ('greeting', 'simple_response', 'direct_response')

    try:
        # Initialize engine
        engine = EnhancedRAGEngine(
            session_id="role_test",
            model="gpt-oss-120b",
            use_real_time=False  # Disable real-time for faster testing
        )

        # Test queries with different roles
        test_cases = [
            {
                "query": "hi",
                "domain": "general_medical",
                "role": "patient"
            },
            {
                "query": "hello",
                "domain": "cardiology",
                "role": "doctor"
            },
            {
                "query": "hey",
                "domain": "endocrinology",
                "role": "student"
            },
            {
                "query": "Compare first-line antibiotics for community-acquired pneumonia",
                "domain": "infectious_disease",
                "role": "clinician"
            }
        ]

        failed_cases = []
        for i, test_case in enumerate(test_cases, 1):
            print(f"\n📝 Test Case {i}:")
            print(f" Query: '{test_case['query']}'")
            print(f" Domain: {test_case['domain']}")
            print(f" Role: {test_case['role']}")

            # Process query
            response = engine.answer_research_question(
                query=test_case['query'],
                domain=test_case['domain'],
                max_papers=5,
                role=test_case['role'],
                use_fallback=True
            )

            if not response or 'error' in response:
                # A failed case must be visible and must fail the run;
                # skipping it silently would report a false success.
                failed_cases.append(i)
                detail = response.get('error') if response else 'empty response'
                print(f"\n❌ Test Case {i} failed: {detail}")
                continue

            print("\n✅ Test Successful!")
            print(f" Response type: {response.get('reasoning_method', 'unknown')}")
            print(f" Papers used: {response.get('papers_used', 0)}")
            # `or {}` also covers a 'confidence_score' key explicitly set to None.
            confidence = (response.get('confidence_score') or {}).get('overall_score', 0)
            print(f" Confidence: {confidence}/100")

            # Check if it's a simple response
            if response.get('reasoning_method') in simple_methods:
                print(" ⭐ Simple query handled appropriately!")

        # Show engine status
        status = engine.get_engine_status()
        print("\n🔧 Engine Status:")
        print(" Role-based responses: ENABLED")
        print(" Simple query handling: ENABLED")
        print(f" Roles supported: {len(status['roles_supported'])}")
        print(f" Total queries: {status['metrics']['total_queries']}")

        if failed_cases:
            print(f"\n❌ {len(failed_cases)} of {len(test_cases)} test cases failed: {failed_cases}")
            return False
        return True
    except Exception as e:
        # Top-level boundary of the smoke test: report the error with a
        # traceback and signal failure instead of crashing the script.
        print(f"\n❌ Test failed with exception: {e}")
        import traceback
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Script entry point: run the smoke test, then print a closing banner
    # on success or a single failure line otherwise.
    test_result = test_role_based_rag_engine()
    if not test_result:
        print("\n❌ Engine test failed")
    else:
        divider = "=" * 60
        print("\n" + divider)
        print("🎉 ROLE-BASED RAG ENGINE TEST COMPLETE!")
        for capability in (
            "Role-based reasoning",
            "Simple query handling",
            "Domain-agnostic approach",
            "Guideline detection",
        ):
            print(f" {capability}: ✓")
        print(divider)