Spaces:
Running
Running
| """ | |
| api/engine.py - Production-Ready Medical Research Engine | |
| Updated to support role-based reasoning and integrate with EnhancedRAGEngine | |
| """ | |
| import asyncio | |
| import json | |
| import os | |
| import sys | |
| import re | |
| from typing import Dict, Any, Optional, List | |
| from datetime import datetime | |
| import concurrent.futures | |
| from pathlib import Path | |
| # ============================================================================ | |
| # ENVIRONMENT SETUP | |
| # ============================================================================ | |
# Put the repository root (two levels above this file) first on sys.path.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Environment variables come from the first .env file that exists.
from dotenv import load_dotenv

env_paths = [
    project_root / ".env",
    project_root / "api" / ".env",
    Path.cwd() / ".env",
]
env_loaded = False
for env_path in env_paths:
    if not env_path.exists():
        continue
    # Only the first existing candidate is loaded; later ones are ignored.
    load_dotenv(dotenv_path=env_path, override=True)
    print(f"✅ Loaded environment from: {env_path}")
    env_loaded = True
    break
if not env_loaded:
    print("⚠️ No .env file found. Using system environment variables.")

# Credentials and model name consumed by the rest of this module.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
XAI_API_KEY = os.getenv("XAI_API_KEY")
MODEL = os.getenv("MODEL", "gpt-oss-120b")

if GROQ_API_KEY or XAI_API_KEY:
    # Show only the tail of whichever key is set (GROQ takes precedence).
    last4 = (GROQ_API_KEY or XAI_API_KEY)[-4:]
    print(f"✅ API Key found: {'*' * 16}{last4}")
    print(f"✅ Model configured: {MODEL}")
else:
    print("❌ WARNING: No API key found in environment!")
    print(" Set GROQ_API_KEY or XAI_API_KEY in .env file")
| # ============================================================================ | |
| # ROLE-BASED REASONING ADAPTER | |
| # ============================================================================ | |
class RoleBasedReasoningAdapter:
    """Adapter for role-based reasoning from rag_engine.py.

    Holds the role catalogue and a keyword-based role detector. Both helpers
    are stateless and declared as static methods so they can be called either
    on the class (``RoleBasedReasoningAdapter.get_role_info(...)``) or on an
    instance (``adapter.get_role_info(...)``); without the decorator the
    instance form raises ``TypeError`` because the instance is passed as the
    first positional argument.
    """

    # Role descriptions that match rag_engine.py
    ROLE_DESCRIPTIONS = {
        'patient': {
            'name': 'Patient',
            'icon': '🩺',
            'description': 'Patients and general public seeking health information'
        },
        'student': {
            'name': 'Student',
            'icon': '🎓',
            'description': 'Medical students and trainees'
        },
        'clinician': {
            'name': 'Clinician',
            'icon': '👨⚕️',
            'description': 'Healthcare providers and nurses'
        },
        'doctor': {
            'name': 'Doctor',
            'icon': '⚕️',
            'description': 'Medical doctors and physicians'
        },
        'researcher': {
            'name': 'Researcher',
            'icon': '🔬',
            'description': 'Academic researchers and scientists'
        },
        'professor': {
            'name': 'Professor',
            'icon': '📚',
            'description': 'Academic educators and professors'
        },
        'pharmacist': {
            'name': 'Pharmacist',
            'icon': '💊',
            'description': 'Pharmacy professionals and pharmacists'
        },
        'general': {
            'name': 'General User',
            'icon': '👤',
            'description': 'General audience'
        },
        'auto': {
            'name': 'Auto-detect',
            'icon': '🤖',
            'description': 'Automatically detect user role'
        }
    }

    # Keyword hints per role (substring match against the lower-cased query).
    # Dict order matters: ties in the keyword score go to the earlier role.
    _ROLE_PATTERNS = {
        'patient': ['i have', 'my symptoms', 'my doctor', 'my treatment', 'pain', 'suffering', 'experience', 'diagnosed', 'medication'],
        'student': ['learn', 'study', 'exam', 'textbook', 'course', 'education', 'explain', 'understand', 'concept', 'basics'],
        'clinician': ['patient', 'clinical', 'treatment', 'diagnosis', 'therapy', 'management', 'guidelines', 'recommend', 'prescribe'],
        'doctor': ['physician', 'consult', 'referral', 'differential', 'prognosis', 'etiology', 'pathophysiology'],
        'researcher': ['research', 'study', 'methodology', 'evidence', 'publication', 'hypothesis', 'experiment', 'results', 'conclusions'],
        'professor': ['teach', 'lecture', 'curriculum', 'syllabus', 'academic', 'pedagogy', 'assessment'],
        'pharmacist': ['medication', 'drug', 'dose', 'pharmacokinetics', 'interaction', 'formulary', 'prescription']
    }

    # Phrases in which the user states their role outright; these win over
    # the keyword score.
    _EXPLICIT_ROLE_PHRASES = {
        'patient': ['i am a patient', 'as a patient', 'patient here'],
        'student': ['i am a student', 'medical student', 'as a student'],
        'clinician': ['i am a clinician', 'as a clinician', 'clinician here'],
        'doctor': ['i am a doctor', 'physician here', 'as a physician'],
        'researcher': ['i am a researcher', 'as a researcher', 'research scientist'],
        'professor': ['i am a professor', 'as a professor', 'faculty member'],
        'pharmacist': ['i am a pharmacist', 'as a pharmacist', 'pharmacy professional']
    }

    @staticmethod
    def get_role_info(role_id: str) -> Dict[str, Any]:
        """Get information about a user role.

        Args:
            role_id: Key into ``ROLE_DESCRIPTIONS``.

        Returns:
            The matching role entry, or the ``'general'`` entry when
            ``role_id`` is unknown.
        """
        roles = RoleBasedReasoningAdapter.ROLE_DESCRIPTIONS
        return roles.get(role_id, roles['general'])

    @staticmethod
    def detect_role_from_query(query: str, current_role: str = "auto") -> str:
        """Detect user role from query text.

        Args:
            query: Raw user query.
            current_role: Role already chosen by the caller; anything other
                than ``"auto"`` is returned unchanged.

        Returns:
            A role id: the first role with an explicit self-identification
            phrase in the query, otherwise the role with the most keyword
            hits, otherwise ``"general"``.
        """
        if current_role != "auto":
            return current_role

        query_lower = query.lower()

        # Explicit mentions take priority over keyword scoring.
        for role, phrases in RoleBasedReasoningAdapter._EXPLICIT_ROLE_PHRASES.items():
            if any(phrase in query_lower for phrase in phrases):
                return role

        # Score each role by how many of its keywords occur in the query.
        role_scores = {}
        for role, keywords in RoleBasedReasoningAdapter._ROLE_PATTERNS.items():
            score = sum(1 for keyword in keywords if keyword in query_lower)
            if score > 0:
                role_scores[role] = score

        if role_scores:
            # max() keeps the first maximal entry, so ties follow table order.
            return max(role_scores, key=role_scores.get)
        return "general"
| # ============================================================================ | |
| # DOMAIN DETECTION (UPDATED) | |
| # ============================================================================ | |
class DomainDetector:
    """Detect medical domain from query text.

    The detector is a stateless keyword matcher. ``detect_domain_from_query``
    is a static method so it works both on the class and on an instance;
    without the decorator an instance call raises ``TypeError`` because the
    instance is passed as the first positional argument.
    """

    # Domain detection patterns (simplified from rag_engine.py).
    # Keywords are matched as substrings of the lower-cased query.
    DOMAIN_PATTERNS = {
        'internal_medicine': ['diagnosis', 'chronic disease', 'acute disease', 'primary care', 'internal medicine'],
        'endocrinology': ['diabetes', 'thyroid', 'hormone', 'metabolism', 'insulin', 'glucose'],
        'cardiology': ['heart', 'cardiovascular', 'hypertension', 'ecg', 'echocardiogram', 'myocardial'],
        'neurology': ['brain', 'stroke', 'alzheimer', 'parkinson', 'seizure', 'migraine'],
        'oncology': ['cancer', 'tumor', 'chemotherapy', 'radiation', 'oncology', 'malignancy'],
        'infectious_disease': ['infection', 'bacterial', 'viral', 'antibiotic', 'sepsis', 'pneumonia'],
        'pulmonology': ['lung', 'respiratory', 'asthma', 'copd', 'oxygen', 'ventilator'],
        'gastroenterology': ['stomach', 'liver', 'intestine', 'colon', 'gastrointestinal', 'digestive'],
        'nephrology': ['kidney', 'renal', 'dialysis', 'creatinine', 'glomerular'],
        'hematology': ['blood', 'anemia', 'leukemia', 'hemoglobin', 'coagulation'],
        'psychiatry': ['mental', 'depression', 'anxiety', 'psychiatric', 'therapy', 'psychotherapy'],
        'dermatology': ['skin', 'rash', 'dermatitis', 'eczema', 'acne'],
        'orthopedics': ['bone', 'fracture', 'joint', 'orthopedic', 'musculoskeletal'],
        'ophthalmology': ['eye', 'vision', 'retina', 'glaucoma', 'cataract'],
        'urology': ['urinary', 'bladder', 'prostate', 'kidney stone', 'urological'],
        'pediatrics': ['child', 'pediatric', 'neonatal', 'infant', 'adolescent'],
        'obstetrics_gynecology': ['pregnancy', 'obstetric', 'gynecological', 'women\'s health', 'reproductive'],
        'surgery': ['surgical', 'operation', 'procedure', 'anesthesia', 'postoperative'],
        'emergency_medicine': ['emergency', 'trauma', 'acute care', 'resuscitation'],
        'critical_care': ['icu', 'critical care', 'intensive care', 'ventilator'],
        'pathology': ['biopsy', 'histology', 'pathological', 'tissue examination'],
        'laboratory_medicine': ['lab test', 'biomarker', 'diagnostic test', 'laboratory'],
        'medical_imaging': ['imaging', 'radiology', 'x-ray', 'ct scan', 'mri', 'ultrasound'],
        'bioinformatics': ['computational', 'data analysis', 'algorithm', 'bioinformatics'],
        'genomics': ['genetic', 'genome', 'sequencing', 'dna', 'genomic'],
        'pharmacology': ['drug', 'pharmacology', 'pharmacokinetic', 'medication'],
        'public_health': ['epidemiology', 'population health', 'public health', 'prevention'],
        'pain_medicine': ['pain', 'analgesia', 'pain management', 'chronic pain'],
        'nutrition': ['diet', 'nutrition', 'vitamin', 'malnutrition', 'obesity'],
        'allergy_immunology': ['allergy', 'immune', 'immunology', 'allergic', 'hypersensitivity'],
        'rehabilitation_medicine': ['rehabilitation', 'physical therapy', 'recovery', 'disability']
    }

    @staticmethod
    def detect_domain_from_query(query: str, current_domain: str = "auto") -> str:
        """Detect medical domain from query text.

        Args:
            query: Raw user query.
            current_domain: Domain already chosen by the caller; anything
                other than ``"auto"`` is returned unchanged.

        Returns:
            The domain id whose keyword list has the most hits in the query,
            or ``'general_medical'`` when nothing matches. On a tie the
            domain listed first in ``DOMAIN_PATTERNS`` wins.
        """
        if current_domain != "auto":
            return current_domain

        query_lower = query.lower()
        best_domain = 'general_medical'
        best_score = 0
        for domain_id, patterns in DomainDetector.DOMAIN_PATTERNS.items():
            score = sum(1 for pattern in patterns if pattern in query_lower)
            # Strictly greater: an equal score never displaces an earlier domain.
            if score > best_score:
                best_score = score
                best_domain = domain_id
        return best_domain
| # ============================================================================ | |
| # MEDICAL DOMAIN CONFIGURATION (UPDATED) | |
| # ============================================================================ | |
# Catalogue of selectable medical domains. Each entry carries the domain id,
# a display name, an icon and a short description.
# MedicalResearchEngine.get_domain_info() looks entries up by "id".
# "clinical_research" and "general_medical" have no keyword list in
# DomainDetector.DOMAIN_PATTERNS; "general_medical" is the detector's fallback.
MEDICAL_DOMAINS = [
    {"id": "internal_medicine", "name": "Internal Medicine", "icon": "🏥",
     "description": "General internal medicine and diagnosis"},
    {"id": "endocrinology", "name": "Endocrinology", "icon": "🧬",
     "description": "Hormonal and metabolic disorders"},
    {"id": "cardiology", "name": "Cardiology", "icon": "❤️",
     "description": "Heart and cardiovascular diseases"},
    {"id": "neurology", "name": "Neurology", "icon": "🧠",
     "description": "Brain and nervous system disorders"},
    # NOTE(review): oncology uses the same icon as infectious_disease below.
    {"id": "oncology", "name": "Oncology", "icon": "🦠",
     "description": "Cancer research and treatment"},
    {"id": "infectious_disease", "name": "Infectious Diseases", "icon": "🦠",
     "description": "Infectious diseases and microbiology"},
    {"id": "clinical_research", "name": "Clinical Research", "icon": "📊",
     "description": "Clinical trials and evidence-based medicine"},
    {"id": "general_medical", "name": "General Medical", "icon": "⚕️",
     "description": "General medical research and clinical questions"},
    {"id": "pulmonology", "name": "Pulmonology", "icon": "🫁",
     "description": "Respiratory diseases and lung health"},
    {"id": "gastroenterology", "name": "Gastroenterology", "icon": "🍽️",
     "description": "Digestive system disorders"},
    {"id": "nephrology", "name": "Nephrology", "icon": "🫘",
     "description": "Kidney diseases and disorders"},
    {"id": "hematology", "name": "Hematology", "icon": "🩸",
     "description": "Blood disorders and hematologic diseases"},
    {"id": "surgery", "name": "Surgery", "icon": "🔪",
     "description": "Surgical procedures and interventions"},
    {"id": "orthopedics", "name": "Orthopedics", "icon": "🦴",
     "description": "Musculoskeletal disorders and injuries"},
    {"id": "urology", "name": "Urology", "icon": "🚽",
     "description": "Urinary tract and male reproductive system"},
    {"id": "ophthalmology", "name": "Ophthalmology", "icon": "👁️",
     "description": "Eye diseases and vision disorders"},
    {"id": "dermatology", "name": "Dermatology", "icon": "🦋",
     "description": "Skin diseases and disorders"},
    {"id": "psychiatry", "name": "Psychiatry", "icon": "🧘",
     "description": "Mental health and psychiatric disorders"},
    {"id": "obstetrics_gynecology", "name": "Obstetrics & Gynecology", "icon": "🤰",
     "description": "Women's health and reproductive medicine"},
    {"id": "pediatrics", "name": "Pediatrics", "icon": "👶",
     "description": "Child health and pediatric medicine"},
    {"id": "emergency_medicine", "name": "Emergency Medicine", "icon": "🚑",
     "description": "Emergency care and acute medicine"},
    {"id": "critical_care", "name": "Critical Care Medicine", "icon": "🏥",
     "description": "Intensive care and critical care medicine"},
    {"id": "pathology", "name": "Pathology", "icon": "🔬",
     "description": "Disease diagnosis and laboratory medicine"},
    {"id": "laboratory_medicine", "name": "Laboratory Medicine", "icon": "🧪",
     "description": "Clinical laboratory testing and diagnostics"},
    {"id": "medical_imaging", "name": "Medical Imaging & Radiology AI", "icon": "📷",
     "description": "Medical imaging and radiological diagnosis"},
    {"id": "bioinformatics", "name": "Bioinformatics", "icon": "💻",
     "description": "Computational biology and data analysis"},
    {"id": "genomics", "name": "Genomics & Sequencing", "icon": "🧬",
     "description": "Genomic research and sequencing technologies"},
    {"id": "pharmacology", "name": "Pharmacology", "icon": "💊",
     "description": "Drug research and pharmacology"},
    {"id": "public_health", "name": "Public Health Analytics", "icon": "🌍",
     "description": "Public health and epidemiology"},
    {"id": "pain_medicine", "name": "Pain Medicine", "icon": "🩹",
     "description": "Pain management and treatment"},
    {"id": "nutrition", "name": "Nutrition", "icon": "🍎",
     "description": "Nutritional science and dietetics"},
    {"id": "allergy_immunology", "name": "Allergy & Immunology", "icon": "🤧",
     "description": "Allergies and immune system disorders"},
    {"id": "rehabilitation_medicine", "name": "Rehabilitation Medicine", "icon": "♿",
     "description": "Physical medicine and rehabilitation"},
    # Sentinel entry: "auto" asks the engine to detect the domain from the query.
    {"id": "auto", "name": "Auto-detect", "icon": "🔍",
     "description": "Automatic domain detection"}
]
# List form of the user-role catalogue (id, display name, icon, description).
# The entries mirror RoleBasedReasoningAdapter.ROLE_DESCRIPTIONS, with the
# dictionary key carried as "id"; keep the two in sync when editing.
USER_ROLES = [
    {"id": "patient", "name": "Patient", "icon": "🩺",
     "description": "Patients and general public seeking health information"},
    {"id": "student", "name": "Student", "icon": "🎓",
     "description": "Medical students and trainees"},
    {"id": "clinician", "name": "Clinician", "icon": "👨⚕️",
     "description": "Healthcare providers and nurses"},
    {"id": "doctor", "name": "Doctor", "icon": "⚕️",
     "description": "Medical doctors and physicians"},
    {"id": "researcher", "name": "Researcher", "icon": "🔬",
     "description": "Academic researchers and scientists"},
    {"id": "professor", "name": "Professor", "icon": "📚",
     "description": "Academic educators and professors"},
    {"id": "pharmacist", "name": "Pharmacist", "icon": "💊",
     "description": "Pharmacy professionals and pharmacists"},
    {"id": "general", "name": "General User", "icon": "👤",
     "description": "General audience"},
    # Sentinel entry: "auto" asks the engine to detect the role from the query.
    {"id": "auto", "name": "Auto-detect", "icon": "🤖",
     "description": "Automatically detect user role"}
]
| # ============================================================================ | |
| # SIMPLE QUERY HANDLER | |
| # ============================================================================ | |
class SimpleQueryHandler:
    """Handle simple queries like greetings without research analysis.

    All helpers are stateless and declared as static methods so they can be
    called on the class or on an instance; without the decorator an instance
    call raises ``TypeError`` because the instance is passed as the first
    positional argument.
    """

    # Basic responses for common queries (matching rag_engine.py).
    # Keys are compared against the stripped, lower-cased query.
    BASIC_RESPONSES = {
        "hi": "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based medical research questions across various specialties. How can I assist you today?",
        "hello": "👋 Welcome! I specialize in medical research analysis using evidence-based reasoning. What medical topic would you like to explore?",
        "hey": "👋 Hey there! I'm ready to help with medical research questions. What would you like to know?",
        "greetings": "👋 Greetings! I'm your Medical Research Assistant, here to help with evidence-based medical information. What's on your mind?",
        "good morning": "🌅 Good morning! I'm ready to assist with medical research questions. How can I help you today?",
        "good afternoon": "☀️ Good afternoon! I'm here to help with evidence-based medical research. What would you like to discuss?",
        "good evening": "🌙 Good evening! I'm available to assist with medical research questions. How can I help?",
        "how are you": "😊 I'm doing well, thank you! Ready to help with medical research questions. How can I assist you today?",
        "what's up": "👋 Not much! I'm here and ready to help with medical research. What would you like to explore?",
        "sup": "👋 Hey! I'm here to help with medical research. What's on your mind?",
        "thanks": "🙏 You're welcome! I'm here whenever you need help with medical research.",
        "thank you": "🙏 You're welcome! Feel free to ask more medical research questions anytime.",
        "bye": "👋 Goodbye! Feel free to return anytime for medical research assistance.",
        "goodbye": "👋 Goodbye! I'm here whenever you need help with medical research questions.",
        "help": "🆘 **How to use:**\n1. Ask medical research questions\n2. Specify domain or use auto-detect\n3. Choose your role (patient, doctor, researcher, etc.)\n\n**Examples:**\n• 'Latest treatments for diabetes'\n• 'Research gaps in cancer immunotherapy'\n• 'Clinical guidelines for hypertension'\n• 'Explain MRI findings in simple terms' (as a patient)\n• 'Compare treatment protocols for pneumonia' (as a clinician)",
        "what can you do": "🔬 **Medical Research Assistant Capabilities:**\n• Evidence-based medical analysis\n• Domain-specific research insights\n• Role-based responses (patient, doctor, researcher, etc.)\n• Paper summarization and analysis\n• Research gap identification\n• Guideline detection and analysis\n• Simple query handling (greetings, basic questions)\n\nAsk me about any medical research topic!"
    }

    @staticmethod
    def is_simple_query(query: str) -> bool:
        """Check if query is a simple greeting or basic question.

        Returns True when the normalized query is a key of
        ``BASIC_RESPONSES``, or when it has at most two words and does not
        look like a research question.
        """
        query_lower = query.lower().strip()

        # Exact matches against the canned responses.
        if query_lower in SimpleQueryHandler.BASIC_RESPONSES:
            return True

        # Very short queries (0-2 words) count as simple unless they carry
        # a research indicator.
        # NOTE(review): this also classifies bare topics such as
        # "lung cancer" as simple — confirm that is intended.
        words = query.split()
        return len(words) <= 2 and not SimpleQueryHandler._looks_like_research_query(query)

    @staticmethod
    def _looks_like_research_query(query: str) -> bool:
        """Check if query looks like a research question.

        True when the query contains any research indicator term, or when it
        starts with a question word and has more than three words.
        """
        query_lower = query.lower()

        # Research question indicators (substring match).
        research_indicators = [
            'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis',
            'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical',
            'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis',
            'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk',
            'benefit', 'recommendation', 'guideline', 'standard', 'protocol'
        ]
        if any(indicator in query_lower for indicator in research_indicators):
            return True

        # A leading question word only counts for longer (more than basic) questions.
        question_words = ('what', 'why', 'how', 'when', 'where', 'which', 'who')
        return query_lower.startswith(question_words) and len(query.split()) > 3

    @staticmethod
    def get_simple_response(query: str, role: str = "general") -> str:
        """Get appropriate simple response based on role.

        Args:
            query: Raw user query.
            role: Role id used to personalize the generic greeting.

        Returns:
            The canned response for an exact match, otherwise a generic
            greeting that mentions the role's display name.
        """
        query_lower = query.lower().strip()
        canned = SimpleQueryHandler.BASIC_RESPONSES.get(query_lower)
        if canned is not None:
            return canned

        # Generic simple response
        role_info = RoleBasedReasoningAdapter.get_role_info(role)
        return f"👋 Hello! I'm your Medical Research Assistant. As a {role_info['name'].lower()}, how can I help with your medical questions today?"
| # ============================================================================ | |
| # MEDICAL RESEARCH CHAT ENGINE (UPDATED FOR ROLE-BASED REASONING) | |
| # ============================================================================ | |
| class MedicalResearchEngine: | |
| """Production-ready medical research engine with role-based reasoning""" | |
def __init__(self):
    """Set up helpers, the worker pool and API status, then probe the backend."""
    # Configuration and API status; the status fields are updated by
    # _test_api_connection() below.
    self.model = MODEL
    self.api_configured = False
    self.api_error = None

    # Per-session engine cache and the thread pool used for blocking calls.
    self.engines: Dict[str, Any] = {}
    self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

    # Helper components.
    self.domain_detector = DomainDetector()
    self.role_adapter = RoleBasedReasoningAdapter()
    self.simple_query_handler = SimpleQueryHandler()

    # Basic responses for common queries (shared with SimpleQueryHandler).
    self.basic_responses = SimpleQueryHandler.BASIC_RESPONSES

    self._test_api_connection()
    print("🚀 Medical Research Engine with Role-Based Reasoning Initialized")
| def _test_api_connection(self): | |
| """Test API connection""" | |
| try: | |
| # Try to import EnhancedRAGEngine from rag_engine.py | |
| from chat.rag_engine import EnhancedRAGEngine | |
| # Test initialization | |
| test_engine = EnhancedRAGEngine(session_id="test_init", model=self.model, use_real_time=False) | |
| self.api_configured = True | |
| print("✅ API Connection Test: SUCCESS") | |
| print(f" Model: {self.model}") | |
| print(f" Role-based reasoning: ENABLED") | |
| print(f" Simple query handling: ENABLED") | |
| except ImportError as e: | |
| self.api_configured = False | |
| self.api_error = str(e) | |
| print(f"❌ API Connection Test: FAILED - {e}") | |
| except Exception as e: | |
| self.api_configured = False | |
| self.api_error = str(e) | |
| print(f"❌ API Connection Test: FAILED - {e}") | |
def detect_domain_from_query(self, query: str, current_domain: str = "auto") -> str:
    """Resolve the medical domain for *query* via ``self.domain_detector``.

    Both arguments are forwarded unchanged to the detector's
    ``detect_domain_from_query`` and its result is returned as-is.
    """
    detector = self.domain_detector
    return detector.detect_domain_from_query(query, current_domain)
def detect_user_role_from_query(self, query: str, current_role: str = "auto") -> str:
    """Resolve the user role for *query* via ``self.role_adapter``.

    Both arguments are forwarded unchanged to the adapter's
    ``detect_role_from_query`` and its result is returned as-is.
    """
    adapter = self.role_adapter
    return adapter.detect_role_from_query(query, current_role)
def get_domain_info(self, domain_id: str) -> Dict:
    """Return the ``MEDICAL_DOMAINS`` entry whose id equals *domain_id*.

    For an unknown id a placeholder entry is built, with a title-cased name
    derived from the id and a generic icon and description.
    """
    known = next((entry for entry in MEDICAL_DOMAINS if entry["id"] == domain_id), None)
    if known is not None:
        return known

    # Unknown id: synthesize a minimal entry with the same keys.
    display_name = domain_id.replace('_', ' ').title()
    return {
        "id": domain_id,
        "name": display_name,
        "icon": "⚕️",
        "description": "Medical research domain"
    }
def get_user_role_info(self, role_id: str) -> Dict:
    """Look up the descriptor for *role_id* via ``self.role_adapter``."""
    adapter = self.role_adapter
    return adapter.get_role_info(role_id)
| def _classify_query(self, query: str) -> str: | |
| """Classify query type""" | |
| # Check if it's a simple query | |
| if self.simple_query_handler.is_simple_query(query): | |
| return "simple" | |
| # Check for paper summarization | |
| query_lower = query.lower().strip() | |
| if any(term in query_lower for term in ['summarize paper', 'paper titled', 'article about', 'summary of paper']): | |
| return "paper_summary" | |
| # Default to research query | |
| return "research" | |
async def process_query_async(
    self,
    query: str,
    domain: str = "general_medical",
    session_id: str = "default",
    user_role: str = "auto",  # Updated from user_context
    custom_role_prompt: Optional[str] = None,  # New: Custom role prompt
    max_papers: int = 15,
    use_real_time: Optional[bool] = True,  # New: Control real-time search
    use_fallback: Optional[bool] = False,  # New: Use fallback papers
    **kwargs
) -> Dict[str, Any]:
    """Entry point: resolve domain and role, classify the query, dispatch it.

    ``"auto"`` for *domain* or *user_role* triggers detection from the query
    text. Simple queries are answered inline with a canned response; paper
    summaries and research questions are delegated to their handlers. Extra
    keyword arguments are passed to the research handler as a dict.

    Returns:
        A result dict with at least ``answer``, the paper counters,
        ``confidence_score``, ``query_type`` and ``user_role``.
    """
    # Resolve "auto" placeholders before anything else.
    if domain == "auto":
        domain = self.detect_domain_from_query(query)
    if user_role == "auto":
        user_role = self.detect_user_role_from_query(query)

    domain_info = self.get_domain_info(domain)
    role_info = self.get_user_role_info(user_role)

    query_type = self._classify_query(query)

    # Single-paper summarization.
    if query_type == "paper_summary":
        print(f" 📄 Detected paper summarization request")
        return await self._handle_paper_summarization(
            query, session_id, domain, user_role, custom_role_prompt
        )

    # Anything that is not simple is treated as a research question.
    if query_type != "simple":
        print(f" 🔬 Detected research query - using role-based reasoning")
        return await self._handle_research_query(
            query, domain, user_role, session_id,
            custom_role_prompt, max_papers, use_real_time, use_fallback, kwargs
        )

    # Greetings and other simple queries: no papers involved.
    print(f" 💬 Detected simple query - using role-appropriate response")
    response_text = self.simple_query_handler.get_simple_response(query, user_role)
    formatted = self._format_simple_response(response_text, domain_info, role_info, query)
    return {
        "answer": formatted,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {"overall_score": 95.0, "level": "HIGH 🟢"},
        "query_type": "simple",
        "user_role": user_role,
        "domain": domain,
        "domain_info": domain_info,
        "role_info": role_info,
        "reasoning_method": "simple_response"
    }
| def _format_simple_response(self, response_text: str, domain_info: Dict, | |
| role_info: Dict, query: str) -> str: | |
| """Format simple response with role and domain info""" | |
| return f"""# {response_text} | |
| **Role:** {role_info['name']} {role_info.get('icon', '👤')} | |
| **Domain:** {domain_info['name']} {domain_info.get('icon', '⚕️')} | |
| Feel free to ask me medical research questions! I'll provide information tailored to your needs as a {role_info['name'].lower()}.""" | |
| async def _handle_research_query(self, query: str, domain: str, user_role: str, | |
| session_id: str, custom_role_prompt: str, | |
| max_papers: int, use_real_time: bool, | |
| use_fallback: bool, kwargs: Dict) -> Dict[str, Any]: | |
| """Handle medical research queries with role-based reasoning""" | |
| # Get domain and role info | |
| domain_info = self.get_domain_info(domain) | |
| role_info = self.get_user_role_info(user_role) | |
| # Initialize engine | |
| engine = self.initialize_session(session_id) | |
| # Run in thread pool | |
| loop = asyncio.get_event_loop() | |
| try: | |
| # Process query with timeout | |
| print(f" 🔍 Processing with role-based reasoning (role: {user_role}, domain: {domain})") | |
| response = await asyncio.wait_for( | |
| loop.run_in_executor( | |
| self.executor, | |
| lambda: engine.answer_research_question( | |
| query=query, | |
| domain=domain, | |
| max_papers=max_papers, | |
| use_memory=True, | |
| user_context=user_role, # For backward compatibility | |
| use_fallback=use_fallback, | |
| role=user_role, # NEW: Role parameter | |
| role_system_prompt=custom_role_prompt, # NEW: Custom role prompt | |
| use_real_time=use_real_time if hasattr(engine, 'use_real_time') else True | |
| ) | |
| ), | |
| timeout=kwargs.get('timeout', 90.0) # Increased timeout for research | |
| ) | |
| # Clean up response | |
| answer = response.get("answer", "") | |
| # Prepare result | |
| result = { | |
| "answer": answer, | |
| "papers_used": response.get("papers_used", 0), | |
| "real_papers_used": response.get("real_papers_used", 0), | |
| "demo_papers_used": response.get("demo_papers_used", 0), | |
| "confidence_score": response.get("confidence_score", {"overall_score": 0}), | |
| "query_type": "research", | |
| "user_role": response.get("user_context", user_role), # Get from response | |
| "domain": domain, | |
| "domain_info": domain_info, | |
| "role_info": role_info, | |
| "reasoning_method": response.get("reasoning_method", "role_based"), | |
| "guideline_info": response.get("guideline_info") | |
| } | |
| # Add enhanced metrics if available | |
| if "enhanced_metrics" in response: | |
| result["metrics"] = response["enhanced_metrics"] | |
| print(f" ✅ Research query processed successfully") | |
| print(f" Papers used: {result['papers_used']} (real: {result['real_papers_used']}, demo: {result['demo_papers_used']})") | |
| print(f" Confidence: {result['confidence_score'].get('overall_score', 0)}/100") | |
| return result | |
| except asyncio.TimeoutError: | |
| print(f" ⏱️ Query timeout - creating timeout response") | |
| return self._create_timeout_response(query, domain_info, role_info) | |
| except Exception as e: | |
| print(f" ❌ Research query error: {e}") | |
| return self._create_error_response(query, domain_info, role_info, str(e)) | |
| async def _handle_paper_summarization(self, query: str, session_id: str, | |
| domain: str, user_role: str, | |
| custom_role_prompt: str) -> Dict[str, Any]: | |
| """Handle single paper summarization requests""" | |
| try: | |
| engine = self.initialize_session(session_id) | |
| # Extract paper title from query | |
| paper_title = self._extract_paper_title(query) | |
| if not paper_title: | |
| return { | |
| "answer": """# 📄 **Paper Summarization Help** | |
| Please provide a paper title to summarize, for example: | |
| • "Summarize the paper 'Deep Learning for Medical Imaging'" | |
| • "What does the paper 'COVID-19 Vaccine Efficacy Study' find?" | |
| • "Give me a summary of 'Guidelines for Hypertension Management'" | |
| I'll provide a comprehensive analysis including methodology, findings, and implications.""", | |
| "papers_used": 0, | |
| "real_papers_used": 0, | |
| "demo_papers_used": 0, | |
| "confidence_score": {"overall_score": 0}, | |
| "query_type": "help", | |
| "user_role": user_role | |
| } | |
| # Get domain and role info | |
| domain_info = self.get_domain_info(domain) | |
| role_info = self.get_user_role_info(user_role) | |
| # Run summarization | |
| loop = asyncio.get_event_loop() | |
| summary_result = await asyncio.wait_for( | |
| loop.run_in_executor( | |
| self.executor, | |
| lambda: engine.summarize_single_paper( | |
| paper_title=paper_title, | |
| user_query=query, | |
| domain=domain | |
| ) | |
| ), | |
| timeout=30.0 | |
| ) | |
| if summary_result.get("success"): | |
| # Format the response with role context | |
| response_text = self._format_paper_summary(summary_result, domain_info, role_info) | |
| return { | |
| "answer": response_text, | |
| "papers_used": 1, | |
| "real_papers_used": 1 if not summary_result.get("is_demo", True) else 0, | |
| "demo_papers_used": 1 if summary_result.get("is_demo", False) else 0, | |
| "confidence_score": {"overall_score": summary_result.get("confidence", 0.7) * 100}, | |
| "query_type": "paper_summary", | |
| "user_role": user_role, | |
| "domain": domain, | |
| "domain_info": domain_info, | |
| "role_info": role_info, | |
| "reasoning_method": "paper_summary", | |
| "paper_details": { | |
| "title": summary_result.get("paper_title", ""), | |
| "authors": summary_result.get("authors", []), | |
| "date": summary_result.get("publication_date", ""), | |
| "source": summary_result.get("source", "") | |
| } | |
| } | |
| else: | |
| return { | |
| "answer": f"""# 🔍 **Paper Not Found** | |
| I couldn't find the paper: *"{paper_title}"* | |
| **Suggestions:** | |
| 1. Check the exact title spelling | |
| 2. Try a more general search | |
| 3. Search by key concepts instead | |
| You can also request: "Find papers about [topic]" or "Research on [condition]".""", | |
| "papers_used": 0, | |
| "real_papers_used": 0, | |
| "demo_papers_used": 0, | |
| "confidence_score": {"overall_score": 0}, | |
| "query_type": "paper_summary_error", | |
| "user_role": user_role | |
| } | |
| except Exception as e: | |
| print(f" ❌ Paper summarization error: {e}") | |
| return { | |
| "answer": f"""# 🚨 **Summarization Error** | |
| Error: {str(e)} | |
| Please try again with a different paper or simpler request.""", | |
| "papers_used": 0, | |
| "real_papers_used": 0, | |
| "demo_papers_used": 0, | |
| "confidence_score": {"overall_score": 0}, | |
| "query_type": "error", | |
| "user_role": user_role | |
| } | |
| def _extract_paper_title(self, query: str) -> Optional[str]: | |
| """Extract paper title from query""" | |
| # Pattern 1: Paper titled "Title" | |
| match = re.search(r'paper (?:titled|called) "([^"]+)"', query.lower()) | |
| if match: | |
| return match.group(1).strip() | |
| # Pattern 2: "Title" paper | |
| match = re.search(r'"([^"]+)" paper', query.lower()) | |
| if match: | |
| return match.group(1).strip() | |
| # Pattern 3: Summarize the paper Title | |
| match = re.search(r'summarize (?:the )?paper (.+)', query.lower()) | |
| if match: | |
| title = match.group(1).strip() | |
| title = re.sub(r'\?$', '', title) | |
| return title.strip() | |
| # Pattern 4: Summary of paper Title | |
| match = re.search(r'summary of (?:the )?paper (.+)', query.lower()) | |
| if match: | |
| title = match.group(1).strip() | |
| title = re.sub(r'\?$', '', title) | |
| return title.strip() | |
| return None | |
| def _format_paper_summary(self, summary_result: Dict, domain_info: Dict, | |
| role_info: Dict) -> str: | |
| """Format paper summary for display with role context""" | |
| title = summary_result.get("paper_title", "Unknown Paper") | |
| authors = summary_result.get("authors", []) | |
| date = summary_result.get("publication_date", "") | |
| source = summary_result.get("source", "") | |
| summary = summary_result.get("summary", "") | |
| confidence = summary_result.get("confidence", 0.7) * 100 | |
| # Format authors | |
| if authors and isinstance(authors, list): | |
| if len(authors) <= 3: | |
| author_str = ", ".join(authors) | |
| else: | |
| author_str = f"{authors[0]} et al." | |
| else: | |
| author_str = "Unknown authors" | |
| # Build response with role context | |
| response = f"""# 📄 **Paper Analysis** | |
| **Role:** {role_info['name']} {role_info.get('icon', '👤')} | |
| **Domain:** {domain_info['name']} {domain_info.get('icon', '⚕️')} | |
| **Title:** {title} | |
| **Authors:** {author_str} | |
| **Published:** {date} | |
| **Source:** {source} | |
| --- | |
| ## 📋 **Summary** | |
| {summary} | |
| --- | |
| ## 🔍 **Key Points for {role_info['name']}** | |
| • Main findings and conclusions relevant to {role_info['name'].lower()} needs | |
| • Methodology and study design appropriate for {role_info['name'].lower()} understanding | |
| • Clinical/research implications from {role_info['name'].lower()} perspective | |
| • Limitations and future directions | |
| *Analysis confidence: {confidence:.1f}%* | |
| *Tailored for {role_info['name'].lower()} perspective*""" | |
| return response | |
| def _create_timeout_response(self, query: str, domain_info: Dict, role_info: Dict) -> Dict[str, Any]: | |
| """Create timeout response""" | |
| return { | |
| "answer": f"""# ⏱️ **Query Timed Out** | |
| **Role:** {role_info['name']} {role_info.get('icon', '👤')} | |
| **Domain:** {domain_info['name']} | |
| **Query:** {query} | |
| The analysis was taking too long. Try: | |
| • Simplifying your question | |
| • Being more specific | |
| • Reducing the scope | |
| **Example for {role_info['name'].lower()}:** | |
| "Key treatments for [condition] in {domain_info['name']}" """, | |
| "papers_used": 0, | |
| "real_papers_used": 0, | |
| "demo_papers_used": 0, | |
| "confidence_score": {"overall_score": 0}, | |
| "query_type": "error", | |
| "user_role": role_info.get('id', 'general'), | |
| "domain": domain_info.get('id', 'general_medical'), | |
| "error": "timeout" | |
| } | |
| def _create_error_response(self, query: str, domain_info: Dict, role_info: Dict, error: str) -> Dict[str, Any]: | |
| """Create error response""" | |
| return { | |
| "answer": f"""# 🚨 **Analysis Error** | |
| **Role:** {role_info['name']} {role_info.get('icon', '👤')} | |
| **Domain:** {domain_info['name']} | |
| **Error:** {error} | |
| **Troubleshooting for {role_info['name'].lower()}:** | |
| 1. Check your internet connection | |
| 2. Try a simpler query | |
| 3. Verify domain selection | |
| 4. Contact support if problem persists""", | |
| "papers_used": 0, | |
| "real_papers_used": 0, | |
| "demo_papers_used": 0, | |
| "confidence_score": {"overall_score": 0}, | |
| "query_type": "error", | |
| "user_role": role_info.get('id', 'general'), | |
| "domain": domain_info.get('id', 'general_medical'), | |
| "error": error | |
| } | |
def initialize_session(self, session_id: str):
    """Return the engine for ``session_id``, creating it on first use.

    An engine already stored in ``self.engines`` is returned as-is.
    Otherwise an ``EnhancedRAGEngine`` is built when the API is configured,
    or a fallback engine when it is not. Any exception raised while
    building is reported and the fallback engine is stored instead.

    Args:
        session_id: Key under which the engine is cached.

    Returns:
        The engine stored for the session.
    """
    if session_id in self.engines:
        return self.engines[session_id]

    try:
        if self.api_configured:
            # Imported at call time, only on the path that needs it.
            from chat.rag_engine import EnhancedRAGEngine
            self.engines[session_id] = EnhancedRAGEngine(
                session_id=session_id,
                model=self.model,
                use_real_time=True,
            )
            print(f"✅ Session engine initialized: {session_id}")
        else:
            self.engines[session_id] = self._create_fallback_engine()
            print(f"⚠️ Session {session_id}: Using fallback engine")
    except Exception as exc:
        print(f"❌ Failed to initialize engine for {session_id}: {exc}")
        self.engines[session_id] = self._create_fallback_engine()

    return self.engines[session_id]
| def _create_fallback_engine(self): | |
| """Create a fallback engine when API fails""" | |
| class FallbackEngine: | |
| def __init__(self): | |
| self.session_id = "fallback" | |
| self.metrics = {"total_queries": 0} | |
| self.use_real_time = False | |
| def answer_research_question(self, **kwargs): | |
| query = kwargs.get("query", "") | |
| domain = kwargs.get("domain", "general_medical") | |
| role = kwargs.get("role", "general") | |
| custom_role_prompt = kwargs.get("role_system_prompt") | |
| self.metrics["total_queries"] += 1 | |
| if query.lower().strip() in {"hi", "hello", "hey"}: | |
| role_info = RoleBasedReasoningAdapter.get_role_info(role) | |
| return { | |
| "answer": f"""# 👋 Welcome to Medical Research Assistant! | |
| **Role:** {role_info['name']} {role_info.get('icon', '👤')} | |
| **Domain:** {domain.replace('_', ' ').title()} | |
| **Setup Required:** | |
| 1. Get an API key from https://console.groq.com | |
| 2. Create a `.env` file with: | |
| GROQ_API_KEY=your_key_here | |
| MODEL=gpt-oss-120b | |
| 3. Restart the server | |
| **Features After Setup:** | |
| • Role-based medical research analysis | |
| • Domain-specific insights tailored to {role_info['name'].lower()} needs | |
| • Paper summarization with guideline detection | |
| • Research gap analysis""", | |
| "papers_used": 0, | |
| "real_papers_used": 0, | |
| "demo_papers_used": 0, | |
| "confidence_score": {"overall_score": 15}, | |
| "user_context": role, | |
| "reasoning_method": "fallback" | |
| } | |
| role_info = RoleBasedReasoningAdapter.get_role_info(role) | |
| return { | |
| "answer": f"""⚠️ **API Not Configured** | |
| **Role:** {role_info['name']} {role_info.get('icon', '👤')} | |
| **Domain:** {domain.replace('_', ' ').title()} | |
| Current query: {query} | |
| Please configure your GROQ_API_KEY in the .env file and restart the server. | |
| For {role_info['name'].lower()}-appropriate responses, setup is required.""", | |
| "papers_used": 0, | |
| "real_papers_used": 0, | |
| "demo_papers_used": 0, | |
| "confidence_score": {"overall_score": 10}, | |
| "user_context": role, | |
| "reasoning_method": "fallback" | |
| } | |
| def summarize_single_paper(self, **kwargs): | |
| """Fallback for single paper summarization""" | |
| paper_title = kwargs.get("paper_title", "Unknown Paper") | |
| domain = kwargs.get("domain", "general_medical") | |
| role = kwargs.get("role", "general") | |
| role_info = RoleBasedReasoningAdapter.get_role_info(role) | |
| return { | |
| "success": False, | |
| "error": "API not configured", | |
| "paper_title": paper_title, | |
| "summary": f"Please configure your API key to use paper analysis.\n\nRole: {role_info['name']}\nDomain: {domain}", | |
| "is_demo": True | |
| } | |
| return FallbackEngine() | |
def get_engine_status(self) -> Dict[str, Any]:
    """Report configuration, session and capability information.

    ``total_queries`` is the sum of ``metrics["total_queries"]`` over every
    cached engine that has a ``metrics`` attribute. ``api_error`` is only
    reported while the API is not configured.

    Returns:
        A status dict covering API state, model, session and query counts,
        supported domain/role counts, the feature list and the version.
    """
    query_count = sum(
        session_engine.metrics.get("total_queries", 0)
        for session_engine in self.engines.values()
        if hasattr(session_engine, 'metrics')
    )

    api_ok = self.api_configured
    reported_error = None if api_ok else self.api_error

    feature_names = [
        "role_based_medical_analysis",
        "domain_specific_insights",
        "user_role_adaptation",
        "paper_summarization",
        "guideline_detection",
        "simple_query_handling",
        "real_time_search",
    ]

    return {
        "api_configured": api_ok,
        "api_error": reported_error,
        "model": self.model,
        "active_sessions": len(self.engines),
        "total_queries": query_count,
        "domains_supported": len(MEDICAL_DOMAINS),
        "user_roles_supported": len(USER_ROLES),
        "reasoning_technique": "role_based_reasoning",
        "features": feature_names,
        "simple_query_handler": "ENABLED",
        "role_based_reasoning": "ENABLED",
        "version": "2.2.0",
    }
def clear_memory(self):
    """Drop every cached per-session engine.

    The ``self.engines`` mapping is emptied in place (the same dict object
    is kept), then a confirmation line is printed.
    """
    session_engines = self.engines
    session_engines.clear()
    print("🧹 Engine memory cleared for all sessions")
| # ============================================================================ | |
| # DEVELOPMENT TESTING | |
| # ============================================================================ | |
if __name__ == "__main__" and os.getenv("VERCEL") is None:
    # Manual smoke test: runs only when the file is executed directly and
    # the VERCEL environment variable is not set.
    banner = "=" * 60
    print("\n" + banner)
    print("🧪 TESTING MEDICAL RESEARCH ENGINE")
    print(banner)

    engine = MedicalResearchEngine()

    # Status overview
    status = engine.get_engine_status()
    shown_features = ', '.join(status['features'][:3])
    print("\n🔧 Engine Status:")
    print(f" API Configured: {status['api_configured']}")
    print(f" Model: {status['model']}")
    print(f" Features: {shown_features}...")
    print(f" Role-based reasoning: {status['role_based_reasoning']}")

    def _report_detection(heading, cases, detect):
        """Print what ``detect`` returns for each query next to the expected label."""
        print(heading)
        for query, expected in cases:
            detected = detect(query)
            print(f" '{query[:30]}...' → {detected} (expected: {expected})")

    # Domain detection samples
    test_queries = [
        ("What are the latest treatments for diabetes?", "endocrinology"),
        ("How to manage hypertension in elderly patients?", "cardiology"),
        ("Research on Alzheimer's disease biomarkers", "neurology"),
        ("Hello, how are you?", "simple greeting"),
    ]
    _report_detection("\n🔍 Testing domain detection:", test_queries,
                      engine.detect_domain_from_query)

    # Role detection samples
    role_queries = [
        ("I have diabetes and want to understand my treatment options", "patient"),
        ("As a medical student, I need to learn about ECG interpretation", "student"),
        ("What are the clinical guidelines for pneumonia treatment?", "clinician"),
        ("Latest research on cancer immunotherapy protocols", "researcher"),
    ]
    _report_detection("\n👤 Testing role detection:", role_queries,
                      engine.detect_user_role_from_query)

    print("\n✅ Engine test complete!")