paulhemb committed on
Commit
028d742
·
verified ·
1 Parent(s): 27364d6

Update api/engine.py

Browse files
Files changed (1) hide show
  1. api/engine.py +1056 -764
api/engine.py CHANGED
@@ -1,764 +1,1056 @@
1
- # api/engine.py - Production-Ready Medical Research Engine
2
- # Simplified with one robust reasoning technique for medical research
3
-
4
- import asyncio
5
- import json
6
- import os
7
- import sys
8
- import re
9
- from typing import Dict, Any, Optional, List
10
- from datetime import datetime
11
- import concurrent.futures
12
- from pathlib import Path
13
-
14
- # ============================================================================
15
- # ENVIRONMENT SETUP
16
- # ============================================================================
17
-
18
# Make the repository root importable so sibling packages resolve.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Imported only after the path tweak above, as in the original layout.
from dotenv import load_dotenv

# Candidate .env locations in priority order; only the first existing file
# is loaded, and it is loaded with override=True.
env_paths = [
    project_root / ".env",
    project_root / "api" / ".env",
    Path.cwd() / ".env",
]

env_loaded = False
for env_path in env_paths:
    if not env_path.exists():
        continue
    load_dotenv(dotenv_path=env_path, override=True)
    print(f"✅ Loaded environment from: {env_path}")
    env_loaded = True
    break

if not env_loaded:
    print("⚠️ No .env file found. Using system environment variables.")

# Credentials and model name consumed by the rest of the module.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
XAI_API_KEY = os.getenv("XAI_API_KEY")
MODEL = os.getenv("MODEL", "gpt-oss-120b")

if GROQ_API_KEY or XAI_API_KEY:
    # Only the last four characters of whichever key is set are echoed.
    last4 = (GROQ_API_KEY or XAI_API_KEY)[-4:]
    print(f"✅ API Key found: {'*' * 16}{last4}")
    print(f"✅ Model configured: {MODEL}")
else:
    print("❌ WARNING: No API key found in environment!")
    print(" Set GROQ_API_KEY or XAI_API_KEY in .env file")
54
-
55
-
56
- # ============================================================================
57
- # SINGLE REASONING TECHNIQUE: EVIDENCE-BASED MEDICAL REASONING
58
- # ============================================================================
59
-
60
class MedicalReasoning:
    """Prompt builder for the single evidence-based reasoning technique."""

    # Role-specific emphasis sentence injected into the prompt. Kept at class
    # level so the table is built once instead of on every call.
    _CONTEXT_FOCUS = {
        "clinician": "Focus on clinical application, treatment decisions, and patient management",
        "researcher": "Focus on methodology, evidence quality, and research implications",
        "student": "Focus on understanding concepts, foundational knowledge, and learning pathways",
        "patient": "Focus on understanding, personal implications, and practical next steps",
        "administrator": "Focus on implementation, resources, and systemic considerations",
        "general": "Focus on clear explanations and balanced overview",
    }

    # Emphasis used when the role is not one of the keys above.
    _DEFAULT_FOCUS = "Focus on evidence-based medical insights"

    @staticmethod
    def evidence_based_reasoning(query: str, domain: str, user_context: str, papers_count: int = 0) -> str:
        """Return the evidence-based reasoning prompt for a query.

        Args:
            query: The user's question, embedded verbatim in the prompt.
            domain: Domain identifier or name, interpolated as given.
            user_context: Role identifier used to pick the focus sentence;
                unknown roles fall back to a generic focus.
            papers_count: Accepted for interface compatibility; not used
                when building the prompt.

        Returns:
            The full prompt text.
        """
        focus = MedicalReasoning._CONTEXT_FOCUS.get(user_context, MedicalReasoning._DEFAULT_FOCUS)

        # Choose the article from the role's first letter so the prompt reads
        # "an administrator" rather than "a administrator".
        first_letter = str(user_context)[:1].lower()
        article = "an" if first_letter in {"a", "e", "i", "o", "u"} else "a"

        return f"""You are a medical research expert specializing in {domain}.
The user is {article} {user_context}. {focus}

QUERY: {query}

**Evidence-Based Reasoning Process:**

1. **Evidence Assessment:**
- What is the current state of evidence for this topic?
- What types of studies exist (RCTs, cohort studies, reviews)?
- What is the quality and strength of available evidence?

2. **Clinical/Research Context:**
- How does this apply to {domain} specifically?
- What are the practical implications for {user_context}?
- What are the key considerations in this context?

3. **Critical Analysis:**
- What are the strengths of current evidence?
- What limitations or gaps exist in current knowledge?
- What controversies or alternative perspectives exist?

4. **Practical Implications:**
- What are the actionable insights for {user_context}?
- What are the next steps or recommendations?
- What should be considered for implementation?

Provide a comprehensive, evidence-based answer that synthesizes medical knowledge
with practical implications for {user_context} in {domain}."""
110
-
111
-
112
- # ============================================================================
113
- # MEDICAL DOMAIN CONFIGURATION
114
- # ============================================================================
115
-
116
# Selectable medical domains. Each entry is a dict with the keys
# "id", "name", "icon" and "description", in the order listed below.
MEDICAL_DOMAINS = [
    {"id": domain_id, "name": label, "icon": icon, "description": blurb}
    for domain_id, label, icon, blurb in (
        ("internal_medicine", "Internal Medicine", "🏥", "General internal medicine and diagnosis"),
        ("endocrinology", "Endocrinology", "🧬", "Hormonal and metabolic disorders"),
        ("gastroenterology", "Gastroenterology", "🩸", "Digestive system disorders"),
        ("pulmonology", "Pulmonology", "🫁", "Respiratory diseases and lung disorders"),
        ("nephrology", "Nephrology", "🧪", "Kidney diseases and renal function"),
        ("hematology", "Hematology", "🩸", "Blood disorders and hematologic diseases"),
        ("infectious_disease", "Infectious Diseases", "🦠", "Infectious diseases and microbiology"),
        ("obstetrics_gynecology", "Obstetrics & Gynecology", "🤰",
         "Women's health, pregnancy and reproductive medicine"),
        ("pathology", "Pathology", "🔬", "Disease diagnosis through tissue examination"),
        ("laboratory_medicine", "Laboratory Medicine", "🧪", "Clinical laboratory testing and biomarkers"),
        ("bioinformatics", "Bioinformatics", "💻", "Computational analysis of biological data"),
        ("clinical_research", "Clinical Research", "📊", "Clinical trials and evidence-based medicine"),
        ("medical_imaging", "Medical Imaging", "🩻", "Medical imaging and radiology"),
        ("oncology", "Oncology", "🦠", "Cancer research and treatment"),
        ("cardiology", "Cardiology", "❤️", "Heart and cardiovascular diseases"),
        ("neurology", "Neurology", "🧠", "Brain and nervous system disorders"),
        ("pharmacology", "Pharmacology", "💊", "Drug therapy and medication management"),
        ("genomics", "Genomics", "🧬", "Genetic research and personalized medicine"),
        ("public_health", "Public Health", "🌍", "Population health and epidemiology"),
        ("surgery", "Surgery", "⚕️", "Surgical procedures and techniques"),
        ("pediatrics", "Pediatrics", "👶", "Child health and pediatric medicine"),
        ("psychiatry", "Psychiatry", "🧠", "Mental health and psychiatric disorders"),
        ("dermatology", "Dermatology", "🦋", "Skin diseases and dermatologic conditions"),
        ("orthopedics", "Orthopedics", "🦴", "Musculoskeletal disorders and bone health"),
        ("ophthalmology", "Ophthalmology", "👁️", "Eye diseases and vision care"),
        ("urology", "Urology", "💧", "Urinary system and male reproductive health"),
        ("emergency_medicine", "Emergency Medicine", "🚑", "Acute care and emergency response"),
        ("critical_care", "Critical Care", "🏥", "Intensive care and critical illness"),
        ("pain_medicine", "Pain Medicine", "⚕️", "Pain management and analgesia"),
        ("nutrition", "Nutrition", "🥗", "Clinical nutrition and dietary management"),
        ("allergy_immunology", "Allergy & Immunology", "🤧", "Allergic diseases and immune disorders"),
        ("rehabilitation_medicine", "Rehabilitation Medicine", "♿", "Physical therapy and recovery"),
        ("general_medical", "General Medical", "⚕️", "General medical research and clinical questions"),
        ("auto", "Auto-detect", "🤖", "Automatically detect domain from query"),
    )
]
186
-
187
# Selectable user roles, in the same dict shape as the domain entries:
# "id", "name", "icon", "description".
USER_CONTEXTS = [
    {"id": context_id, "name": label, "icon": icon, "description": blurb}
    for context_id, label, icon, blurb in (
        ("auto", "Auto-detect", "🤖", "Automatically detect user context"),
        ("clinician", "Clinician", "👨‍⚕️", "Medical doctors, nurses, and healthcare providers"),
        ("researcher", "Researcher", "🔬", "Academic researchers and scientists"),
        ("student", "Student", "🎓", "Medical students and trainees"),
        ("administrator", "Administrator", "💼", "Healthcare administrators and managers"),
        ("patient", "Patient", "👤", "Patients and general public"),
        ("general", "General", "👤", "General audience"),
    )
]
203
-
204
# Keyword lists used for domain auto-detection (simplified). Domains that
# have no entry here can never be auto-detected from query text.
DOMAIN_KEYWORDS = dict(
    internal_medicine=['diagnosis', 'chronic disease', 'acute disease', 'primary care'],
    endocrinology=['diabetes', 'thyroid', 'hormone', 'metabolism'],
    cardiology=['heart', 'cardiovascular', 'hypertension', 'ecg'],
    neurology=['brain', 'stroke', 'alzheimer', 'parkinson'],
    oncology=['cancer', 'tumor', 'chemotherapy', 'radiation'],
    surgery=['surgical', 'operation', 'procedure', 'anesthesia'],
    pediatrics=['child', 'pediatric', 'neonatal', 'infant'],
    psychiatry=['mental', 'depression', 'anxiety', 'psychiatric'],
    infectious_disease=['infection', 'bacterial', 'viral', 'antibiotic'],
)
216
-
217
# Keyword lists used for user-role auto-detection. Note that 'study' is
# listed under both 'researcher' and 'student'.
USER_CONTEXT_KEYWORDS = dict(
    clinician=['patient', 'clinical', 'treatment', 'diagnosis', 'therapy'],
    researcher=['research', 'study', 'methodology', 'evidence', 'publication'],
    student=['learn', 'study', 'exam', 'textbook', 'course'],
    patient=['i have', 'my symptoms', 'my doctor', 'my treatment', 'pain'],
    administrator=['policy', 'guideline', 'cost', 'efficiency', 'management'],
)
225
-
226
-
227
- # ============================================================================
228
- # MEDICAL RESEARCH CHAT ENGINE
229
- # ============================================================================
230
-
231
class MedicalResearchEngine:
    """Session-aware front end that routes medical queries to a RAG engine.

    One backend engine is cached per session id. When the backend cannot be
    constructed, a local fallback engine that only returns setup
    instructions is used instead. Blocking backend calls run on a thread
    pool so the async entry point does not block the event loop.
    """

    # Phrases that mark a query as a single-paper summarization request.
    # Covers the "Summarize the paper '...'" wording shown in the help text.
    _PAPER_REQUEST_RE = re.compile(
        r"summari[sz]e (?:the |this )?paper|paper (?:titled|called)|paper [\"']|article about",
        re.IGNORECASE,
    )
    # paper [titled|called] "Title"  /  paper 'Title'
    # The closing quote must not be followed by a word character, so an
    # apostrophe inside a single-quoted title does not end it early.
    _QUOTED_AFTER_PAPER_RE = re.compile(
        r"paper (?:(?:titled|called) )?([\"'])(.+?)\1(?!\w)", re.IGNORECASE
    )
    # "Title" paper  (opening quote must start a word, so "What's" is ignored)
    _QUOTED_BEFORE_PAPER_RE = re.compile(r"(?<!\S)([\"'])(.+?)\1 paper", re.IGNORECASE)
    # summarize [the] paper Title
    _SUMMARIZE_PAPER_RE = re.compile(r"summari[sz]e (?:the |this )?paper (.+)", re.IGNORECASE)

    def __init__(self):
        """Set up the session cache and thread pool, then probe the backend."""
        self.engines: Dict[str, Any] = {}
        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
        self.api_configured = False
        self.api_error = None
        self.model = MODEL
        self.reasoning = MedicalReasoning()

        # Canned replies for greetings/help; keys are lower-case, stripped queries.
        self.basic_responses = {
            "hi": "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based medical research questions across various specialties. How can I assist you today?",
            "hello": "👋 Welcome! I specialize in medical research analysis using evidence-based reasoning. What medical topic would you like to explore?",
            "help": "🆘 **How to use:**\n1. Ask medical research questions\n2. Specify domain or use auto-detect\n3. Mention your role (clinician, researcher, etc.)\n\n**Examples:**\n• 'Latest treatments for diabetes'\n• 'Research gaps in cancer immunotherapy'\n• 'Clinical guidelines for hypertension'",
            "what can you do": "🔬 **Medical Research Assistant Capabilities:**\n• Evidence-based medical analysis\n• Domain-specific research insights\n• Clinical/research perspective adaptation\n• Paper summarization and analysis\n• Research gap identification\n\nAsk me about any medical research topic!"
        }

        self._test_api_connection()
        print(f"🚀 Medical Research Engine Initialized")

    def _test_api_connection(self):
        """Try to build a throwaway backend engine and record the outcome.

        Sets ``api_configured`` and, on failure, ``api_error``. Never raises:
        any failure only switches later sessions to the fallback engine.
        """
        try:
            from chat.rag_engine import EnhancedRAGEngine
            EnhancedRAGEngine(session_id="test_init", model=self.model)
            self.api_configured = True
            print(" API Connection Test: SUCCESS")
        except Exception as e:
            self.api_configured = False
            self.api_error = str(e)
            print(f" API Connection Test: FAILED - {e}")

    @staticmethod
    def _best_keyword_match(text: str, keyword_table: Dict[str, List[str]], default: str) -> str:
        """Return the table key whose keywords occur most often in *text*.

        Matching is a case-insensitive substring test. On a tie the key that
        appears first in the table wins. *default* is returned when nothing
        matches.
        """
        lowered = text.lower()
        best_id, best_hits = default, 0
        for candidate, keywords in keyword_table.items():
            hits = sum(1 for keyword in keywords if keyword in lowered)
            if hits > best_hits:
                best_id, best_hits = candidate, hits
        return best_id

    def detect_domain_from_query(self, query: str, current_domain: str = "auto") -> str:
        """Return *current_domain* unless it is "auto"; otherwise guess it.

        Falls back to 'general_medical' when no domain keyword matches.
        """
        if current_domain != "auto":
            return current_domain
        return self._best_keyword_match(query, DOMAIN_KEYWORDS, 'general_medical')

    def detect_user_context_from_query(self, query: str, current_context: str = "auto") -> str:
        """Return *current_context* unless it is "auto"; otherwise guess it.

        Falls back to 'general' when no role keyword matches.
        """
        if current_context != "auto":
            return current_context
        return self._best_keyword_match(query, USER_CONTEXT_KEYWORDS, 'general')

    def get_domain_info(self, domain_id: str) -> Dict:
        """Return the catalogue entry for *domain_id*, or a generic stand-in."""
        known = next((entry for entry in MEDICAL_DOMAINS if entry["id"] == domain_id), None)
        if known is not None:
            return known
        return {
            "id": domain_id,
            "name": domain_id.replace('_', ' ').title(),
            "icon": "⚕️",
            "description": "Medical research domain"
        }

    def get_user_context_info(self, context_id: str) -> Dict:
        """Return the catalogue entry for *context_id*, or a generic stand-in."""
        known = next((entry for entry in USER_CONTEXTS if entry["id"] == context_id), None)
        if known is not None:
            return known
        return {
            "id": context_id,
            "name": context_id.replace('_', ' ').title(),
            "icon": "👤",
            "description": "User context"
        }

    def _classify_query(self, query: str) -> str:
        """Classify *query* as "basic", "paper_summary" or "research"."""
        if query.lower().strip() in self.basic_responses:
            return "basic"
        if self._PAPER_REQUEST_RE.search(query):
            return "paper_summary"
        return "research"

    async def process_query_async(
        self,
        query: str,
        domain: str = "general_medical",
        session_id: str = "default",
        user_context: str = "auto",
        **kwargs
    ) -> Dict[str, Any]:
        """Answer a query, dispatching on its classified type.

        Args:
            query: Raw user text.
            domain: Domain id, or "auto" to detect it from the query.
            session_id: Key under which the backend engine is cached.
            user_context: Role id, or "auto" to detect it from the query.
            **kwargs: Forwarded to the research handler (e.g. ``timeout``).

        Returns:
            A response dict that always carries "answer", "papers_used",
            "confidence_score" and "query_type".
        """
        if domain == "auto":
            domain = self.detect_domain_from_query(query)
        if user_context == "auto":
            user_context = self.detect_user_context_from_query(query)

        query_type = self._classify_query(query)

        if query_type == "paper_summary":
            return await self._handle_paper_summarization(query, session_id, domain, user_context)
        if query_type != "basic":
            return await self._handle_research_query(query, domain, user_context, session_id, kwargs)

        domain_info = self.get_domain_info(domain)
        context_info = self.get_user_context_info(user_context)

        # Use the same normalisation as _classify_query so padded greetings
        # such as " hi " get their canned reply rather than the generic one.
        response_text = self.basic_responses.get(
            query.lower().strip(),
            f"👋 I'm your Medical Research Assistant specializing in {domain_info['name']}. "
            f"How can I help with your medical research question today?"
        )
        return {
            "answer": response_text,
            "papers_used": 0,
            "confidence_score": {"overall_score": 95.0, "level": "HIGH 🟢"},
            "query_type": "basic",
            "user_context": user_context,
            "domain": domain,
            "domain_info": domain_info,
            "user_context_info": context_info
        }

    async def _handle_research_query(self, query: str, domain: str, user_context: str,
                                     session_id: str, kwargs: Dict) -> Dict[str, Any]:
        """Run a research query on the session engine in the thread pool.

        Returns the research result dict, or a timeout/error response dict;
        exceptions from the backend are converted, not propagated.
        """
        domain_info = self.get_domain_info(domain)
        context_info = self.get_user_context_info(user_context)
        engine = self.initialize_session(session_id)

        # Drop keys that would collide with the explicit keyword arguments
        # below (a duplicate keyword raises TypeError at call time).
        # NOTE(review): 'timeout' is still forwarded to the backend, as
        # before - confirm the backend accepts it.
        reserved = {"enable_reasoning", "query", "domain", "user_context", "reasoning_method"}
        extra_args = {key: value for key, value in kwargs.items() if key not in reserved}

        def run_engine():
            return engine.answer_research_question(
                query=query,
                domain=domain,
                user_context=user_context,
                reasoning_method="evidence_based",
                **extra_args
            )

        try:
            loop = asyncio.get_running_loop()
            # wait_for stops waiting after the timeout; the worker thread
            # itself is not interrupted and runs to completion.
            response = await asyncio.wait_for(
                loop.run_in_executor(self.executor, run_engine),
                timeout=kwargs.get('timeout', 60.0)
            )

            result = {
                "answer": self._clean_response(response.get("answer", ""), domain_info, query),
                "papers_used": response.get("papers_used", 0),
                "confidence_score": response.get("confidence_score", {"overall_score": 0}),
                "query_type": "research",
                "user_context": user_context,
                "domain": domain,
                "domain_info": domain_info,
                "user_context_info": context_info,
                "reasoning_method": "evidence_based"
            }
            if "enhanced_metrics" in response:
                result["metrics"] = response["enhanced_metrics"]
            return result

        except asyncio.TimeoutError:
            return self._create_timeout_response(query, domain_info, context_info)
        except Exception as e:
            return self._create_error_response(query, domain_info, context_info, str(e))

    def _clean_response(self, answer: str, domain_info: Dict, query: str) -> str:
        """Strip leaked reasoning scaffolding and ensure a markdown header."""
        if not answer:
            return f"# 🔬 **Medical Research Analysis**\n\n**Domain:** {domain_info['name']}\n\nNo analysis generated. Please try again."

        # Each pattern removes from its marker up to the next blank line.
        patterns_to_remove = [
            r'Chain Of Thought.*?\n\n',
            r'Step \d+.*?\n\n',
            r'Reasoning Process.*?\n\n'
        ]
        cleaned = answer
        for pattern in patterns_to_remove:
            cleaned = re.sub(pattern, '', cleaned, flags=re.DOTALL | re.IGNORECASE)

        # Strip before the header check so leading whitespace in front of an
        # existing "# " title does not cause a second header to be added.
        cleaned = cleaned.strip()
        if not cleaned.startswith('# '):
            cleaned = f"# 🔬 **Medical Research Analysis**\n\n**Domain:** {domain_info['name']}\n**Topic:** {query}\n\n{cleaned}"

        return cleaned.strip()

    async def _handle_paper_summarization(self, query: str, session_id: str,
                                          domain: str, user_context: str) -> Dict[str, Any]:
        """Summarize the single paper named in *query*.

        Returns a help response when no title can be extracted, and an error
        response (never an exception) when the backend fails or times out.
        Every response also carries "user_context" and "domain".
        """
        request_context = {"user_context": user_context, "domain": domain}
        try:
            engine = self.initialize_session(session_id)
            paper_title = self._extract_paper_title(query)

            if not paper_title:
                return {
                    "answer": """# 📄 **Paper Summarization Help**

Please provide a paper title to summarize, for example:
"Summarize the paper 'Deep Learning for Medical Imaging'"
"What does the paper 'COVID-19 Vaccine Efficacy Study' find?"
"Give me a summary of 'Guidelines for Hypertension Management'"

I'll provide a comprehensive analysis including methodology, findings, and implications.""",
                    "papers_used": 0,
                    "confidence_score": {"overall_score": 0},
                    "query_type": "help",
                    **request_context
                }

            loop = asyncio.get_running_loop()
            summary_result = await asyncio.wait_for(
                loop.run_in_executor(
                    self.executor,
                    lambda: engine.summarize_single_paper(
                        paper_title=paper_title,
                        user_query=query
                    )
                ),
                timeout=30.0
            )

            if not summary_result.get("success"):
                return {
                    "answer": f"""# 🔍 **Paper Not Found**

I couldn't find the paper: *"{paper_title}"*

**Suggestions:**
1. Check the exact title spelling
2. Try a more general search
3. Search by key concepts instead

You can also request: "Find papers about [topic]" or "Research on [condition]".""",
                    "papers_used": 0,
                    "confidence_score": {"overall_score": 0},
                    "query_type": "paper_summary_error",
                    **request_context
                }

            return {
                "answer": self._format_paper_summary(summary_result, domain),
                "papers_used": 1,
                "confidence_score": {"overall_score": summary_result.get("confidence", 0.7) * 100},
                "query_type": "paper_summary",
                "paper_details": {
                    "title": summary_result.get("paper_title", ""),
                    "authors": summary_result.get("authors", []),
                    "date": summary_result.get("publication_date", ""),
                    "source": summary_result.get("source", "")
                },
                **request_context
            }

        except asyncio.TimeoutError:
            # str(TimeoutError()) is empty, so the generic handler below
            # would print a blank "Error:" line; report the timeout itself.
            return {
                "answer": """# ⏱️ **Summarization Timed Out**

The paper analysis was taking too long. Please try again with a different paper or simpler request.""",
                "papers_used": 0,
                "confidence_score": {"overall_score": 0},
                "query_type": "error",
                "error": "timeout",
                **request_context
            }
        except Exception as e:
            return {
                "answer": f"""# 🚨 **Summarization Error**

Error: {str(e)}

Please try again with a different paper or simpler request.""",
                "papers_used": 0,
                "confidence_score": {"overall_score": 0},
                "query_type": "error",
                **request_context
            }

    def _extract_paper_title(self, query: str) -> Optional[str]:
        """Pull a paper title out of *query*, keeping its original casing.

        Recognised shapes, tried in order:
          1. paper [titled|called] "Title"   (single or double quotes)
          2. "Title" paper
          3. summarize [the] paper Title

        Returns None when no shape matches or the title is empty.
        """
        quoted = (self._QUOTED_AFTER_PAPER_RE.search(query)
                  or self._QUOTED_BEFORE_PAPER_RE.search(query))
        if quoted:
            return quoted.group(2).strip() or None

        unquoted = self._SUMMARIZE_PAPER_RE.search(query)
        if unquoted:
            title = re.sub(r'\?$', '', unquoted.group(1).strip()).strip()
            # Remove stray quotes left when the quoting was unbalanced.
            title = title.strip("\"'").strip()
            return title or None

        return None

    def _format_paper_summary(self, summary_result: Dict, domain: str) -> str:
        """Render a backend summary dict as markdown.

        *domain* is accepted for interface compatibility and is not used.
        """
        title = summary_result.get("paper_title", "Unknown Paper")
        authors = summary_result.get("authors", [])
        date = summary_result.get("publication_date", "")
        source = summary_result.get("source", "")
        summary = summary_result.get("summary", "")

        # Up to three authors are listed; longer lists become "First et al.".
        if authors and isinstance(authors, list):
            if len(authors) <= 3:
                # str() keeps the join from failing on non-string authors.
                author_str = ", ".join(str(author) for author in authors)
            else:
                author_str = f"{authors[0]} et al."
        else:
            author_str = "Unknown authors"

        response = f"""# 📄 **Paper Analysis**

**Title:** {title}
**Authors:** {author_str}
**Published:** {date}
**Source:** {source}

---

## 📋 **Summary**
{summary}

---

## 🔍 **Key Points**
Main findings and conclusions
Methodology and study design
Clinical/research implications
Limitations and future directions

*Analysis confidence: {summary_result.get('confidence', 0.7) * 100:.1f}%*"""

        return response

    def _create_timeout_response(self, query: str, domain_info: Dict, context_info: Dict) -> Dict[str, Any]:
        """Build the response returned when a research query times out."""
        return {
            "answer": f"""# ⏱️ **Query Timed Out**

**Domain:** {domain_info['name']}
**User Context:** {context_info['name']}

The analysis was taking too long. Try:
Simplifying your question
• Being more specific
Reducing the scope

**Example:** "Key treatments for [condition] in {domain_info['name']}" """,
            "papers_used": 0,
            "confidence_score": {"overall_score": 0},
            "query_type": "error",
            "user_context": context_info["id"],
            "domain": domain_info["id"],
            "error": "timeout"
        }

    def _create_error_response(self, query: str, domain_info: Dict, context_info: Dict, error: str) -> Dict[str, Any]:
        """Build the response returned when a research query raises."""
        return {
            "answer": f"""# 🚨 **Analysis Error**

**Domain:** {domain_info['name']}
**User Context:** {context_info['name']}
**Error:** {error}

**Troubleshooting:**
1. Check your internet connection
2. Try a simpler query
3. Verify domain selection
4. Contact support if problem persists""",
            "papers_used": 0,
            "confidence_score": {"overall_score": 0},
            "query_type": "error",
            "user_context": context_info["id"],
            "domain": domain_info["id"],
            "error": error
        }

    def initialize_session(self, session_id: str):
        """Return the engine cached for *session_id*, creating it on first use.

        A fallback engine is stored when the backend is not configured or
        its construction fails, so this method does not raise for those
        cases.
        """
        if session_id in self.engines:
            return self.engines[session_id]

        try:
            if self.api_configured:
                from chat.rag_engine import EnhancedRAGEngine
                self.engines[session_id] = EnhancedRAGEngine(
                    session_id=session_id,
                    model=self.model
                )
                print(f"✅ Session engine initialized: {session_id}")
            else:
                self.engines[session_id] = self._create_fallback_engine()
                print(f"⚠️ Session {session_id}: Using fallback engine")
        except Exception as e:
            print(f"❌ Failed to initialize engine for {session_id}: {e}")
            self.engines[session_id] = self._create_fallback_engine()

        return self.engines[session_id]

    def _create_fallback_engine(self):
        """Create a stand-in engine that only returns setup instructions."""

        class FallbackEngine:
            """Offers the two methods this class calls on a backend engine."""

            def __init__(self):
                self.session_id = "fallback"
                self.metrics = {"total_queries": 0}

            def answer_research_question(self, **kwargs):
                """Return setup help instead of a research answer."""
                query = kwargs.get("query", "")
                domain = kwargs.get("domain", "general_medical")
                user_context = kwargs.get("user_context", "auto")

                self.metrics["total_queries"] += 1

                if query.lower().strip() in {"hi", "hello", "hey"}:
                    return {
                        "answer": f"""# 👋 Welcome to Medical Research Assistant!

**Setup Required:**
1. Get an API key from https://console.groq.com
2. Create a `.env` file with:
GROQ_API_KEY=your_key_here
MODEL=gpt-oss-120b

3. Restart the server

**Features After Setup:**
• Evidence-based medical research analysis
• Domain-specific insights
Paper summarization
Research gap analysis""",
                        "papers_used": 0,
                        "confidence": 0.15,
                    }

                return {
                    "answer": f"""⚠️ **API Not Configured**

Current query: {query}
Domain: {domain}
User Context: {user_context}

Please configure your GROQ_API_KEY in the .env file and restart the server.""",
                    "papers_used": 0,
                    "confidence": 0.10,
                }

            def summarize_single_paper(self, **kwargs):
                """Report that paper summarization is unavailable."""
                paper_title = kwargs.get("paper_title", "Unknown Paper")
                return {
                    "success": False,
                    "error": "API not configured",
                    "paper_title": paper_title,
                    "summary": "Please configure your API key to use paper analysis."
                }

        return FallbackEngine()

    def get_engine_status(self) -> Dict[str, Any]:
        """Return configuration flags, session count and the feature list."""
        return {
            "api_configured": self.api_configured,
            "model": self.model,
            "active_sessions": len(self.engines),
            "domains_supported": len(MEDICAL_DOMAINS),
            "user_contexts_supported": len(USER_CONTEXTS),
            "reasoning_technique": "evidence_based_reasoning",
            "features": [
                "medical_research_analysis",
                "domain_specific_insights",
                "user_context_adaptation",
                "paper_summarization",
                "evidence_based_reasoning"
            ]
        }

    def clear_memory(self):
        """Drop every cached session engine."""
        self.engines.clear()
        print("🧹 Engine memory cleared for all sessions")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ api/engine.py - Production-Ready Medical Research Engine
3
+ Updated to support role-based reasoning and integrate with EnhancedRAGEngine
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import os
9
+ import sys
10
+ import re
11
+ from typing import Dict, Any, Optional, List
12
+ from datetime import datetime
13
+ import concurrent.futures
14
+ from pathlib import Path
15
+
16
+ # ============================================================================
17
+ # ENVIRONMENT SETUP
18
+ # ============================================================================
19
+
20
# Make the repository root importable so sibling packages resolve.
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))

# Imported only after the path tweak above, as in the original layout.
from dotenv import load_dotenv

# Candidate .env locations in priority order; only the first existing file
# is loaded, and it is loaded with override=True.
env_paths = [
    project_root / ".env",
    project_root / "api" / ".env",
    Path.cwd() / ".env",
]

env_loaded = False
for env_path in env_paths:
    if not env_path.exists():
        continue
    load_dotenv(dotenv_path=env_path, override=True)
    print(f"✅ Loaded environment from: {env_path}")
    env_loaded = True
    break

if not env_loaded:
    print("⚠️ No .env file found. Using system environment variables.")

# Credentials and model name consumed by the rest of the module.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
XAI_API_KEY = os.getenv("XAI_API_KEY")
MODEL = os.getenv("MODEL", "gpt-oss-120b")

if GROQ_API_KEY or XAI_API_KEY:
    # Only the last four characters of whichever key is set are echoed.
    last4 = (GROQ_API_KEY or XAI_API_KEY)[-4:]
    print(f"✅ API Key found: {'*' * 16}{last4}")
    print(f"✅ Model configured: {MODEL}")
else:
    print("❌ WARNING: No API key found in environment!")
    print(" Set GROQ_API_KEY or XAI_API_KEY in .env file")
56
+
57
+
58
+ # ============================================================================
59
+ # ROLE-BASED REASONING ADAPTER
60
+ # ============================================================================
61
+
62
class RoleBasedReasoningAdapter:
    """Role catalog plus keyword heuristics for guessing a query's audience.

    Both helpers are static; the class only namespaces the role metadata and
    the detection logic.
    """

    # Display metadata per role id (the original note says these match rag_engine.py)
    ROLE_DESCRIPTIONS = {
        'patient': {'name': 'Patient', 'icon': '🩺',
                    'description': 'Patients and general public seeking health information'},
        'student': {'name': 'Student', 'icon': '🎓',
                    'description': 'Medical students and trainees'},
        'clinician': {'name': 'Clinician', 'icon': '👨‍⚕️',
                      'description': 'Healthcare providers and nurses'},
        'doctor': {'name': 'Doctor', 'icon': '⚕️',
                   'description': 'Medical doctors and physicians'},
        'researcher': {'name': 'Researcher', 'icon': '🔬',
                       'description': 'Academic researchers and scientists'},
        'professor': {'name': 'Professor', 'icon': '📚',
                      'description': 'Academic educators and professors'},
        'pharmacist': {'name': 'Pharmacist', 'icon': '💊',
                       'description': 'Pharmacy professionals and pharmacists'},
        'general': {'name': 'General User', 'icon': '👤',
                    'description': 'General audience'},
        'auto': {'name': 'Auto-detect', 'icon': '🤖',
                 'description': 'Automatically detect user role'},
    }

    @staticmethod
    def get_role_info(role_id: str) -> Dict[str, Any]:
        """Return the metadata dict for ``role_id``; unknown ids get the 'general' entry."""
        catalog = RoleBasedReasoningAdapter.ROLE_DESCRIPTIONS
        if role_id in catalog:
            return catalog[role_id]
        return catalog['general']

    @staticmethod
    def detect_role_from_query(query: str, current_role: str = "auto") -> str:
        """Guess the user's role from the query text.

        Args:
            query: Raw user query.
            current_role: An already chosen role; anything other than "auto"
                is returned unchanged.

        Returns:
            A role id. Explicit self-identification phrases win outright;
            otherwise the role with the most keyword hits is chosen (the
            earliest-listed role wins ties), and "general" when nothing matches.
        """
        if current_role != "auto":
            return current_role

        text = query.lower()

        # Phrases in which users state their role directly
        self_identification = {
            'patient': ['i am a patient', 'as a patient', 'patient here'],
            'student': ['i am a student', 'medical student', 'as a student'],
            'clinician': ['i am a clinician', 'as a clinician', 'clinician here'],
            'doctor': ['i am a doctor', 'physician here', 'as a physician'],
            'researcher': ['i am a researcher', 'as a researcher', 'research scientist'],
            'professor': ['i am a professor', 'as a professor', 'faculty member'],
            'pharmacist': ['i am a pharmacist', 'as a pharmacist', 'pharmacy professional']
        }
        for role_id, phrases in self_identification.items():
            for phrase in phrases:
                if phrase in text:
                    return role_id

        # Role detection patterns from rag_engine.py (plain substring checks)
        keyword_hints = {
            'patient': ['i have', 'my symptoms', 'my doctor', 'my treatment', 'pain', 'suffering', 'experience', 'diagnosed', 'medication'],
            'student': ['learn', 'study', 'exam', 'textbook', 'course', 'education', 'explain', 'understand', 'concept', 'basics'],
            'clinician': ['patient', 'clinical', 'treatment', 'diagnosis', 'therapy', 'management', 'guidelines', 'recommend', 'prescribe'],
            'doctor': ['physician', 'consult', 'referral', 'differential', 'prognosis', 'etiology', 'pathophysiology'],
            'researcher': ['research', 'study', 'methodology', 'evidence', 'publication', 'hypothesis', 'experiment', 'results', 'conclusions'],
            'professor': ['teach', 'lecture', 'curriculum', 'syllabus', 'academic', 'pedagogy', 'assessment'],
            'pharmacist': ['medication', 'drug', 'dose', 'pharmacokinetics', 'interaction', 'formulary', 'prescription']
        }

        # Only roles with at least one hit are recorded, in catalog order
        hit_counts = {}
        for role_id, keywords in keyword_hints.items():
            hits = len([keyword for keyword in keywords if keyword in text])
            if hits > 0:
                hit_counts[role_id] = hits

        if not hit_counts:
            return "general"
        # max() returns the first maximal key, so ties go to the earliest role
        return max(hit_counts, key=hit_counts.get)
164
+
165
+
166
+ # ============================================================================
167
+ # DOMAIN DETECTION (UPDATED)
168
+ # ============================================================================
169
+
170
class DomainDetector:
    """Keyword-based guesser for the medical domain a query belongs to."""

    # Domain detection patterns (simplified from rag_engine.py); matched as substrings
    DOMAIN_PATTERNS = {
        'internal_medicine': ['diagnosis', 'chronic disease', 'acute disease', 'primary care', 'internal medicine'],
        'endocrinology': ['diabetes', 'thyroid', 'hormone', 'metabolism', 'insulin', 'glucose'],
        'cardiology': ['heart', 'cardiovascular', 'hypertension', 'ecg', 'echocardiogram', 'myocardial'],
        'neurology': ['brain', 'stroke', 'alzheimer', 'parkinson', 'seizure', 'migraine'],
        'oncology': ['cancer', 'tumor', 'chemotherapy', 'radiation', 'oncology', 'malignancy'],
        'infectious_disease': ['infection', 'bacterial', 'viral', 'antibiotic', 'sepsis', 'pneumonia'],
        'pulmonology': ['lung', 'respiratory', 'asthma', 'copd', 'oxygen', 'ventilator'],
        'gastroenterology': ['stomach', 'liver', 'intestine', 'colon', 'gastrointestinal', 'digestive'],
        'nephrology': ['kidney', 'renal', 'dialysis', 'creatinine', 'glomerular'],
        'hematology': ['blood', 'anemia', 'leukemia', 'hemoglobin', 'coagulation'],
        'psychiatry': ['mental', 'depression', 'anxiety', 'psychiatric', 'therapy', 'psychotherapy'],
        'dermatology': ['skin', 'rash', 'dermatitis', 'eczema', 'acne'],
        'orthopedics': ['bone', 'fracture', 'joint', 'orthopedic', 'musculoskeletal'],
        'ophthalmology': ['eye', 'vision', 'retina', 'glaucoma', 'cataract'],
        'urology': ['urinary', 'bladder', 'prostate', 'kidney stone', 'urological'],
        'pediatrics': ['child', 'pediatric', 'neonatal', 'infant', 'adolescent'],
        'obstetrics_gynecology': ['pregnancy', 'obstetric', 'gynecological', "women's health", 'reproductive'],
        'surgery': ['surgical', 'operation', 'procedure', 'anesthesia', 'postoperative'],
        'emergency_medicine': ['emergency', 'trauma', 'acute care', 'resuscitation'],
        'critical_care': ['icu', 'critical care', 'intensive care', 'ventilator'],
        'pathology': ['biopsy', 'histology', 'pathological', 'tissue examination'],
        'laboratory_medicine': ['lab test', 'biomarker', 'diagnostic test', 'laboratory'],
        'medical_imaging': ['imaging', 'radiology', 'x-ray', 'ct scan', 'mri', 'ultrasound'],
        'bioinformatics': ['computational', 'data analysis', 'algorithm', 'bioinformatics'],
        'genomics': ['genetic', 'genome', 'sequencing', 'dna', 'genomic'],
        'pharmacology': ['drug', 'pharmacology', 'pharmacokinetic', 'medication'],
        'public_health': ['epidemiology', 'population health', 'public health', 'prevention'],
        'pain_medicine': ['pain', 'analgesia', 'pain management', 'chronic pain'],
        'nutrition': ['diet', 'nutrition', 'vitamin', 'malnutrition', 'obesity'],
        'allergy_immunology': ['allergy', 'immune', 'immunology', 'allergic', 'hypersensitivity'],
        'rehabilitation_medicine': ['rehabilitation', 'physical therapy', 'recovery', 'disability']
    }

    @staticmethod
    def detect_domain_from_query(query: str, current_domain: str = "auto") -> str:
        """Pick the domain whose keywords occur most often in the query.

        Args:
            query: Raw user query.
            current_domain: A preselected domain; anything other than "auto"
                is returned unchanged.

        Returns:
            The best-scoring domain id (the earliest-listed domain wins ties),
            or 'general_medical' when no keyword matches.
        """
        if current_domain != "auto":
            return current_domain

        text = query.lower()
        hit_counts = {
            domain_id: sum(1 for keyword in keywords if keyword in text)
            for domain_id, keywords in DomainDetector.DOMAIN_PATTERNS.items()
        }
        # max() yields the first maximal key, matching a strict ">" scan
        leader = max(hit_counts, key=hit_counts.get)
        if hit_counts[leader] > 0:
            return leader
        return 'general_medical'
225
+
226
+
227
+ # ============================================================================
228
+ # MEDICAL DOMAIN CONFIGURATION (UPDATED)
229
+ # ============================================================================
230
+
231
# Catalog of selectable medical domains. Each row is
# (id, display name, icon, description) and is expanded into a dict with
# exactly those four keys, in that order.
MEDICAL_DOMAINS = [
    {"id": domain_id, "name": label, "icon": icon, "description": blurb}
    for domain_id, label, icon, blurb in (
        ("internal_medicine", "Internal Medicine", "🏥", "General internal medicine and diagnosis"),
        ("endocrinology", "Endocrinology", "🧬", "Hormonal and metabolic disorders"),
        ("cardiology", "Cardiology", "❤️", "Heart and cardiovascular diseases"),
        ("neurology", "Neurology", "🧠", "Brain and nervous system disorders"),
        ("oncology", "Oncology", "🦠", "Cancer research and treatment"),
        ("infectious_disease", "Infectious Diseases", "🦠", "Infectious diseases and microbiology"),
        ("clinical_research", "Clinical Research", "📊", "Clinical trials and evidence-based medicine"),
        ("general_medical", "General Medical", "⚕️", "General medical research and clinical questions"),
        ("pulmonology", "Pulmonology", "🫁", "Respiratory diseases and lung health"),
        ("gastroenterology", "Gastroenterology", "🍽️", "Digestive system disorders"),
        ("nephrology", "Nephrology", "🫘", "Kidney diseases and disorders"),
        ("hematology", "Hematology", "🩸", "Blood disorders and hematologic diseases"),
        ("surgery", "Surgery", "🔪", "Surgical procedures and interventions"),
        ("orthopedics", "Orthopedics", "🦴", "Musculoskeletal disorders and injuries"),
        ("urology", "Urology", "🚽", "Urinary tract and male reproductive system"),
        ("ophthalmology", "Ophthalmology", "👁️", "Eye diseases and vision disorders"),
        ("dermatology", "Dermatology", "🦋", "Skin diseases and disorders"),
        ("psychiatry", "Psychiatry", "🧘", "Mental health and psychiatric disorders"),
        ("obstetrics_gynecology", "Obstetrics & Gynecology", "🤰", "Women's health and reproductive medicine"),
        ("pediatrics", "Pediatrics", "👶", "Child health and pediatric medicine"),
        ("emergency_medicine", "Emergency Medicine", "🚑", "Emergency care and acute medicine"),
        ("critical_care", "Critical Care Medicine", "🏥", "Intensive care and critical care medicine"),
        ("pathology", "Pathology", "🔬", "Disease diagnosis and laboratory medicine"),
        ("laboratory_medicine", "Laboratory Medicine", "🧪", "Clinical laboratory testing and diagnostics"),
        ("medical_imaging", "Medical Imaging & Radiology AI", "📷", "Medical imaging and radiological diagnosis"),
        ("bioinformatics", "Bioinformatics", "💻", "Computational biology and data analysis"),
        ("genomics", "Genomics & Sequencing", "🧬", "Genomic research and sequencing technologies"),
        ("pharmacology", "Pharmacology", "💊", "Drug research and pharmacology"),
        ("public_health", "Public Health Analytics", "🌍", "Public health and epidemiology"),
        ("pain_medicine", "Pain Medicine", "🩹", "Pain management and treatment"),
        ("nutrition", "Nutrition", "🍎", "Nutritional science and dietetics"),
        ("allergy_immunology", "Allergy & Immunology", "🤧", "Allergies and immune system disorders"),
        ("rehabilitation_medicine", "Rehabilitation Medicine", "♿", "Physical medicine and rehabilitation"),
        # Sentinel entry: asks the engine to detect the domain from the query
        ("auto", "Auto-detect", "🔍", "Automatic domain detection"),
    )
]
301
+
302
# Catalog of selectable user roles. Each row is
# (id, display name, icon, description) and is expanded into a dict with
# exactly those four keys, in that order.
USER_ROLES = [
    {"id": role_id, "name": label, "icon": icon, "description": blurb}
    for role_id, label, icon, blurb in (
        ("patient", "Patient", "🩺", "Patients and general public seeking health information"),
        ("student", "Student", "🎓", "Medical students and trainees"),
        ("clinician", "Clinician", "👨‍⚕️", "Healthcare providers and nurses"),
        ("doctor", "Doctor", "⚕️", "Medical doctors and physicians"),
        ("researcher", "Researcher", "🔬", "Academic researchers and scientists"),
        ("professor", "Professor", "📚", "Academic educators and professors"),
        ("pharmacist", "Pharmacist", "💊", "Pharmacy professionals and pharmacists"),
        ("general", "General User", "👤", "General audience"),
        # Sentinel entry: asks the engine to detect the role from the query
        ("auto", "Auto-detect", "🤖", "Automatically detect user role"),
    )
]
322
+
323
+
324
+ # ============================================================================
325
+ # SIMPLE QUERY HANDLER
326
+ # ============================================================================
327
+
328
class SimpleQueryHandler:
    """Answer greetings and other trivial queries without running research analysis.

    Canned responses are looked up on a normalized form of the query
    (lower-cased, with surrounding whitespace and punctuation removed), so
    "Thanks!" and "How are you?" resolve to the same entries as "thanks" and
    "how are you".
    """

    # Basic responses for common queries (matching rag_engine.py)
    BASIC_RESPONSES = {
        "hi": "👋 Hello! I'm your Medical Research Assistant. I can help with evidence-based medical research questions across various specialties. How can I assist you today?",
        "hello": "👋 Welcome! I specialize in medical research analysis using evidence-based reasoning. What medical topic would you like to explore?",
        "hey": "👋 Hey there! I'm ready to help with medical research questions. What would you like to know?",
        "greetings": "👋 Greetings! I'm your Medical Research Assistant, here to help with evidence-based medical information. What's on your mind?",
        "good morning": "🌅 Good morning! I'm ready to assist with medical research questions. How can I help you today?",
        "good afternoon": "☀️ Good afternoon! I'm here to help with evidence-based medical research. What would you like to discuss?",
        "good evening": "🌙 Good evening! I'm available to assist with medical research questions. How can I help?",
        "how are you": "😊 I'm doing well, thank you! Ready to help with medical research questions. How can I assist you today?",
        "what's up": "👋 Not much! I'm here and ready to help with medical research. What would you like to explore?",
        "sup": "👋 Hey! I'm here to help with medical research. What's on your mind?",
        "thanks": "🙏 You're welcome! I'm here whenever you need help with medical research.",
        "thank you": "🙏 You're welcome! Feel free to ask more medical research questions anytime.",
        "bye": "👋 Goodbye! Feel free to return anytime for medical research assistance.",
        "goodbye": "👋 Goodbye! I'm here whenever you need help with medical research questions.",
        "help": "🆘 **How to use:**\n1. Ask medical research questions\n2. Specify domain or use auto-detect\n3. Choose your role (patient, doctor, researcher, etc.)\n\n**Examples:**\n• 'Latest treatments for diabetes'\n• 'Research gaps in cancer immunotherapy'\n• 'Clinical guidelines for hypertension'\n• 'Explain MRI findings in simple terms' (as a patient)\n• 'Compare treatment protocols for pneumonia' (as a clinician)",
        "what can you do": "🔬 **Medical Research Assistant Capabilities:**\n• Evidence-based medical analysis\n• Domain-specific research insights\n• Role-based responses (patient, doctor, researcher, etc.)\n• Paper summarization and analysis\n• Research gap identification\n• Guideline detection and analysis\n• Simple query handling (greetings, basic questions)\n\nAsk me about any medical research topic!"
    }

    # Characters trimmed from both ends of a query before the canned-response
    # lookup. No BASIC_RESPONSES key starts or ends with any of them, so every
    # query that matched before still matches.
    _EDGE_CHARS = " \t\r\n!?.,;:"

    @staticmethod
    def _normalize(query: str) -> str:
        """Return the lookup key for ``query``: lower-cased, edge punctuation removed."""
        return (query or "").lower().strip(SimpleQueryHandler._EDGE_CHARS)

    @staticmethod
    def is_simple_query(query: str) -> bool:
        """Return True when ``query`` is a greeting or other trivial message.

        A query is simple when its normalized form is a BASIC_RESPONSES key,
        or when it has at most two whitespace-separated words and does not
        look like a research question.
        """
        query = query or ""

        if SimpleQueryHandler._normalize(query) in SimpleQueryHandler.BASIC_RESPONSES:
            return True

        # Very short queries (1-2 words) are treated as small talk unless they
        # contain a research indicator
        words = query.split()
        return len(words) <= 2 and not SimpleQueryHandler._looks_like_research_query(query)

    @staticmethod
    def _looks_like_research_query(query: str) -> bool:
        """Return True when ``query`` contains research vocabulary or is a longer question."""
        text = query.lower()

        # Research question indicators (substring match)
        research_indicators = (
            'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis',
            'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical',
            'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis',
            'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk',
            'benefit', 'recommendation', 'guideline', 'standard', 'protocol'
        )
        if any(indicator in text for indicator in research_indicators):
            return True

        # A question word alone is not enough; the question must also have
        # more than three words to count as research
        question_words = ('what', 'why', 'how', 'when', 'where', 'which', 'who')
        return text.startswith(question_words) and len(query.split()) > 3

    @staticmethod
    def get_simple_response(query: str, role: str = "general") -> str:
        """Return the canned reply for ``query``, or a generic role-aware greeting.

        Args:
            query: Raw user query; matched against BASIC_RESPONSES after
                normalization.
            role: Role id used only to phrase the generic fallback greeting.
        """
        canned = SimpleQueryHandler.BASIC_RESPONSES.get(SimpleQueryHandler._normalize(query))
        if canned is not None:
            return canned

        # Generic simple response
        role_info = RoleBasedReasoningAdapter.get_role_info(role)
        return f"👋 Hello! I'm your Medical Research Assistant. As a {role_info['name'].lower()}, how can I help with your medical questions today?"
409
+
410
+
411
+ # ============================================================================
412
+ # MEDICAL RESEARCH CHAT ENGINE (UPDATED FOR ROLE-BASED REASONING)
413
+ # ============================================================================
414
+
415
+ class MedicalResearchEngine:
416
+ """Production-ready medical research engine with role-based reasoning"""
417
+
418
def __init__(self):
    """Set up helpers, the worker pool and API status, then probe the RAG engine."""
    # Model name comes from the module-level MODEL setting
    self.model = MODEL

    # API status; updated by _test_api_connection() below
    self.api_configured = False
    self.api_error = None

    # Engine cache (presumably keyed by session id; it is filled outside this
    # method) and the thread pool used to run blocking engine calls
    self.engines: Dict[str, Any] = {}
    self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)

    # Stateless helper objects
    self.domain_detector = DomainDetector()
    self.role_adapter = RoleBasedReasoningAdapter()
    self.simple_query_handler = SimpleQueryHandler()

    # Basic responses for common queries (same dict object as the handler's)
    self.basic_responses = SimpleQueryHandler.BASIC_RESPONSES

    self._test_api_connection()
    print("🚀 Medical Research Engine with Role-Based Reasoning Initialized")
433
+
434
+ def _test_api_connection(self):
435
+ """Test API connection"""
436
+ try:
437
+ # Try to import EnhancedRAGEngine from rag_engine.py
438
+ from chat.rag_engine import EnhancedRAGEngine
439
+ # Test initialization
440
+ test_engine = EnhancedRAGEngine(session_id="test_init", model=self.model, use_real_time=False)
441
+ self.api_configured = True
442
+ print("✅ API Connection Test: SUCCESS")
443
+ print(f" Model: {self.model}")
444
+ print(f" Role-based reasoning: ENABLED")
445
+ print(f" Simple query handling: ENABLED")
446
+ except ImportError as e:
447
+ self.api_configured = False
448
+ self.api_error = str(e)
449
+ print(f"❌ API Connection Test: FAILED - {e}")
450
+ except Exception as e:
451
+ self.api_configured = False
452
+ self.api_error = str(e)
453
+ print(f" API Connection Test: FAILED - {e}")
454
+
455
def detect_domain_from_query(self, query: str, current_domain: str = "auto") -> str:
    """Delegate domain detection for ``query`` to ``self.domain_detector``."""
    detector = self.domain_detector
    return detector.detect_domain_from_query(query, current_domain)
458
+
459
def detect_user_role_from_query(self, query: str, current_role: str = "auto") -> str:
    """Delegate role detection for ``query`` to ``self.role_adapter``."""
    adapter = self.role_adapter
    return adapter.detect_role_from_query(query, current_role)
462
+
463
def get_domain_info(self, domain_id: str) -> Dict:
    """Return the MEDICAL_DOMAINS entry for ``domain_id``.

    Unknown ids get a synthesized entry whose name is the id with underscores
    turned into spaces and title-cased.
    """
    known = next((entry for entry in MEDICAL_DOMAINS if entry["id"] == domain_id), None)
    if known is not None:
        return known

    placeholder = {"id": domain_id}
    placeholder["name"] = domain_id.replace('_', ' ').title()
    placeholder["icon"] = "⚕️"
    placeholder["description"] = "Medical research domain"
    return placeholder
474
+
475
def get_user_role_info(self, role_id: str) -> Dict:
    """Delegate the role metadata lookup for ``role_id`` to ``self.role_adapter``."""
    adapter = self.role_adapter
    return adapter.get_role_info(role_id)
478
+
479
+ def _classify_query(self, query: str) -> str:
480
+ """Classify query type"""
481
+ # Check if it's a simple query
482
+ if self.simple_query_handler.is_simple_query(query):
483
+ return "simple"
484
+
485
+ # Check for paper summarization
486
+ query_lower = query.lower().strip()
487
+ if any(term in query_lower for term in ['summarize paper', 'paper titled', 'article about', 'summary of paper']):
488
+ return "paper_summary"
489
+
490
+ # Default to research query
491
+ return "research"
492
+
493
async def process_query_async(
    self,
    query: str,
    domain: str = "general_medical",
    session_id: str = "default",
    user_role: str = "auto",  # Updated from user_context
    custom_role_prompt: Optional[str] = None,  # New: Custom role prompt
    max_papers: int = 15,
    use_real_time: Optional[bool] = True,  # New: Control real-time search
    use_fallback: Optional[bool] = False,  # New: Use fallback papers
    **kwargs
) -> Dict[str, Any]:
    """Route a query to the simple, paper-summary or research handler.

    "auto" values for ``domain`` and ``user_role`` are resolved from the query
    text before classification. Extra keyword arguments are passed to the
    research handler as a dict.

    Returns:
        The response dict built by the selected handler. Simple queries are
        answered inline, with no papers and a fixed confidence of 95.0.
    """
    # Resolve "auto" placeholders first so every handler sees concrete ids
    if domain == "auto":
        domain = self.detect_domain_from_query(query)
    if user_role == "auto":
        user_role = self.detect_user_role_from_query(query)

    domain_info = self.get_domain_info(domain)
    role_info = self.get_user_role_info(user_role)

    query_type = self._classify_query(query)

    if query_type == "paper_summary":
        print(" 📄 Detected paper summarization request")
        return await self._handle_paper_summarization(query, session_id, domain, user_role, custom_role_prompt)

    if query_type != "simple":
        print(" 🔬 Detected research query - using role-based reasoning")
        return await self._handle_research_query(query, domain, user_role, session_id,
                                                 custom_role_prompt, max_papers, use_real_time, use_fallback, kwargs)

    # Simple query: answer from the canned responses, no retrieval involved
    print(" 💬 Detected simple query - using role-appropriate response")
    response_text = self.simple_query_handler.get_simple_response(query, user_role)
    formatted_answer = self._format_simple_response(response_text, domain_info, role_info, query)

    return {
        "answer": formatted_answer,
        "papers_used": 0,
        "real_papers_used": 0,
        "demo_papers_used": 0,
        "confidence_score": {"overall_score": 95.0, "level": "HIGH 🟢"},
        "query_type": "simple",
        "user_role": user_role,
        "domain": domain,
        "domain_info": domain_info,
        "role_info": role_info,
        "reasoning_method": "simple_response"
    }
551
+
552
+ def _format_simple_response(self, response_text: str, domain_info: Dict,
553
+ role_info: Dict, query: str) -> str:
554
+ """Format simple response with role and domain info"""
555
+ return f"""# {response_text}
556
+
557
+ **Role:** {role_info['name']} {role_info.get('icon', '👤')}
558
+ **Domain:** {domain_info['name']} {domain_info.get('icon', '⚕️')}
559
+
560
+ Feel free to ask me medical research questions! I'll provide information tailored to your needs as a {role_info['name'].lower()}."""
561
+
562
async def _handle_research_query(self, query: str, domain: str, user_role: str,
                                 session_id: str, custom_role_prompt: Optional[str],
                                 max_papers: int, use_real_time: bool,
                                 use_fallback: bool, kwargs: Dict) -> Dict[str, Any]:
    """Run a research query through the session's engine in a worker thread.

    Args:
        query: The user's research question.
        domain: Resolved domain id.
        user_role: Resolved role id.
        session_id: Passed to ``self.initialize_session`` to obtain the engine.
        custom_role_prompt: Optional prompt forwarded as ``role_system_prompt``.
        max_papers: Forwarded to the engine.
        use_real_time: Forwarded to the engine when it exposes a
            ``use_real_time`` attribute; otherwise True is sent.
        use_fallback: Forwarded to the engine.
        kwargs: Extra options; only ``timeout`` (seconds, default 90.0) is read.

    Returns:
        A response dict. Timeouts and any other failure, including a failure
        to initialize the session engine, are turned into the timeout or error
        response instead of being raised.
    """
    # Get domain and role info
    domain_info = self.get_domain_info(domain)
    role_info = self.get_user_role_info(user_role)

    try:
        # Engine setup is inside the try so an initialization failure yields
        # the same error response as any other failure of this query.
        engine = self.initialize_session(session_id)

        # We are inside a coroutine, so a loop is guaranteed to be running
        loop = asyncio.get_running_loop()

        print(f" 🔍 Processing with role-based reasoning (role: {user_role}, domain: {domain})")

        def run_engine():
            # Blocking call; runs on self.executor, not on the event loop
            return engine.answer_research_question(
                query=query,
                domain=domain,
                max_papers=max_papers,
                use_memory=True,
                user_context=user_role,  # For backward compatibility
                use_fallback=use_fallback,
                role=user_role,  # NEW: Role parameter
                role_system_prompt=custom_role_prompt,  # NEW: Custom role prompt
                use_real_time=use_real_time if hasattr(engine, 'use_real_time') else True
            )

        # NOTE: on timeout only the await is abandoned; the worker thread
        # keeps running until the engine call returns.
        response = await asyncio.wait_for(
            loop.run_in_executor(self.executor, run_engine),
            timeout=kwargs.get('timeout', 90.0)  # Increased timeout for research
        )

        answer = response.get("answer", "")

        # Prepare result
        result = {
            "answer": answer,
            "papers_used": response.get("papers_used", 0),
            "real_papers_used": response.get("real_papers_used", 0),
            "demo_papers_used": response.get("demo_papers_used", 0),
            "confidence_score": response.get("confidence_score", {"overall_score": 0}),
            "query_type": "research",
            "user_role": response.get("user_context", user_role),  # Get from response
            "domain": domain,
            "domain_info": domain_info,
            "role_info": role_info,
            "reasoning_method": response.get("reasoning_method", "role_based"),
            "guideline_info": response.get("guideline_info")
        }

        # Add enhanced metrics if available
        if "enhanced_metrics" in response:
            result["metrics"] = response["enhanced_metrics"]

        print(f" ✅ Research query processed successfully")
        print(f" Papers used: {result['papers_used']} (real: {result['real_papers_used']}, demo: {result['demo_papers_used']})")
        print(f" Confidence: {result['confidence_score'].get('overall_score', 0)}/100")

        return result

    except asyncio.TimeoutError:
        print(f" ⏱️ Query timeout - creating timeout response")
        return self._create_timeout_response(query, domain_info, role_info)
    except Exception as e:
        print(f" Research query error: {e}")
        return self._create_error_response(query, domain_info, role_info, str(e))
635
+
636
async def _handle_paper_summarization(self, query: str, session_id: str,
                                      domain: str, user_role: str,
                                      custom_role_prompt: Optional[str]) -> Dict[str, Any]:
    """Handle single paper summarization requests.

    Extracts a paper title from ``query``, asks the session engine to
    summarize it in a worker thread (30 second limit) and formats the result.
    ``custom_role_prompt`` is accepted for signature parity with the research
    handler but is not used here.

    Returns:
        A response dict. A missing title, a paper that is not found, a timeout
        and any other failure each produce an explanatory markdown answer with
        zero papers and zero confidence; nothing is raised.
    """
    summary_timeout = 30.0

    # Pure lookups; done up front so every response, including the error
    # ones, carries the same domain/role context as the success response.
    domain_info = self.get_domain_info(domain)
    role_info = self.get_user_role_info(user_role)

    def no_paper_response(answer: str, query_type: str) -> Dict[str, Any]:
        # Shared shape for all responses that did not use a paper
        return {
            "answer": answer,
            "papers_used": 0,
            "real_papers_used": 0,
            "demo_papers_used": 0,
            "confidence_score": {"overall_score": 0},
            "query_type": query_type,
            "user_role": user_role,
            "domain": domain,
            "domain_info": domain_info,
            "role_info": role_info,
        }

    try:
        engine = self.initialize_session(session_id)

        # Extract paper title from query
        paper_title = self._extract_paper_title(query)

        if not paper_title:
            help_text = "\n".join([
                "# 📄 **Paper Summarization Help**",
                "",
                "Please provide a paper title to summarize, for example:",
                "\"Summarize the paper 'Deep Learning for Medical Imaging'\"",
                "\"What does the paper 'COVID-19 Vaccine Efficacy Study' find?\"",
                "\"Give me a summary of 'Guidelines for Hypertension Management'\"",
                "",
                "I'll provide a comprehensive analysis including methodology, findings, and implications.",
            ])
            return no_paper_response(help_text, "help")

        # We are inside a coroutine, so a loop is guaranteed to be running
        loop = asyncio.get_running_loop()

        def run_summary():
            # Blocking call; runs on self.executor, not on the event loop
            return engine.summarize_single_paper(
                paper_title=paper_title,
                user_query=query,
                domain=domain
            )

        summary_result = await asyncio.wait_for(
            loop.run_in_executor(self.executor, run_summary),
            timeout=summary_timeout
        )

        if not summary_result.get("success"):
            not_found_text = "\n".join([
                "# 🔍 **Paper Not Found**",
                "",
                f"I couldn't find the paper: *\"{paper_title}\"*",
                "",
                "**Suggestions:**",
                "1. Check the exact title spelling",
                "2. Try a more general search",
                "3. Search by key concepts instead",
                "",
                "You can also request: \"Find papers about [topic]\" or \"Research on [condition]\".",
            ])
            return no_paper_response(not_found_text, "paper_summary_error")

        # Format the response with role context
        response_text = self._format_paper_summary(summary_result, domain_info, role_info)

        return {
            "answer": response_text,
            "papers_used": 1,
            # NOTE(review): the two counters use different defaults for a
            # missing "is_demo" flag, so a result without the flag counts as
            # neither real nor demo. Kept as-is; confirm this is intended.
            "real_papers_used": 1 if not summary_result.get("is_demo", True) else 0,
            "demo_papers_used": 1 if summary_result.get("is_demo", False) else 0,
            "confidence_score": {"overall_score": summary_result.get("confidence", 0.7) * 100},
            "query_type": "paper_summary",
            "user_role": user_role,
            "domain": domain,
            "domain_info": domain_info,
            "role_info": role_info,
            "reasoning_method": "paper_summary",
            "paper_details": {
                "title": summary_result.get("paper_title", ""),
                "authors": summary_result.get("authors", []),
                "date": summary_result.get("publication_date", ""),
                "source": summary_result.get("source", "")
            }
        }

    except asyncio.TimeoutError:
        # wait_for raises TimeoutError with an empty message; without this
        # branch the user would see "Error: " followed by nothing.
        print(f" ⏱️ Paper summarization timeout after {summary_timeout:.0f}s")
        timeout_text = "\n".join([
            "# 🚨 **Summarization Error**",
            "",
            f"Error: the summarization did not finish within {summary_timeout:.0f} seconds",
            "",
            "Please try again with a different paper or simpler request.",
        ])
        return no_paper_response(timeout_text, "error")
    except Exception as e:
        print(f" ❌ Paper summarization error: {e}")
        error_text = "\n".join([
            "# 🚨 **Summarization Error**",
            "",
            f"Error: {str(e)}",
            "",
            "Please try again with a different paper or simpler request.",
        ])
        return no_paper_response(error_text, "error")
741
+
742
+ def _extract_paper_title(self, query: str) -> Optional[str]:
743
+ """Extract paper title from query"""
744
+ # Pattern 1: Paper titled "Title"
745
+ match = re.search(r'paper (?:titled|called) "([^"]+)"', query.lower())
746
+ if match:
747
+ return match.group(1).strip()
748
+
749
+ # Pattern 2: "Title" paper
750
+ match = re.search(r'"([^"]+)" paper', query.lower())
751
+ if match:
752
+ return match.group(1).strip()
753
+
754
+ # Pattern 3: Summarize the paper Title
755
+ match = re.search(r'summarize (?:the )?paper (.+)', query.lower())
756
+ if match:
757
+ title = match.group(1).strip()
758
+ title = re.sub(r'\?$', '', title)
759
+ return title.strip()
760
+
761
+ # Pattern 4: Summary of paper Title
762
+ match = re.search(r'summary of (?:the )?paper (.+)', query.lower())
763
+ if match:
764
+ title = match.group(1).strip()
765
+ title = re.sub(r'\?$', '', title)
766
+ return title.strip()
767
+
768
+ return None
769
+
770
+ def _format_paper_summary(self, summary_result: Dict, domain_info: Dict,
771
+ role_info: Dict) -> str:
772
+ """Format paper summary for display with role context"""
773
+ title = summary_result.get("paper_title", "Unknown Paper")
774
+ authors = summary_result.get("authors", [])
775
+ date = summary_result.get("publication_date", "")
776
+ source = summary_result.get("source", "")
777
+ summary = summary_result.get("summary", "")
778
+ confidence = summary_result.get("confidence", 0.7) * 100
779
+
780
+ # Format authors
781
+ if authors and isinstance(authors, list):
782
+ if len(authors) <= 3:
783
+ author_str = ", ".join(authors)
784
+ else:
785
+ author_str = f"{authors[0]} et al."
786
+ else:
787
+ author_str = "Unknown authors"
788
+
789
+ # Build response with role context
790
+ response = f"""# 📄 **Paper Analysis**
791
+
792
+ **Role:** {role_info['name']} {role_info.get('icon', '👤')}
793
+ **Domain:** {domain_info['name']} {domain_info.get('icon', '⚕️')}
794
+
795
+ **Title:** {title}
796
+ **Authors:** {author_str}
797
+ **Published:** {date}
798
+ **Source:** {source}
799
+
800
+ ---
801
+
802
+ ## 📋 **Summary**
803
+ {summary}
804
+
805
+ ---
806
+
807
+ ## 🔍 **Key Points for {role_info['name']}**
808
+ • Main findings and conclusions relevant to {role_info['name'].lower()} needs
809
+ • Methodology and study design appropriate for {role_info['name'].lower()} understanding
810
+ • Clinical/research implications from {role_info['name'].lower()} perspective
811
+ • Limitations and future directions
812
+
813
+ *Analysis confidence: {confidence:.1f}%*
814
+ *Tailored for {role_info['name'].lower()} perspective*"""
815
+
816
+ return response
817
+
818
+ def _create_timeout_response(self, query: str, domain_info: Dict, role_info: Dict) -> Dict[str, Any]:
819
+ """Create timeout response"""
820
+ return {
821
+ "answer": f"""# ⏱️ **Query Timed Out**
822
+
823
+ **Role:** {role_info['name']} {role_info.get('icon', '👤')}
824
+ **Domain:** {domain_info['name']}
825
+ **Query:** {query}
826
+
827
+ The analysis was taking too long. Try:
828
+ • Simplifying your question
829
+ • Being more specific
830
+ • Reducing the scope
831
+
832
+ **Example for {role_info['name'].lower()}:**
833
+ "Key treatments for [condition] in {domain_info['name']}" """,
834
+ "papers_used": 0,
835
+ "real_papers_used": 0,
836
+ "demo_papers_used": 0,
837
+ "confidence_score": {"overall_score": 0},
838
+ "query_type": "error",
839
+ "user_role": role_info.get('id', 'general'),
840
+ "domain": domain_info.get('id', 'general_medical'),
841
+ "error": "timeout"
842
+ }
843
+
844
+ def _create_error_response(self, query: str, domain_info: Dict, role_info: Dict, error: str) -> Dict[str, Any]:
845
+ """Create error response"""
846
+ return {
847
+ "answer": f"""# 🚨 **Analysis Error**
848
+
849
+ **Role:** {role_info['name']} {role_info.get('icon', '👤')}
850
+ **Domain:** {domain_info['name']}
851
+ **Error:** {error}
852
+
853
+ **Troubleshooting for {role_info['name'].lower()}:**
854
+ 1. Check your internet connection
855
+ 2. Try a simpler query
856
+ 3. Verify domain selection
857
+ 4. Contact support if problem persists""",
858
+ "papers_used": 0,
859
+ "real_papers_used": 0,
860
+ "demo_papers_used": 0,
861
+ "confidence_score": {"overall_score": 0},
862
+ "query_type": "error",
863
+ "user_role": role_info.get('id', 'general'),
864
+ "domain": domain_info.get('id', 'general_medical'),
865
+ "error": error
866
+ }
867
+
868
def initialize_session(self, session_id: str):
    """Return the engine for ``session_id``, creating it on first use.

    An engine already stored in ``self.engines`` is returned as is.
    Otherwise a fallback engine is created when the API is not configured,
    or an ``EnhancedRAGEngine`` when it is. Any exception raised while
    building the engine is reported and a fallback engine is stored
    instead, so the call always returns an engine.

    Args:
        session_id: Key under which the engine is cached.

    Returns:
        The engine object stored for the session.
    """
    if session_id in self.engines:
        return self.engines[session_id]

    try:
        if self.api_configured:
            # Imported lazily, inside the try, so an import failure also
            # degrades to the fallback engine.
            from chat.rag_engine import EnhancedRAGEngine
            self.engines[session_id] = EnhancedRAGEngine(
                session_id=session_id,
                model=self.model,
                use_real_time=True
            )
            print(f"✅ Session engine initialized: {session_id}")
        else:
            self.engines[session_id] = self._create_fallback_engine()
            print(f"⚠️ Session {session_id}: Using fallback engine")
    except Exception as e:
        print(f"❌ Failed to initialize engine for {session_id}: {e}")
        self.engines[session_id] = self._create_fallback_engine()

    return self.engines[session_id]
889
+
890
+ def _create_fallback_engine(self):
891
+ """Create a fallback engine when API fails"""
892
+
893
+ class FallbackEngine:
894
+ def __init__(self):
895
+ self.session_id = "fallback"
896
+ self.metrics = {"total_queries": 0}
897
+ self.use_real_time = False
898
+
899
+ def answer_research_question(self, **kwargs):
900
+ query = kwargs.get("query", "")
901
+ domain = kwargs.get("domain", "general_medical")
902
+ role = kwargs.get("role", "general")
903
+ custom_role_prompt = kwargs.get("role_system_prompt")
904
+
905
+ self.metrics["total_queries"] += 1
906
+
907
+ if query.lower().strip() in {"hi", "hello", "hey"}:
908
+ role_info = RoleBasedReasoningAdapter.get_role_info(role)
909
+ return {
910
+ "answer": f"""# 👋 Welcome to Medical Research Assistant!
911
+
912
+ **Role:** {role_info['name']} {role_info.get('icon', '👤')}
913
+ **Domain:** {domain.replace('_', ' ').title()}
914
+
915
+ **Setup Required:**
916
+ 1. Get an API key from https://console.groq.com
917
+ 2. Create a `.env` file with:
918
+ GROQ_API_KEY=your_key_here
919
+ MODEL=gpt-oss-120b
920
+
921
+ 3. Restart the server
922
+
923
+ **Features After Setup:**
924
+ • Role-based medical research analysis
925
+ • Domain-specific insights tailored to {role_info['name'].lower()} needs
926
+ • Paper summarization with guideline detection
927
+ • Research gap analysis""",
928
+ "papers_used": 0,
929
+ "real_papers_used": 0,
930
+ "demo_papers_used": 0,
931
+ "confidence_score": {"overall_score": 15},
932
+ "user_context": role,
933
+ "reasoning_method": "fallback"
934
+ }
935
+
936
+ role_info = RoleBasedReasoningAdapter.get_role_info(role)
937
+ return {
938
+ "answer": f"""⚠️ **API Not Configured**
939
+
940
+ **Role:** {role_info['name']} {role_info.get('icon', '👤')}
941
+ **Domain:** {domain.replace('_', ' ').title()}
942
+
943
+ Current query: {query}
944
+
945
+ Please configure your GROQ_API_KEY in the .env file and restart the server.
946
+ For {role_info['name'].lower()}-appropriate responses, setup is required.""",
947
+ "papers_used": 0,
948
+ "real_papers_used": 0,
949
+ "demo_papers_used": 0,
950
+ "confidence_score": {"overall_score": 10},
951
+ "user_context": role,
952
+ "reasoning_method": "fallback"
953
+ }
954
+
955
+ def summarize_single_paper(self, **kwargs):
956
+ """Fallback for single paper summarization"""
957
+ paper_title = kwargs.get("paper_title", "Unknown Paper")
958
+ domain = kwargs.get("domain", "general_medical")
959
+ role = kwargs.get("role", "general")
960
+
961
+ role_info = RoleBasedReasoningAdapter.get_role_info(role)
962
+
963
+ return {
964
+ "success": False,
965
+ "error": "API not configured",
966
+ "paper_title": paper_title,
967
+ "summary": f"Please configure your API key to use paper analysis.\n\nRole: {role_info['name']}\nDomain: {domain}",
968
+ "is_demo": True
969
+ }
970
+
971
+ return FallbackEngine()
972
+
973
def get_engine_status(self) -> Dict[str, Any]:
    """Report configuration, session and feature information.

    Returns:
        A dict with the API configuration flag (and the stored API error
        only when the API is not configured), the model name, the number
        of cached session engines, the query total summed over engines
        that expose ``metrics``, the sizes of ``MEDICAL_DOMAINS`` and
        ``USER_ROLES``, and static feature/version descriptors.
    """
    # Engines without a ``metrics`` attribute contribute nothing.
    query_total = sum(
        session_engine.metrics.get("total_queries", 0)
        for session_engine in self.engines.values()
        if hasattr(session_engine, 'metrics')
    )

    feature_names = [
        "role_based_medical_analysis",
        "domain_specific_insights",
        "user_role_adaptation",
        "paper_summarization",
        "guideline_detection",
        "simple_query_handling",
        "real_time_search"
    ]

    return {
        "api_configured": self.api_configured,
        "api_error": None if self.api_configured else self.api_error,
        "model": self.model,
        "active_sessions": len(self.engines),
        "total_queries": query_total,
        "domains_supported": len(MEDICAL_DOMAINS),
        "user_roles_supported": len(USER_ROLES),
        "reasoning_technique": "role_based_reasoning",
        "features": feature_names,
        "simple_query_handler": "ENABLED",
        "role_based_reasoning": "ENABLED",
        "version": "2.2.0"
    }
1003
+
1004
def clear_memory(self):
    """Drop every cached session engine.

    The ``self.engines`` mapping is emptied in place, so subsequent
    ``initialize_session`` calls build fresh engines. A confirmation line
    is printed.
    """
    engines = self.engines
    engines.clear()
    print("🧹 Engine memory cleared for all sessions")
1008
+
1009
+
1010
+ # ============================================================================
1011
+ # DEVELOPMENT TESTING
1012
+ # ============================================================================
1013
+
1014
if __name__ == "__main__" and os.getenv("VERCEL") is None:
    # Manual smoke test: runs only when the module is executed directly and
    # the VERCEL environment variable is not set. It prints the engine
    # status and the detected domain/role for a few sample queries next to
    # the expected label; nothing is asserted.
    banner = "=" * 60
    print("\n" + banner)
    print("🧪 TESTING MEDICAL RESEARCH ENGINE")
    print(banner)

    engine = MedicalResearchEngine()

    # Engine status overview
    status = engine.get_engine_status()
    print("\n🔧 Engine Status:")
    print(f" API Configured: {status['api_configured']}")
    print(f" Model: {status['model']}")
    print(f" Features: {', '.join(status['features'][:3])}...")
    print(f" Role-based reasoning: {status['role_based_reasoning']}")

    # Domain detection samples: (query, expected label)
    test_queries = [
        ("What are the latest treatments for diabetes?", "endocrinology"),
        ("How to manage hypertension in elderly patients?", "cardiology"),
        ("Research on Alzheimer's disease biomarkers", "neurology"),
        ("Hello, how are you?", "simple greeting")
    ]

    print("\n🔍 Testing domain detection:")
    for sample, wanted in test_queries:
        found = engine.detect_domain_from_query(sample)
        print(f" '{sample[:30]}...' → {found} (expected: {wanted})")

    # Role detection samples: (query, expected label)
    print("\n👤 Testing role detection:")
    role_queries = [
        ("I have diabetes and want to understand my treatment options", "patient"),
        ("As a medical student, I need to learn about ECG interpretation", "student"),
        ("What are the clinical guidelines for pneumonia treatment?", "clinician"),
        ("Latest research on cancer immunotherapy protocols", "researcher")
    ]

    for sample, wanted in role_queries:
        found = engine.detect_user_role_from_query(sample)
        print(f" '{sample[:30]}...' → {found} (expected: {wanted})")

    print("\n✅ Engine test complete!")