Dhruv-Ty committed on
Commit
5a6c013
·
verified ·
1 Parent(s): 39b65e1
Files changed (1) hide show
  1. src/model.py +274 -1112
src/model.py CHANGED
@@ -1,1148 +1,310 @@
1
- import requests
2
  import uuid
3
- import json
4
- import re
5
- import xml.etree.ElementTree as ET
6
- from bs4 import BeautifulSoup
7
  from datetime import datetime
 
8
  import os
9
- import openai
10
- import urllib.parse
11
- from dotenv import load_dotenv
12
-
13
- # Load environment variables
14
- load_dotenv()
15
-
16
- # Initialize OpenAI API key
17
def get_openai_api_key():
    """Return the OpenAI API key read from the ``OPENAI_API_KEY`` env variable.

    Surrounding whitespace (for example a trailing newline left by a secrets
    file) is removed, so a blank or whitespace-only value is treated as unset.

    Returns:
        The stripped API key string.

    Raises:
        ValueError: If the variable is missing, empty, or only whitespace.
    """
    api_key = (os.environ.get("OPENAI_API_KEY") or "").strip()
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable is not set")
    return api_key
23
-
24
- # Set OpenAI API key
25
- openai.api_key = get_openai_api_key()
26
-
27
- # System prompts
28
- SYSTEM_PROMPT = """You are an advanced clinical AI assistant designed to aid healthcare professionals.
29
- Follow these guidelines in all responses:
30
-
31
- 1. **Clarify First**: Before providing any diagnosis or plan, if the user's query is underspecified, ALWAYS ask relevant clarifying questions to gather necessary patient information. This includes, but is not limited to, symptoms, duration, severity, medical history, age, lifestyle factors (diet, exercise), and current medications.
32
- 2. Professional tone: Maintain a clear, respectful, and professional tone appropriate for medical consultation.
33
- 3. Evidence-based practice: Base all responses on current medical evidence and guidelines.
34
- 4. Transparency: Clearly distinguish between established medical facts, clinical guidance, and areas of uncertainty.
35
- 5. Structured analysis: Present information in a clear, organized manner following clinical reasoning patterns.
36
- 6. Citation: Always cite specific sources for medical claims when available using the [source_id] format.
37
- 7. Limitations: Acknowledge the limits of AI medical advice and recommend in-person consultation when appropriate.
38
- 8. Comprehensive approach: Consider differential diagnoses and relevant contextual factors.
39
- 9. Patient-centered: Focus on clinically relevant information while maintaining respect for the patient.
40
-
41
- For each consultation:
42
- 1. Ask clarifying questions if needed (as per guideline 1).
43
- 2. Provide differential diagnosis with likelihood assessment.
44
- 3. Suggest appropriate next steps (testing, treatment, referral).
45
- 4. Include reasoning for your conclusions.
46
- 5. Cite medical literature or guidelines supporting your assessment using [source_id].
47
-
48
- IMPORTANT: Your primary duty is to support clinical decision-making, not replace clinical judgment.
49
- """
50
-
51
- FOLLOW_UP_PROMPT = """Continue this medical consultation based on the previous discussion.
52
- Consider the information already gathered and the tentative diagnosis/plan.
53
-
54
- When responding to the follow-up:
55
- 1. Reference relevant details from the prior conversation.
56
- 2. Address the specific follow-up question with evidence-based information.
57
- 3. If new information suggests a need for clarification, ask relevant questions.
58
- 4. Update recommendations if appropriate.
59
- 5. Maintain the same structured approach with transparent reasoning.
60
- 6. Cite additional medical literature or guidelines when relevant using [source_id].
61
-
62
- Remember that this is an ongoing consultation where continuity of care is important.
63
- """
64
-
65
- # Function to extract source IDs and replace them with actual links
66
def extract_and_link_sources(text, evidence_snippets):
    """Replace ``[source_id]`` citations in *text* with Markdown links.

    Args:
        text: Model output that may contain bracketed citations such as
            ``[pubmed:123]`` or the generic placeholder ``[source_id]``.
        evidence_snippets: List of dicts with at least the keys ``id``,
            ``title``, ``url`` and ``citation``.

    Returns:
        A ``(linked_text, source_map)`` tuple. ``source_map`` maps each cited
        id found in *text* to ``{"id", "title", "url", "citation"}`` of the
        snippet it was resolved to. Resolved citations become
        ``[title](url)``; an unresolved ``[source_id]`` placeholder becomes
        ``[Medical Reference]``; any other unresolved citation is left as is.
    """
    source_pattern = r'\[([\w\d:_\-\.+]+)\]'
    matches = re.findall(source_pattern, text)

    def _entry(snippet):
        # Shape of every value stored in source_map.
        return {
            "id": snippet["id"],
            "title": snippet["title"].strip(),
            "url": snippet["url"],
            "citation": snippet["citation"],
        }

    source_map = {}

    # Pass 1: exact id matches (most reliable); the first matching snippet wins.
    for cited in matches:
        exact = next((s for s in evidence_snippets if s["id"] == cited), None)
        if exact is not None:
            source_map[cited] = _entry(exact)

    # Pass 2: fall back to comparing the part before the first hyphen.
    for cited in matches:
        if cited in source_map or cited == "source_id":
            continue
        prefix = cited.split("-")[0]
        for snippet in evidence_snippets:
            if snippet["id"].split("-")[0] == prefix:
                source_map[cited] = _entry(snippet)
                break

    # Pass 3: the generic placeholder resolves to the first snippet, provided
    # that snippet has both a URL and a title to link to.
    if "source_id" in matches and evidence_snippets and "source_id" not in source_map:
        first = evidence_snippets[0]
        if first.get("url") and first.get("title"):
            source_map["source_id"] = _entry(first)

    def _link(match):
        key = match.group(1)
        data = source_map.get(key)
        if data is not None:
            return f"[{data['title']}]({data['url']})"
        if key == "source_id":
            return "[Medical Reference]"
        return match.group(0)

    # A callable replacement is used on purpose: titles and URLs are inserted
    # literally, so backslashes or "\1"-style sequences in them are never
    # interpreted as regex replacement escapes. Doing it in a single pass also
    # prevents text inside an inserted link from being substituted again.
    linked_text = re.sub(source_pattern, _link, text)

    return linked_text, source_map
135
-
136
- # Implement PubMed API integration for medical evidence retrieval
137
def _pubmed_article_to_snippet(article):
    """Convert one ``<PubmedArticle>`` element into an evidence snippet dict.

    Returns ``None`` when the article has no PMID.
    """
    pmid = article.findtext(".//PMID")
    if not pmid:
        return None

    # itertext() keeps text that sits inside inline markup (<i>, <sup>, ...);
    # reading only .text would cut the string at the first child element.
    title_elem = article.find(".//ArticleTitle")
    title = "".join(title_elem.itertext()).strip() if title_elem is not None else ""
    title = title or "No title available"

    abstract = " ".join(
        "".join(elem.itertext()).strip()
        for elem in article.findall(".//AbstractText")
    ).strip()

    authors = []
    for author in article.findall(".//Author"):
        last_name = author.findtext(".//LastName") or ""
        initials = author.findtext(".//Initials") or ""
        if last_name and initials:
            authors.append(f"{last_name} {initials}")
    author_str = ", ".join(authors[:3])
    if len(authors) > 3:
        author_str += " et al."

    journal = article.findtext(".//Journal/Title") or "Journal not specified"
    year = article.findtext(".//PubDate/Year") or "N/A"
    citation = f"{author_str}. ({year}). {title}. {journal}. PMID: {pmid}"

    url = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"
    pmc_id = article.findtext(".//ArticleId[@IdType='pmc']")
    # NOTE(review): "epublish" is treated as free full text here, as in the
    # original code; confirm that this publication status really implies it.
    has_free_text = bool(pmc_id) or article.findtext(".//PublicationStatus") == "epublish"
    if pmc_id:
        # Prefer the PMC page because it serves the full text.
        url = f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmc_id}/"

    return {
        "id": f"pubmed:{pmid}",
        "title": title,
        "text": abstract[:800] + "..." if len(abstract) > 800 else abstract,
        "citation": citation,
        "url": url,
        "source_type": "PubMed" + (" (Free Full Text)" if has_free_text else ""),
        "is_open_access": has_free_text,
    }


def fetch_from_pubmed_api(query, max_results=3, api_key=None, timeout=10):
    """Fetch medical evidence from PubMed through the NCBI E-utilities API.

    The free-text *query* is stripped of greetings and first-person filler,
    known symptom phrases are extracted and prioritised, then ``esearch`` is
    used to find article ids and ``efetch`` to download their metadata.

    Args:
        query: User question in natural language.
        max_results: Maximum number of article ids requested from ``esearch``.
        api_key: Optional NCBI API key, sent as ``api_key`` when provided.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A list of snippet dicts (``id``, ``title``, ``text``, ``citation``,
        ``url``, ``source_type``, ``is_open_access``). The function is
        best-effort: a blank query, an HTTP or parsing failure, or any other
        error yields an empty list instead of raising.
    """
    results = []
    lowered = query.lower()

    # Remove greetings and first-person filler so only the clinical content
    # is sent to the search engine.
    cleaned_query = re.sub(r'^(hi|hello|hey|greetings|good morning|good afternoon|good evening)[,\.]?\s+', '', lowered)
    cleaned_query = re.sub(r"(i'?m|i am)\s+a\s+\d+[-\s]year[-\s]old", '', cleaned_query)
    cleaned_query = re.sub(r"(my name is|i am|i have been|i've been|i was|i have|i've had|i feel|i'm feeling|i experienced)", '', cleaned_query)
    cleaned_query = cleaned_query.strip()

    symptom_patterns = [
        r'(muscle weakness)', r'(fatigue)', r'(rash)', r'(pain)', r'(swelling)',
        r'(difficulty breathing|shortness of breath)', r'(fever)', r'(headache)',
        r'(nausea|vomiting)', r'(dizziness)', r'(numbness)', r'(tingling)'
    ]

    # Collect each recognised symptom once, in pattern order.
    medical_terms = []
    for pattern in symptom_patterns:
        for term in re.findall(pattern, lowered):
            if term not in medical_terms:
                medical_terms.append(term)

    if medical_terms:
        search_query = " AND ".join(medical_terms)
        if cleaned_query:
            search_query = f"({search_query}) OR ({cleaned_query})"
    else:
        search_query = cleaned_query

    if not search_query:
        # Nothing meaningful to search for; skip the network round trip.
        return results

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    # The raw term is passed on purpose: requests URL-encodes params itself,
    # so pre-quoting it would send a double-encoded query ("%2520" for " ").
    search_params = {
        "db": "pubmed",
        "term": search_query,
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance"
    }
    if api_key:
        search_params["api_key"] = api_key

    try:
        search_response = requests.get(
            f"{base_url}esearch.fcgi", params=search_params, timeout=timeout
        )
        if search_response.status_code != 200:
            return []

        ids = search_response.json().get("esearchresult", {}).get("idlist") or []
        if not ids:
            return results

        fetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml"
        }
        if api_key:
            fetch_params["api_key"] = api_key

        fetch_response = requests.get(
            f"{base_url}efetch.fcgi", params=fetch_params, timeout=timeout
        )
        if fetch_response.status_code != 200:
            return []

        try:
            root = ET.fromstring(fetch_response.text)
        except ET.ParseError:
            return []

        for article in root.findall(".//PubmedArticle"):
            try:
                snippet = _pubmed_article_to_snippet(article)
            except Exception:
                # Best-effort: one malformed record must not discard the rest.
                continue
            if snippet is not None:
                results.append(snippet)

        return results
    except Exception:
        # Deliberate best-effort boundary: evidence retrieval is optional, so
        # network/JSON failures degrade to "no evidence" instead of raising.
        return []
275
-
276
def _pmc_article_to_snippet(article):
    """Convert one PMC ``<article>`` element into an evidence snippet dict.

    Returns ``None`` when the article carries no PMC identifier.
    """
    pmc_number = None
    for id_elem in article.findall(".//article-id"):
        # NOTE(review): "pmcid" is accepted in addition to "pmc" on the
        # assumption that newer PMC XML uses it; confirm against live output.
        if id_elem.get("pub-id-type") in ("pmc", "pmcid"):
            raw = (id_elem.text or "").strip()
            # Normalise to the bare number so the "PMC" prefix added below
            # is never duplicated.
            if raw.upper().startswith("PMC"):
                raw = raw[3:]
            if raw:
                pmc_number = raw
    if not pmc_number:
        return None

    title_elem = article.find(".//article-title")
    title = "".join(title_elem.itertext()) if title_elem is not None else "No title available"

    abstract = ""
    abstract_elem = article.find(".//abstract")
    if abstract_elem is not None:
        abstract = " ".join(
            " ".join(p.itertext()) for p in abstract_elem.findall(".//p")
        ).strip()

    # Without an abstract, fall back to the first paragraphs of the body.
    if not abstract:
        body = article.find(".//body")
        if body is not None:
            abstract = " ".join(" ".join(p.itertext()) for p in body.findall(".//p")[:3])

    journal_elem = article.find(".//journal-title")
    journal = "".join(journal_elem.itertext()) if journal_elem is not None else "PMC Journal"

    year_elem = article.find(".//pub-date/year")
    year = (year_elem.text if year_elem is not None else None) or "N/A"

    authors = []
    for contrib in article.findall(".//contrib[@contrib-type='author']"):
        surname = contrib.find(".//surname")
        given_names = contrib.find(".//given-names")
        if surname is None or given_names is None or not surname.text:
            continue
        initial = given_names.text[0] if given_names.text else ""
        authors.append(f"{surname.text} {initial}")
    author_str = ", ".join(authors[:3])
    if len(authors) > 3:
        author_str += " et al."

    return {
        "id": f"pmc:{pmc_number}",
        "title": title,
        "text": abstract[:800] + "..." if len(abstract) > 800 else abstract,
        "citation": f"{author_str}. ({year}). {title}. {journal}. PMC{pmc_number}",
        "url": f"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC{pmc_number}/",
        "source_type": "PubMed Central (Open Access)",
        "is_open_access": True,
    }


def fetch_from_pmc_api(query, max_results=2, api_key=None, timeout=10):
    """Fetch free full-text articles from PubMed Central via NCBI E-utilities.

    Args:
        query: User question in natural language.
        max_results: Maximum number of article ids requested from ``esearch``.
        api_key: Optional NCBI API key, sent as ``api_key`` when provided.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        A list of snippet dicts (``id``, ``title``, ``text``, ``citation``,
        ``url``, ``source_type``, ``is_open_access``). Best-effort: a blank
        query or any HTTP/parsing failure yields an empty list.
    """
    results = []

    # Remove greetings and first-person filler from the query.
    cleaned_query = re.sub(r'^(hi|hello|hey|greetings|good morning|good afternoon|good evening)[,\.]?\s+', '', query.lower())
    cleaned_query = re.sub(r"(i'?m|i am)\s+a\s+\d+[-\s]year[-\s]old", '', cleaned_query)
    cleaned_query = re.sub(r"(my name is|i am|i have been|i've been|i was|i have|i've had|i feel|i'm feeling|i experienced)", '', cleaned_query)
    cleaned_query = cleaned_query.strip()

    if not cleaned_query:
        # Only the filter clause would remain; nothing useful to search.
        return results

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    # The term is passed unquoted: requests encodes params itself, and
    # quoting it beforehand would double-encode the query.
    search_params = {
        "db": "pmc",
        "term": cleaned_query + " AND free full text[filter]",
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance"
    }
    if api_key:
        search_params["api_key"] = api_key

    try:
        search_response = requests.get(
            f"{base_url}esearch.fcgi", params=search_params, timeout=timeout
        )
        if search_response.status_code != 200:
            return []

        ids = search_response.json().get("esearchresult", {}).get("idlist") or []
        if not ids:
            return results

        fetch_params = {
            "db": "pmc",
            "id": ",".join(ids),
            "retmode": "xml"
        }
        if api_key:
            fetch_params["api_key"] = api_key

        fetch_response = requests.get(
            f"{base_url}efetch.fcgi", params=fetch_params, timeout=timeout
        )
        if fetch_response.status_code != 200:
            return []

        try:
            root = ET.fromstring(fetch_response.text)
        except ET.ParseError:
            return []

        for article in root.findall(".//article"):
            try:
                snippet = _pmc_article_to_snippet(article)
            except Exception:
                # Best-effort: skip a malformed record, keep the others.
                continue
            if snippet is not None:
                results.append(snippet)

        return results
    except Exception:
        # Deliberate best-effort boundary: failures degrade to "no evidence".
        return []
407
-
408
def fetch_from_who_api(query, max_results=1, timeout=10):
    """Fetch WHO publications matching *query* by scraping the search page.

    WHO exposes no public search API, so the HTML of the publications search
    results page is parsed instead.

    Args:
        query: Search terms.
        max_results: Maximum number of result entries to return.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        A list of snippet dicts (``id``, ``title``, ``text``, ``citation``,
        ``url``, ``source_type``, ``is_open_access``). Best-effort: a blank
        query, a non-200 response, or any error yields an empty list.
    """
    if not query or not query.strip():
        return []

    try:
        # quote_plus escapes characters such as "&", "#" and "?" that would
        # otherwise break or truncate the query string.
        search_url = (
            "https://www.who.int/publications/search-results?indexTerms="
            + urllib.parse.quote_plus(query)
        )
        response = requests.get(search_url, timeout=timeout)
        if response.status_code != 200:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        results = []

        for article in soup.select('.search-results article')[:max_results]:
            title_elem = article.select_one('h3')
            title = title_elem.text.strip() if title_elem else "WHO Guideline"

            desc_elem = article.select_one('.search-description')
            description = desc_elem.text.strip() if desc_elem else ""

            link_elem = article.select_one('a')
            href = link_elem.get('href') if link_elem is not None else None
            # urljoin handles both relative and already-absolute hrefs.
            link = urllib.parse.urljoin("https://www.who.int", href) if href else ""

            date_elem = article.select_one('.search-meta')
            date = date_elem.text.strip() if date_elem else "Date not specified"

            # Use the last path segment as the id; ignore a trailing slash so
            # the id is never empty, and fall back to a random suffix.
            slug = link.rstrip('/').split('/')[-1] if link else ""
            who_id = slug or f"who-{uuid.uuid4().hex[:8]}"

            results.append({
                "id": f"who:{who_id}",
                "title": title,
                "text": description[:800] + "..." if len(description) > 800 else description,
                "citation": f"World Health Organization. ({date}). {title}.",
                "url": link,
                "source_type": "WHO Guidelines",
                "is_open_access": True  # WHO guidelines are freely accessible
            })

        return results
    except Exception:
        # Deliberate best-effort boundary: scraping failures mean "no evidence".
        return []
452
-
453
def fetch_from_core_api(query, max_results=2, api_key=None, timeout=10):
    """Fetch open-access research papers from the CORE search API.

    Twice as many candidates as requested are retrieved, scored for
    accessibility and symptom relevance, and the best *max_results* are
    converted into evidence snippets.

    Args:
        query: User question in natural language.
        max_results: Maximum number of snippets to return.
        api_key: Optional CORE API key, sent as a Bearer token when provided.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        A list of snippet dicts (``id``, ``title``, ``text``, ``citation``,
        ``url``, ``source_type``, ``is_open_access``). Best-effort: a blank
        query, a non-200 response, or any error yields an empty list.
    """
    results = []
    lowered = query.lower()

    # Remove greetings and first-person filler from the query.
    cleaned_query = re.sub(r'^(hi|hello|hey|greetings|good morning|good afternoon|good evening)[,\.]?\s+', '', lowered)
    cleaned_query = re.sub(r"(i'?m|i am)\s+a\s+\d+[-\s]year[-\s]old", '', cleaned_query)
    cleaned_query = re.sub(r"(my name is|i am|i have been|i've been|i was|i have|i've had|i feel|i'm feeling|i experienced)", '', cleaned_query)
    cleaned_query = cleaned_query.strip()

    symptom_patterns = [
        r'(muscle weakness)', r'(fatigue)', r'(rash)', r'(pain)', r'(swelling)',
        r'(difficulty breathing|shortness of breath)', r'(fever)', r'(headache)',
        r'(nausea|vomiting)', r'(dizziness)', r'(numbness)', r'(tingling)'
    ]

    # Collect each recognised symptom once so scoring does not double count.
    medical_terms = []
    for pattern in symptom_patterns:
        for term in re.findall(pattern, lowered):
            if term not in medical_terms:
                medical_terms.append(term)

    if medical_terms:
        search_query = (cleaned_query + " " + " ".join(medical_terms)).strip()
    else:
        search_query = cleaned_query

    if not search_query:
        return results

    base_url = "https://core.ac.uk/api/v3/search/works"

    search_params = {
        "q": search_query,
        "limit": max_results * 2,  # over-fetch so the best ones can be picked
        "offset": 0,
        "fields": ["title", "abstract", "authors", "year", "downloadUrl", "sourceFulltextUrl", "doi", "fullText"]
    }

    # Only send Authorization when a key exists instead of a None value.
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        response = requests.post(base_url, json=search_params, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return []

        data = response.json()
        candidates = data.get("results") or []

        # First pass: score every candidate (higher is better).
        scored = []
        for article in candidates:
            if not isinstance(article, dict):
                continue
            # "or ''" matters: the API may return JSON null for these fields,
            # and .get(key, "") would hand back None in that case.
            title_text = article.get("title") or ""
            abstract_text = article.get("abstract") or ""

            score = 0
            if article.get("downloadUrl") or article.get("sourceFulltextUrl"):
                score += 3  # directly accessible
            if article.get("fullText"):
                score += 2
            if len(abstract_text) > 100:
                score += 1
            haystack = (title_text + abstract_text).lower()
            score += 2 * sum(1 for term in medical_terms if term in haystack)
            scored.append((score, article))

        # Stable sort keeps the API's relevance order among equal scores.
        scored.sort(reverse=True, key=lambda pair: pair[0])
        top_articles = [article for _, article in scored[:max_results]]

        # Second pass: turn the winners into snippets.
        for article in top_articles:
            try:
                title = article.get("title") or "No title available"
                abstract = article.get("abstract") or ""
                full_text = article.get("fullText") or ""

                if full_text:
                    text_content = f"[FULL TEXT AVAILABLE] {full_text[:1500]}..."
                else:
                    text_content = abstract

                authors = article.get("authors") or []
                year = article.get("year") or "N/A"

                author_names = [
                    a.get("name", "") if isinstance(a, dict) else str(a)
                    for a in authors[:3]
                ]
                author_str = ", ".join(author_names)
                if len(authors) > 3:
                    author_str += " et al."

                # Prefer direct download links over a DOI landing page.
                url = ""
                download_available = False
                if article.get("downloadUrl"):
                    url = article.get("downloadUrl")
                    download_available = True
                elif article.get("sourceFulltextUrl"):
                    url = article.get("sourceFulltextUrl")
                    download_available = True
                elif article.get("doi"):
                    url = f"https://doi.org/{article.get('doi')}"

                citation = f"{author_str}. ({year}). {title}."
                if article.get("doi"):
                    citation += f" DOI: {article['doi']}"

                core_id = article.get("id") or str(uuid.uuid4())

                source_type = "CORE Open Access"
                if download_available:
                    source_type += " (Full Text Available)"
                elif full_text:
                    source_type += " (Full Text Excerpt Included)"
                else:
                    source_type += " (Abstract Only)"

                results.append({
                    "id": f"core:{core_id}",
                    "title": title,
                    "text": text_content[:800] + "..." if len(text_content) > 800 else text_content,
                    "citation": citation,
                    "url": url,
                    "source_type": source_type,
                    "is_open_access": True  # All CORE articles are open access
                })
            except Exception:
                # Best-effort: skip a malformed record, keep the others.
                continue

        return results
    except Exception:
        # Deliberate best-effort boundary: failures degrade to "no evidence".
        return []
613
-
614
- # Enhanced RAG System with real medical sources
615
def fetch_medical_evidence(query, max_results=5):
    """Collect medical evidence for *query* from several sources.

    Sources are consulted in order (PubMed, PubMed Central, CORE, WHO), and
    each later source is only asked for as many items as are still missing.
    Entries pointing at a URL that was already collected are skipped, so an
    article reachable through both PubMed and PMC is reported once.

    Args:
        query: User question in natural language.
        max_results: Maximum number of snippets to return.

    Returns:
        Up to *max_results* snippet dicts, ordered so that entries advertising
        full text come first, then CORE, PubMed Central and PubMed entries.
    """
    if max_results <= 0:
        return []

    pubmed_api_key = os.environ.get("PUBMED_API_KEY")
    core_api_key = os.environ.get("CORE_API_KEY")

    results = []
    seen_urls = set()

    def _collect(items):
        # Append new items, dropping those whose URL was already collected.
        for item in items or []:
            url = item.get("url")
            if url:
                if url in seen_urls:
                    continue
                seen_urls.add(url)
            results.append(item)

    # Source 1: PubMed - always asked for at least two articles.
    _collect(fetch_from_pubmed_api(query, max_results=max(2, max_results // 2), api_key=pubmed_api_key))

    # Source 2: PubMed Central - free full text articles.
    if len(results) < max_results:
        _collect(fetch_from_pmc_api(query, max_results=max_results - len(results), api_key=pubmed_api_key))

    # Source 3: CORE - open access research papers.
    if len(results) < max_results:
        _collect(fetch_from_core_api(query, max_results=max_results - len(results), api_key=core_api_key))

    # Source 4: WHO guidelines - only if results are still missing.
    if len(results) < max_results:
        _collect(fetch_from_who_api(query, max_results=max_results - len(results)))

    def _priority(item):
        source_type = item.get("source_type", "")
        return (
            "Full Text" in source_type,
            "CORE" in source_type,
            # PMC entries are labelled "PubMed Central (Open Access)", so a
            # bare "PMC" test alone would never match them.
            "PubMed Central" in source_type or "PMC" in source_type,
            "PubMed" in source_type,
        )

    # Stable sort: entries with equal priority keep their retrieval order.
    results.sort(key=_priority, reverse=True)

    return results[:max_results]
658
-
659
- # Function to parse doctor agent responses
660
def parse_doctor_response(response_text):
    """Parse the doctor agent's reply into structured components.

    Section headers are matched case-insensitively and may be plain
    (``Diagnosis:``) or Markdown-bold (``**Diagnosis**:`` / ``**Diagnosis:**``).

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A dict with the keys:

        * ``main_response`` - the unmodified *response_text*.
        * ``diagnosis`` / ``treatment`` - text following the first matching
          header up to a blank line or a line starting with a capital letter
          (empty string when absent). ``treatment`` also accepts the headers
          "recommendations" and "plan".
        * ``reasoning`` - list of bullet texts found after the "reasoning"
          header, stopping before a sources/references header.
        * ``sources`` - lines found after a "sources"/"references" header,
          followed by every bracketed citation id found anywhere in the text
          that is not already listed.
    """
    parsed = {
        "main_response": response_text,
        "diagnosis": "",
        "treatment": "",
        "reasoning": [],
        "sources": []
    }

    # Optional bold markers and colon after a header word, e.g. "**:" or ":**".
    header_tail = r'\*{0,2}:?\*{0,2}\s*'
    # A section ends at a blank line or at a new line starting with a capital
    # letter. Case-insensitivity is scoped to the header word only, so [A-Z]
    # really means "uppercase" here.
    section_end = r'(?:\n\n|\n\*{0,2}[A-Z]|\Z)'
    sources_header = r'(?i:sources|references)'

    diagnosis_match = re.search(
        r'(?i:diagnosis)' + header_tail + r'(.*?)' + section_end,
        response_text, re.DOTALL)
    if diagnosis_match:
        parsed["diagnosis"] = diagnosis_match.group(1).strip()

    treatment_match = re.search(
        r'(?i:treatment|recommendations|plan)' + header_tail + r'(.*?)' + section_end,
        response_text, re.DOTALL)
    if treatment_match:
        parsed["treatment"] = treatment_match.group(1).strip()

    # Reasoning runs until a sources/references header at the start of a line
    # (optionally bulleted or bold), or to the end of the text.
    reasoning_match = re.search(
        r'(?i:reasoning)' + header_tail
        + r'(.*?)(?=\n[ \t]*(?:[-*][ \t]+)?\*{0,2}' + sources_header + r'|\Z)',
        response_text, re.DOTALL)
    if reasoning_match:
        reasoning_text = reasoning_match.group(1).strip()
        items = [item.strip() for item in reasoning_text.split('\n-')]
        if items:
            # The first bullet still carries its dash after the split.
            items[0] = items[0].lstrip('- ')
        parsed["reasoning"] = [item for item in items if item]

    sources_match = re.search(
        sources_header + header_tail + r'(.*)\Z', response_text, re.DOTALL)
    if sources_match:
        for line in sources_match.group(1).strip().split('\n'):
            entry = re.sub(r'^[-*]\s+', '', line.strip())
            if entry:
                parsed["sources"].append(entry)

    # Inline citations; the character class mirrors the ids produced by the
    # evidence fetchers (e.g. "pubmed:123", "who:some-slug").
    for citation in re.findall(r'\[([\w:.+\-]+)\]', response_text):
        if citation not in parsed["sources"]:
            parsed["sources"].append(citation)

    return parsed
711
-
712
- # Enhanced Doctor Agent call with structured output
713
def doctor_agent(messages):
    """Send *messages* to the chat model and return the reply text.

    Uses the ``openai.ChatCompletion`` interface (OpenAI SDK v0.28.x) with the
    ``gpt-4o-mini`` model at temperature 0.3. Any exception is converted into
    an apology string that embeds the error message, so this never raises.
    """
    try:
        completion = openai.ChatCompletion.create(
            temperature=0.3,
            messages=messages,
            model="gpt-4o-mini",
        )
        first_choice = completion.choices[0]
        return first_choice.message['content']
    except Exception as exc:
        return f"I'm sorry, there was an error processing your request. Please try again. Error: {str(exc)}"
724
-
725
- # Single orchestrator turn with enhanced reasoning and citation tracking
726
- def orchestrator_chat(history, query, use_rag, is_follow_up=False):
727
- """Handle a single turn of conversation with the doctor agent"""
728
- # Select appropriate system prompt based on whether this is a follow-up
729
- if is_follow_up:
730
- system = {"role": "system", "content": FOLLOW_UP_PROMPT}
731
  else:
732
- system = {"role": "system", "content": SYSTEM_PROMPT}
733
-
734
- msgs = [system] + history
735
-
736
- # Evidence gathering
737
- evidence_snippets = []
738
- if use_rag:
739
- # Only fetch and format evidence if RAG is enabled
740
- evidence_snippets = fetch_medical_evidence(query)
741
-
742
- # Format evidence for the model
743
- if evidence_snippets:
744
- evidence_text = "MEDICAL EVIDENCE FROM AUTHORITATIVE SOURCES:\n\n"
745
-
746
- for i, snippet in enumerate(evidence_snippets):
747
- evidence_text += f"[{snippet['id']}] {snippet['title']}\n"
748
- evidence_text += f"Source: {snippet['source_type']}\n"
749
- evidence_text += f"Content: {snippet['text']}\n"
750
- evidence_text += f"Citation: {snippet['citation']}\n"
751
- evidence_text += f"URL: {snippet['url']}\n\n"
752
-
753
- # Enhanced instructions for better source utilization
754
- evidence_text += """CITATION INSTRUCTIONS:
755
- 1. When referencing these sources in your response, use the format [source_id] to cite them.
756
- 2. Prioritize information from sources marked with "Full Text Available" as they provide more comprehensive data.
757
- 3. CORE API sources provide open access full text articles that are particularly valuable for diagnosis.
758
- 4. Use the most relevant medical evidence to support your diagnostic reasoning.
759
- 5. Try to cite multiple sources to provide a well-rounded assessment.
760
- """
761
-
762
- msgs.append({"role": "system", "content": evidence_text})
763
  else:
764
- # If no evidence was found, inform the model
765
- no_evidence_msg = ("Note: No specific medical evidence was found in our databases for this query. "
766
- "Please rely on your general medical knowledge and be sure to recommend "
767
- "appropriate diagnostic steps and medical consultation.")
768
- msgs.append({"role": "system", "content": no_evidence_msg})
769
-
770
- # Add instructions for structured output
771
- if use_rag:
772
- output_instructions = """
773
- Please structure your response clearly.
774
-
775
- **Priority 1: Ask Clarifying Questions**
776
- If the user's query lacks detail for a proper assessment (e.g., age, specific symptoms, medical history, duration, severity), your HIGHEST priority is to ask these questions first. Do not provide a diagnosis or plan until sufficient information is gathered.
777
-
778
- **Priority 2: Main Response (After Clarification)**
779
- Once sufficient information is available (either initially or after asking questions), provide:
780
- 1. A direct answer to the patient's concerns.
781
- 2. If appropriate, a clear diagnosis or differential diagnosis.
782
- 3. Recommendations for a treatment plan or next steps.
783
- 4. Ensure you cite medical evidence using the [source_id] format for any claims or information taken from the provided MEDICAL EVIDENCE snippets.
784
-
785
- **After your main response, ALWAYS include these sections:**
786
- - **Reasoning**: Bullet points detailing your clinical reasoning.
787
- - **Sources**: A list of all references cited in your main response, using their full titles and corresponding URLs if they were linked (e.g., [Title of Source](URL)). If a source was just an ID without a direct link in the text, list its ID or citation.
788
- """
789
- else:
790
- # Different instructions when RAG is disabled - no mention of sources or citations
791
- output_instructions = """
792
- Please structure your response clearly.
793
-
794
- **Priority 1: Ask Clarifying Questions**
795
- If the user's query lacks detail for a proper assessment (e.g., age, specific symptoms, medical history, duration, severity), your HIGHEST priority is to ask these questions first. Do not provide a diagnosis or plan until sufficient information is gathered.
796
-
797
- **Priority 2: Main Response (After Clarification)**
798
- Once sufficient information is available (either initially or after asking questions), provide:
799
- 1. A direct answer to the patient's concerns.
800
- 2. If appropriate, a clear diagnosis or differential diagnosis.
801
- 3. Recommendations for a treatment plan or next steps.
802
-
803
- **After your main response, ALWAYS include this section:**
804
- - **Reasoning**: Bullet points detailing your clinical reasoning.
805
-
806
- IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
807
- """
808
-
809
- msgs.append({"role": "system", "content": output_instructions})
810
- msgs.append({"role": "user", "content": query})
811
-
812
- # Get response from doctor agent
813
- response = doctor_agent(msgs)
814
-
815
- # Process the response based on whether RAG is enabled
816
- if use_rag:
817
- # Process the response to replace source placeholders with actual links
818
- linked_response, source_map = extract_and_link_sources(response, evidence_snippets)
819
-
820
- # Parse the response
821
- parsed_response = parse_doctor_response(linked_response)
822
-
823
- # Enhance source information with evidence snippets data
824
- enhanced_sources = []
825
- # Use the source_map from extract_and_link_sources as the primary guide for cited sources
826
- for source_id_key, mapped_data in source_map.items():
827
- enhanced_sources.append({
828
- "id": mapped_data["id"], # This is the original ID from the snippet
829
- "title": mapped_data["title"],
830
- "citation": mapped_data["citation"],
831
- "url": mapped_data["url"],
832
- "source_type": "Referenced Source" # Or derive from snippet if available
833
- })
834
-
835
- # Get source types and open access status from original snippets
836
- for es in enhanced_sources:
837
- for snippet in evidence_snippets:
838
- if es["id"] == snippet["id"]:
839
- es["source_type"] = snippet.get("source_type", "Referenced Source")
840
- es["is_open_access"] = snippet.get("is_open_access", False)
841
- break
842
 
843
- # If there are sources in parsed_response["sources"] that are not in source_map
844
- # (e.g., LLM hallucinated an ID or cited something not in snippets), add them.
845
- current_enhanced_ids = {es['id'] for es in enhanced_sources}
 
 
 
 
846
 
847
- for source_text in parsed_response["sources"]: # source_text could be "[id]", "title (url)", or just "citation"
848
- source_id_candidate = source_text.strip("[]") # Basic extraction
849
-
850
- # Check if this source_id_candidate was part of the original evidence
851
- found_in_evidence = False
852
- for snippet in evidence_snippets:
853
- if source_id_candidate == snippet["id"]:
854
- if source_id_candidate not in current_enhanced_ids:
855
- enhanced_sources.append({
856
- "id": snippet["id"],
857
- "title": snippet["title"],
858
- "citation": snippet["citation"],
859
- "url": snippet["url"],
860
- "source_type": snippet["source_type"],
861
- "is_open_access": snippet.get("is_open_access", False)
862
- })
863
- current_enhanced_ids.add(snippet["id"]) # Add to set to avoid re-adding
864
- found_in_evidence = True
865
- break
866
-
867
- if not found_in_evidence:
868
- # If it's not in source_map and not directly in evidence_snippets by a simple ID match,
869
- # it might be a raw citation or a URL. Add it with available info.
870
- is_duplicate = False
871
- for es_item in enhanced_sources:
872
- if es_item["title"] == source_text or es_item["url"] == source_text or es_item["citation"] == source_text:
873
- is_duplicate = True
874
- break
875
- if not is_duplicate and source_text not in current_enhanced_ids:
876
- # Try to extract a URL if present in markdown format
877
- url_match = re.search(r'\[(.*?)\]\((https?://[^)]+)\)', source_text)
878
- if url_match:
879
- title = url_match.group(1)
880
- url = url_match.group(2)
881
- else:
882
- title = source_text # Could be a citation string or a plain title
883
- url = "" # No URL found directly
884
-
885
- enhanced_sources.append({
886
- "id": source_id_candidate, # Use the candidate, might be a simple title or part of citation
887
- "title": title,
888
- "citation": source_text, # The original text from LLM's source list
889
- "url": url,
890
- "source_type": "Referenced Source (uncategorized)"
891
- })
892
- current_enhanced_ids.add(source_id_candidate)
893
-
894
- # Add the enhanced sources back to the parsed response
895
- parsed_response["enhanced_sources"] = enhanced_sources
896
- main_response = linked_response
897
- else:
898
- # If RAG is disabled, just parse the response without source processing
899
- parsed_response = parse_doctor_response(response)
900
- parsed_response["enhanced_sources"] = []
901
- main_response = response
902
-
903
- # Create detailed explanation with reasoning and sources
904
- explanation = []
905
-
906
- # Add reasoning section
907
- if parsed_response["reasoning"]:
908
- explanation.append("## REASONING")
909
- for i, reason in enumerate(parsed_response["reasoning"]):
910
- explanation.append(f"{i+1}. {reason}")
911
- explanation.append("")
912
-
913
- # Only add sources section if RAG is enabled
914
- if use_rag and parsed_response["enhanced_sources"]:
915
- explanation.append("## SOURCES USED")
916
-
917
- # Add enhanced sources first (these are the ones actually cited in the response)
918
- source_added_count = 0
919
 
920
- unique_sources_for_display = {} # id: {title, url, citation, source_type}
921
- for source in parsed_response["enhanced_sources"]:
922
- # Prefer using the mapped title and URL from extract_and_link_sources if available
923
- display_id = source.get('id', source.get('title', 'Unknown Source'))
924
-
925
- if display_id not in unique_sources_for_display:
926
- unique_sources_for_display[display_id] = {
927
- "title": source.get('title', 'N/A'),
928
- "url": source.get('url', ''),
929
- "citation": source.get('citation', ''),
930
- "source_type": source.get('source_type', 'Referenced Source'),
931
- "is_open_access": source.get('is_open_access', False)
932
- }
933
-
934
- # Create a categorized display of sources
935
- source_categories = {
936
- "CORE": [], # CORE API full text
937
- "PMC": [], # PubMed Central full text
938
- "PubMed": [], # PubMed abstracts
939
- "WHO": [], # WHO guidelines
940
- "Other": [] # Uncategorized
941
- }
942
 
943
- # Categorize sources
944
- for key, src_data in unique_sources_for_display.items():
945
- source_type = src_data['source_type']
946
-
947
- if "CORE" in source_type:
948
- source_categories["CORE"].append((key, src_data))
949
- elif "PMC" in source_type:
950
- source_categories["PMC"].append((key, src_data))
951
- elif "PubMed" in source_type:
952
- source_categories["PubMed"].append((key, src_data))
953
- elif "WHO" in source_type:
954
- source_categories["WHO"].append((key, src_data))
955
- else:
956
- source_categories["Other"].append((key, src_data))
957
-
958
- # Display sources by category
959
- for category, sources in source_categories.items():
960
- if sources:
961
- if category != "Other": # Skip category header for Other
962
- explanation.append(f"### {category} Sources:")
963
 
964
- for key, src_data in sources:
965
- title = src_data['title']
966
- url = src_data['url']
967
- is_open_access = src_data.get('is_open_access', False)
968
-
969
- if url: # If URL exists, make it a markdown link
970
- explanation.append(f"- [{title}]({url}) {' 🔓' if is_open_access else ''}")
971
- else: # Otherwise, just list the title or ID
972
- explanation.append(f"- {title}")
973
-
974
- if src_data['source_type']:
975
- explanation.append(f" Source Type: {src_data['source_type']}")
976
- if src_data['citation']: # Always show citation if available
977
- explanation.append(f" Citation: {src_data['citation']}")
978
- explanation.append("") # Add a blank line for spacing
979
- source_added_count += 1
980
-
981
- if source_added_count == 0 and parsed_response["sources"]: # Fallback to raw sources if enhanced list is empty but LLM listed some
982
- explanation.append("## SOURCES MENTIONED (Raw)") # Indicate these are less processed
983
- for source_text in parsed_response["sources"]:
984
- explanation.append(f"- {source_text.strip()}")
985
- explanation.append("")
986
- source_added_count +=1
987
-
988
- # If we still have no sources, remove the header
989
- if source_added_count == 0: # Check if any sources were actually added to explanation
990
- # Remove "## SOURCES USED" header if it was added but no sources followed
991
- if explanation and explanation[-1] == "## SOURCES USED":
992
- explanation.pop()
993
-
994
- # Enhanced version to display clickable article links
995
- # Check if we have evidence snippets but no sources in the explanation
996
- if evidence_snippets and "## SOURCES USED" not in "\n".join(explanation):
997
- # If AI didn't explicitly cite sources, show available evidence anyway
998
- additional_explanation = ["\n## AVAILABLE MEDICAL SOURCES"]
999
-
1000
- # Create categorized display of all available sources
1001
- categorized_snippets = {
1002
- "CORE Open Access": [], # CORE API full text
1003
- "PubMed Central": [], # PMC full text
1004
- "PubMed": [], # PubMed abstracts
1005
- "WHO Guidelines": [], # WHO guidelines
1006
- "Other": [] # Uncategorized
1007
- }
1008
-
1009
- # Categorize snippets
1010
- for snippet in evidence_snippets:
1011
- source_type = snippet.get("source_type", "")
1012
-
1013
- if "CORE" in source_type:
1014
- categorized_snippets["CORE Open Access"].append(snippet)
1015
- elif "PMC" in source_type:
1016
- categorized_snippets["PubMed Central"].append(snippet)
1017
- elif "PubMed" in source_type and "PMC" not in source_type:
1018
- categorized_snippets["PubMed"].append(snippet)
1019
- elif "WHO" in source_type:
1020
- categorized_snippets["WHO Guidelines"].append(snippet)
1021
- else:
1022
- categorized_snippets["Other"].append(snippet)
1023
-
1024
- # Display snippets by category
1025
- for category, snippets in categorized_snippets.items():
1026
- if snippets:
1027
- if category != "Other": # Skip category header for Other
1028
- additional_explanation.append(f"### {category}:")
1029
-
1030
- for snippet in snippets:
1031
- title = snippet.get("title", "Unknown Title")
1032
- url = snippet.get("url", "")
1033
- source_type = snippet.get("source_type", "Medical Source")
1034
- is_open_access = snippet.get("is_open_access", False)
1035
-
1036
- if url:
1037
- # Format as clickable markdown link with open access indicator
1038
- additional_explanation.append(f"- [{title}]({url}) {' 🔓' if is_open_access else ''}")
1039
- else:
1040
- additional_explanation.append(f"- {title} {' 🔓' if is_open_access else ''}")
1041
-
1042
- if "source_type" in snippet:
1043
- additional_explanation.append(f" Source Type: {snippet['source_type']}")
1044
- if "citation" in snippet:
1045
- additional_explanation.append(f" Citation: {snippet['citation']}")
1046
- additional_explanation.append("")
1047
-
1048
- # Add to the main explanation
1049
- explanation.extend(additional_explanation)
1050
-
1051
- # Add a note about data availability
1052
- data_availability_note = [
1053
- "\n## DATA AVAILABILITY NOTE",
1054
- "- PubMed sources typically provide abstracts only, unless marked as free full text",
1055
- "- PubMed Central (PMC) sources provide complete free full text articles",
1056
- "- CORE Open Access sources provide full text content from research repositories",
1057
- "- WHO Guidelines provide official medical recommendations and protocols",
1058
- "- Sources marked with 🔓 indicate open access content with full text available"
1059
- ]
1060
- explanation.extend(data_availability_note)
1061
-
1062
- # Format explanation as string
1063
- explanation_text = "\n".join(explanation)
1064
-
1065
- # Update conversation history
1066
- history.append({"role": "user", "content": query})
1067
- history.append({"role": "assistant", "content": main_response})
1068
-
1069
- return main_response, explanation_text, evidence_snippets
1070
-
1071
- # Enhanced interactive loop with better handling of consultations
1072
def _read_line(prompt_text):
    """Read one line from stdin, mapping EOF or Ctrl-C to the 'exit' command."""
    try:
        return input(prompt_text)
    except (EOFError, KeyboardInterrupt):
        # Finish the interrupted prompt line so later output starts cleanly.
        print()
        return "exit"


def run_consultation(use_rag=True):
    """Run an interactive medical consultation on the terminal.

    Reads queries from stdin, sends each one to ``orchestrator_chat`` together
    with the running history, and prints the reply followed by the detailed
    explanation. Typing ``exit`` (or closing stdin / pressing Ctrl-C at a
    prompt) ends the session; typing ``next`` at the follow-up prompt clears
    the history and starts a new case under a fresh consultation ID.

    Args:
        use_rag: When true, announce the evidence databases and ask
            ``orchestrator_chat`` to use retrieved medical evidence.
    """
    history = []
    print("\n===== MEDICAL AI ASSISTANT =====")
    print("Type 'exit' to end or 'next' for a new case.\n")

    if use_rag:
        print("Using medical evidence from: PubMed, PMC, CORE, and WHO")
        print("Sources marked with 🔓 provide full text access\n")

    consultation_id = str(uuid.uuid4())[:8]
    print(f"Consultation ID: {consultation_id}")

    query = _read_line("\nYou: ")
    # Commands are matched after stripping so that "exit " or " Exit" still
    # end the session instead of being sent to the model as a query.
    while query.strip().lower() != "exit":
        if not query.strip():
            # Nothing was typed; ask again rather than send an empty query.
            query = _read_line("\nYou: ")
            continue

        # Any earlier exchange in this case makes the query a follow-up.
        is_follow_up = len(history) > 0

        # Inform user that evidence is being fetched if RAG is enabled
        if use_rag:
            print("\nSearching medical databases...")

        # orchestrator_chat appends the user/assistant exchange to `history`
        # itself, so the list is not updated here.
        reply, explanation, evidence = orchestrator_chat(history, query, use_rag, is_follow_up)

        # Display the AI response
        print("\n" + "=" * 30)
        print("AI RESPONSE")
        print("=" * 30)
        print(reply)

        # Always show explanation/reasoning
        print("\n" + "=" * 30)
        print("DETAILED EXPLANATION")
        print("=" * 30)
        # Fall back to a default message when the explanation is blank or is
        # only a separator row.
        if explanation and explanation.strip() and explanation.strip() != "=" * 50:
            print(explanation)
        else:
            print("No detailed explanation or sources were generated for this response.")

        # Add Open Access Legend if evidence sources were found
        if evidence:
            print("\nLEGEND: 🔓 = Open Access (full text available)")

        # Anything other than a command is treated as the follow-up question.
        next_action = _read_line("\nFollow-up? (or 'next' for new case, 'exit' to end): ")
        command = next_action.strip().lower()

        if command == "exit":
            break
        if command == "next":
            # Start a new consultation
            history = []
            consultation_id = str(uuid.uuid4())[:8]
            print(f"\nNew Consultation ID: {consultation_id}")
            query = _read_line("\nYou: ")
        else:
            # Continue with follow-up
            query = next_action

    print("\nConsultation ended.")
1133
-
1134
- # Save consultation to file
1135
def save_consultation(history, consultation_id):
    """Save the consultation history to a timestamped JSON file.

    The file is written to the current working directory as
    ``consultation_<consultation_id>_<YYYYmmdd_HHMMSS>.json``.

    Args:
        history: JSON-serialisable conversation history to store.
        consultation_id: Identifier embedded in the file name.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"consultation_{consultation_id}_{timestamp}.json"

    # Explicit UTF-8 keeps the output independent of the platform's default
    # encoding; ensure_ascii=False stores non-ASCII text (e.g. the 🔓 marker)
    # readably instead of as \uXXXX escapes.
    with open(filename, 'w', encoding="utf-8") as f:
        json.dump(history, f, indent=2, ensure_ascii=False)

    print(f"Consultation saved to {filename}")
1144
-
1145
- # Main entry point
1146
- if __name__ == "__main__":
1147
- print("\nInitializing Medical AI Assistant...")
1148
- run_consultation(use_rag=True)
 
1
+ import streamlit as st
2
  import uuid
 
 
 
 
3
  from datetime import datetime
4
+ import json
5
  import os
6
+ import re
7
+ from model import (
8
+ orchestrator_chat,
9
+ fetch_medical_evidence,
10
+ extract_and_link_sources,
11
+ parse_doctor_response
12
+ )
13
+
14
+ # Set page config with dark theme
15
+ st.set_page_config(
16
+ page_title="Medical AI Assistant",
17
+ page_icon=None,
18
+ layout="wide",
19
+ initial_sidebar_state="collapsed"
20
+ )
21
+
22
+ # Custom CSS for styling with purple->teal gradient and dark mode
23
+ st.markdown("""
24
+ <style>
25
+ /* Dark mode with purple->teal gradient */
26
+ body {
27
+ background-color: #121212;
28
+ color: #f0f0f0;
29
+ font-family: 'DM Sans', sans-serif;
30
+ letter-spacing: -0.02em;
31
+ line-height: 1.4;
32
+ margin: 0;
33
+ padding: 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
 
36
+ /* Main container styling */
37
+ .main {
38
+ background: linear-gradient(135deg, rgba(122, 95, 255, 0.05), rgba(0, 209, 178, 0.05));
39
+ padding: 2rem;
40
+ max-width: 100%;
41
+ border-radius: 16px;
42
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
+ /* Header & sidebar styling */
45
+ .css-1v3fvcr, .css-1vq4p4l {
46
+ background-color: #121212;
47
+ }
48
 
49
+ /* Chat container */
50
+ .stChatFloatingInputContainer {
51
+ border-radius: 16px;
52
+ box-shadow: 0 4px 20px rgba(0, 0, 0, 0.2);
53
+ padding: 8px;
54
+ background-color: #1e1e24;
55
+ }
56
 
57
+ /* Chat input */
58
+ .stChatInputContainer textarea {
59
+ border-radius: 12px;
60
+ padding: 12px;
61
+ background-color: #2b2b36;
62
+ border: 1px solid rgba(122, 95, 255, 0.2);
63
+ color: #f0f0f0;
64
  }
65
 
66
+ /* Chat messages */
67
+ .chat-message {
68
+ padding: 1.5rem;
69
+ border-radius: 16px;
70
+ margin-bottom: 1rem;
71
+ display: flex;
72
+ flex-direction: column;
73
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
74
+ transition: all 0.2s ease-in-out;
75
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ .chat-message:hover {
78
+ box-shadow: 0 6px 16px rgba(0, 0, 0, 0.15);
79
+ }
 
80
 
81
+ .chat-message.user {
82
+ background-color: #2b313e;
83
+ border-left: 3px solid #7A5FFF;
84
+ }
 
 
85
 
86
+ .chat-message.assistant {
87
+ background-color: #1e1e2e;
88
+ border-left: 3px solid #00D1B2;
89
+ }
 
90
 
91
+ /* Buttons and toggles */
92
+ .stButton>button, .stToggle>label {
93
+ border-radius: 12px;
94
+ padding: 8px 16px;
95
+ background: linear-gradient(135deg, #7A5FFF, #00D1B2);
96
+ color: white;
97
+ border: none;
98
+ transition: all 0.2s ease;
99
+ }
100
 
101
+ .stButton>button:hover, .stToggle>label:hover {
102
+ transform: translateY(-2px);
103
+ box-shadow: 0 4px 12px rgba(122, 95, 255, 0.4);
104
+ }
105
 
106
+ /* Expander/Dropdown */
107
+ .streamlit-expanderHeader {
108
+ border-radius: 12px;
109
+ background-color: #2b2b36;
110
+ border: 1px solid rgba(0, 209, 178, 0.2);
111
+ color: #f0f0f0;
112
+ font-weight: 500;
113
  }
114
 
115
+ .streamlit-expanderContent {
116
+ background-color: #1e1e24;
117
+ border-radius: 0 0 12px 12px;
118
+ padding: 12px;
119
  }
120
 
121
+ /* Hide empty elements and default header */
122
+ .element-container:has(h1:empty) {
123
+ display: none;
124
+ }
125
+ .block-container {
126
+ padding-top: 1rem;
127
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
+ /* Citations and sources */
130
+ a {
131
+ color: #00D1B2;
132
+ text-decoration: none;
133
+ }
134
+ a:hover {
135
+ text-decoration: underline;
136
+ }
137
 
138
+ /* Italicize uncertainties as per UX spec */
139
+ .uncertainty {
140
+ font-style: italic;
141
+ color: rgba(255, 107, 107, 0.8);
142
+ }
143
 
144
+ /* New toggle switch */
145
+ .toggle-container {
146
+ display: flex;
147
+ align-items: center;
148
+ margin-bottom: 1rem;
149
+ background-color: #1e1e24;
150
+ padding: 8px 16px;
151
+ border-radius: 12px;
152
+ }
153
+ .toggle-label {
154
+ margin-right: 10px;
155
+ font-weight: 500;
156
+ color: #f0f0f0;
157
+ }
158
 
159
+ /* ChatGPT-like toggle style */
160
+ .chatgpt-toggle {
161
+ display: flex;
162
+ align-items: center;
163
+ justify-content: flex-end;
164
+ background-color: rgba(32, 33, 35, 0.5);
165
+ padding: 4px 12px;
166
+ border-radius: 8px;
167
+ margin: 5px 0;
168
+ }
169
 
170
+ .chatgpt-toggle .stToggle>label {
171
+ background: rgba(122, 95, 255, 0.2);
172
+ padding: 4px 10px;
173
+ font-size: 0.8rem;
174
+ }
 
175
 
176
+ /* Bottom controls tray */
177
+ .bottom-controls {
178
+ position: fixed;
179
+ bottom: 0;
180
+ left: 0;
181
+ right: 0;
182
+ z-index: 100;
183
+ background-color: #121212;
184
+ padding-bottom: 10px;
185
+ }
186
 
187
+ /* Legal disclaimer (small and muted) */
188
+ .footer-text {
189
+ font-size: 0.7rem;
190
+ color: rgba(240, 240, 240, 0.5);
191
+ text-align: center;
192
+ padding: 10px;
193
+ position: fixed;
194
+ bottom: 0;
195
+ width: 100%;
196
+ z-index: 99;
 
 
197
  }
198
+ </style>
199
+ """, unsafe_allow_html=True)
200
+
201
+ # Initialize session state
202
+ if 'history' not in st.session_state:
203
+ st.session_state.history = []
204
+ if 'consultation_id' not in st.session_state:
205
+ st.session_state.consultation_id = str(uuid.uuid4())[:8]
206
+ if 'use_rag' not in st.session_state:
207
+ st.session_state.use_rag = True
208
+
209
+ # Helper function to check if explanation has meaningful content
210
def has_meaningful_content(text):
    """Return True when an explanation string is worth displaying.

    Text is rejected when it is empty or ``None``, consists only of
    whitespace, consists only of separator characters (``=``, ``-``, ``_``,
    ``*``) possibly spread over several lines, or is a short string
    (under 20 characters) containing the ``## REASONING`` heading.

    Args:
        text: Explanation text to inspect; may be ``None`` or empty.

    Returns:
        bool: False for the cases above, True otherwise.
    """
    if not text:
        return False

    # Drop every whitespace character (spaces, tabs, CR/LF) so that blank
    # strings and separator rows split by spaces or Windows newlines are
    # recognised as empty.
    condensed = "".join(text.split())
    if not condensed:
        return False

    # Check if the text is just equal signs or other separators
    if re.match(r'^[=\-_*]+$', condensed):
        return False

    # Check if the text only contains "## REASONING" with no actual content
    stripped_text = text.strip()
    if "## REASONING" in stripped_text and len(stripped_text) < 20:
        return False

    return True
224
 
225
+ # Display chat history
226
for past_message in st.session_state.history:
    # User turns are echoed as plain text.
    if past_message["role"] == "user":
        with st.chat_message("user"):
            st.write(past_message["content"])
        continue

    # Every other entry is rendered as an assistant turn.
    with st.chat_message("assistant"):
        st.markdown(past_message["content"])

        # Show the reasoning expander only when an explanation was stored
        # and it holds actual content.
        stored_explanation = past_message.get("explanation")
        if stored_explanation and has_meaningful_content(stored_explanation):
            with st.expander("Show Reasoning"):
                st.markdown(past_message["explanation"])

        # The open-access legend accompanies replies that had evidence.
        if past_message.get("evidence"):
            st.markdown("---")
            st.markdown("**Legend:** 🔓 = Open Access (full text available)")
240
+
241
+ # Add spacing at the bottom
242
+ st.markdown("<div style='height: 80px'></div>", unsafe_allow_html=True)
243
+
244
+ # Chat input
245
if prompt := st.chat_input("Describe your symptoms or ask a medical question..."):
    # Earlier turns reduced to role/content pairs, taken before the current
    # message is appended (so the current message is excluded).
    # NOTE(review): stored assistant entries also carry 'explanation' and
    # 'evidence' keys for the UI; presumably orchestrator_chat forwards the
    # history entries to the chat-completion API, which would not expect
    # those extra fields — confirm against orchestrator_chat.
    prior_turns = [
        {"role": past["role"], "content": past["content"]}
        for past in st.session_state.history
    ]

    # Add user message to chat
    st.session_state.history.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)

    with st.chat_message("assistant"):
        # Show thinking message
        thinking_placeholder = st.empty()
        if st.session_state.use_rag:
            thinking_placeholder.markdown("🔍 Searching medical databases and analyzing your query...")
        else:
            thinking_placeholder.markdown("Analyzing your query...")

        # Get AI response; the query counts as a follow-up whenever at least
        # one earlier turn exists.
        reply, explanation, evidence = orchestrator_chat(
            prior_turns,
            prompt,
            use_rag=st.session_state.use_rag,
            is_follow_up=len(prior_turns) > 0
        )

        # Clear thinking message
        thinking_placeholder.empty()

        # Display response
        st.markdown(reply)

        # Add explanation in an expander ONLY if explanation has meaningful content
        if explanation and has_meaningful_content(explanation):
            with st.expander("Show Reasoning"):
                st.markdown(explanation)

        # Add legend if evidence was found
        if evidence:
            st.markdown("---")
            st.markdown("**Legend:** 🔓 = Open Access (full text available)")

    # Store the reply with the fields the history renderer reads back
    # ('explanation' and 'evidence') on later reruns.
    st.session_state.history.append({
        "role": "assistant",
        "content": reply,
        "explanation": explanation,
        "evidence": evidence if evidence else []
    })
290
+
291
+ # Fixed bottom toggle position - after the chat input
292
+ with st.container():
293
+ # Create bottom controls tray
294
+ st.markdown("<div class='bottom-controls'>", unsafe_allow_html=True)
295
+
296
+ # Add toggle inside the bottom tray
297
+ cols = st.columns([3, 1])
298
+ with cols[1]:
299
+ st.markdown("<div class='chatgpt-toggle'>", unsafe_allow_html=True)
300
+ st.session_state.use_rag = st.toggle("Database Search", value=st.session_state.use_rag,
301
+ help="Toggle to enable or disable medical database search")
302
+ st.markdown("</div>", unsafe_allow_html=True)
303
+ st.markdown("</div>", unsafe_allow_html=True)
304
+
305
+ # Small, unobtrusive legal disclaimer
306
+ st.markdown("""
307
+ <div class="footer-text">
308
+ For informational purposes only. Not a substitute for professional medical advice.
309
+ </div>
310
+ """, unsafe_allow_html=True)