Axcel1 commited on
Commit
0ee5e7e
·
verified ·
1 Parent(s): 2dd2e39

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +808 -0
  2. chapter_retrieval_system_v2.py +865 -0
  3. requirements.txt +0 -0
  4. service_v2.py +462 -0
app.py ADDED
@@ -0,0 +1,808 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import time
4
+ import re
5
+ import threading
6
+ import uvicorn
7
+ import logging
8
+ import os
9
+ import signal
10
+ import sys
11
+ from typing import Dict, List, Optional, Tuple
12
+ from collections import defaultdict
13
+
14
+ # Import your backend modules
15
+ from service_v2 import app as fastapi_app
16
+ from chapter_retrieval_system_v2 import MultiCollectionChapterRetrieval
17
+
18
# Configure logging so records surface in the Hugging Face Spaces console.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)
24
+
25
class ICD10SearchInterface:
    """Bridges the Gradio UI to the local FastAPI ICD-10 search service."""

    def __init__(self, api_base_url: str = "http://127.0.0.1:8000"):
        """Remember the backend location and prime the chapter lookup table."""
        # Normalise the base URL so later f-strings can append "/path" safely.
        self.api_base_url = api_base_url.rstrip('/')
        # Flipped to True once /health answers; checked before every search.
        self.server_ready = False
        # Generous retry budget for slow Spaces cold starts.
        self.max_retries = 30
        # Static ICD-10 code-prefix -> chapter metadata table.
        self.code_to_chapter = self._build_code_to_chapter_mapping()
34
+
35
def _build_code_to_chapter_mapping(self) -> Dict[str, Dict[str, str]]:
    """Build mapping from ICD-10 code prefixes to chapter metadata.

    Each entry maps an internal chapter id to its title, description, and the
    list of code *prefixes* that belong to it.  ``get_chapter_info_for_code``
    matches codes against these prefixes in insertion order, so prefixes must
    not overlap between chapters.

    Bug fix: Chapter II previously claimed the bare prefix "D", which matched
    every D50-D89 blood-disorder code before Chapter III could.  Chapter II
    covers C00-D49, so it now lists only "C" and "D0" through "D4".

    Returns:
        Dict keyed by chapter id, each value holding "title", "code_ranges"
        (list of code prefixes) and "description".
    """
    return {
        # Chapter I: Certain infectious and parasitic diseases (A00-B99)
        "chapter_1_I": {
            "title": "Certain infectious and parasitic diseases",
            "code_ranges": ["A", "B"],
            "description": "Infectious diseases, parasitic diseases, and related conditions"
        },

        # Chapter II: Neoplasms (C00-D49) — D5x-D8x belong to Chapter III.
        "chapter_2_II": {
            "title": "Neoplasms",
            "code_ranges": ["C", "D0", "D1", "D2", "D3", "D4"],
            "description": "Malignant neoplasms, benign neoplasms, and neoplasms of uncertain behavior"
        },

        # Chapter III: Diseases of blood and blood-forming organs (D50-D89)
        "chapter_3_III": {
            "title": "Diseases of the blood and blood-forming organs",
            "code_ranges": ["D5", "D6", "D7", "D8"],
            "description": "Anemias, coagulation defects, and other blood disorders"
        },

        # Chapter IV: Endocrine, nutritional and metabolic diseases (E00-E89)
        "chapter_4_IV": {
            "title": "Endocrine, nutritional and metabolic diseases",
            "code_ranges": ["E"],
            "description": "Diabetes, thyroid disorders, nutritional deficiencies, and metabolic disorders"
        },

        # Chapter V: Mental and behavioural disorders (F01-F99)
        "chapter_5_V": {
            "title": "Mental and behavioural disorders",
            "code_ranges": ["F"],
            "description": "Mental disorders, substance abuse, and behavioral conditions"
        },

        # Chapter VI: Diseases of the nervous system (G00-G99)
        "chapter_6_VI": {
            "title": "Diseases of the nervous system",
            "code_ranges": ["G"],
            "description": "Neurological disorders, epilepsy, migraines, and nervous system diseases"
        },

        # Chapter VII: Diseases of the eye and adnexa (H00-H59)
        "chapter_7_VII": {
            "title": "Diseases of the eye and adnexa",
            "code_ranges": ["H0", "H1", "H2", "H3", "H4", "H5"],
            "description": "Eye diseases, visual disorders, and related conditions"
        },

        # Chapter VIII: Diseases of the ear and mastoid process (H60-H95)
        "chapter_8_VIII": {
            "title": "Diseases of the ear and mastoid process",
            "code_ranges": ["H6", "H7", "H8", "H9"],
            "description": "Hearing disorders, ear infections, and mastoid conditions"
        },

        # Chapter IX: Diseases of the circulatory system (I00-I99)
        "chapter_9_IX": {
            "title": "Diseases of the circulatory system",
            "code_ranges": ["I"],
            "description": "Heart disease, hypertension, stroke, and vascular disorders"
        },

        # Chapter X: Diseases of the respiratory system (J00-J99)
        "chapter_10_X": {
            "title": "Diseases of the respiratory system",
            "code_ranges": ["J"],
            "description": "Pneumonia, asthma, COPD, and other respiratory conditions"
        },

        # Chapter XI: Diseases of the digestive system (K00-K95)
        "chapter_11_XI": {
            "title": "Diseases of the digestive system",
            "code_ranges": ["K"],
            "description": "Gastrointestinal disorders, liver disease, and digestive conditions"
        },

        # Chapter XII: Diseases of the skin and subcutaneous tissue (L00-L99)
        "chapter_12_XII": {
            "title": "Diseases of the skin and subcutaneous tissue",
            "code_ranges": ["L"],
            "description": "Skin infections, dermatitis, and subcutaneous tissue disorders"
        },

        # Chapter XIII: Diseases of the musculoskeletal system (M00-M99)
        "chapter_13_XIII": {
            "title": "Diseases of the musculoskeletal system and connective tissue",
            "code_ranges": ["M"],
            "description": "Arthritis, bone disorders, muscle diseases, and connective tissue conditions"
        },

        # Chapter XIV: Diseases of the genitourinary system (N00-N99)
        "chapter_14_XIV": {
            "title": "Diseases of the genitourinary system",
            "code_ranges": ["N"],
            "description": "Kidney disease, urinary disorders, and reproductive system conditions"
        },

        # Chapter XV: Pregnancy, childbirth and the puerperium (O00-O9A)
        "chapter_15_XV": {
            "title": "Pregnancy, childbirth and the puerperium",
            "code_ranges": ["O"],
            "description": "Pregnancy complications, delivery issues, and postpartum conditions"
        },

        # Chapter XVI: Certain conditions originating in the perinatal period (P00-P96)
        "chapter_16_XVI": {
            "title": "Certain conditions originating in the perinatal period",
            "code_ranges": ["P"],
            "description": "Newborn conditions and perinatal complications"
        },

        # Chapter XVII: Congenital malformations (Q00-Q99)
        "chapter_17_XVII": {
            "title": "Congenital malformations, deformations and chromosomal abnormalities",
            "code_ranges": ["Q"],
            "description": "Birth defects and chromosomal disorders"
        },

        # Chapter XVIII: Symptoms, signs and abnormal findings (R00-R99)
        "chapter_18_XVIII": {
            "title": "Symptoms, signs and abnormal clinical and laboratory findings",
            "code_ranges": ["R"],
            "description": "Symptoms and signs not elsewhere classified"
        },

        # Chapter XIX: Injury, poisoning and external causes (S00-T88)
        "chapter_19_XIX": {
            "title": "Injury, poisoning and certain other consequences of external causes",
            "code_ranges": ["S", "T"],
            "description": "Injuries, poisoning, and external cause consequences"
        },

        # Chapter XX: External causes of morbidity (V01-Y99)
        "chapter_20_XX": {
            "title": "External causes of morbidity",
            "code_ranges": ["V", "W", "X", "Y"],
            "description": "External causes of injury and poisoning"
        },

        # Chapter XXI: Factors influencing health status (Z00-Z99)
        "chapter_21_XXI": {
            "title": "Factors influencing health status and contact with health services",
            "code_ranges": ["Z"],
            "description": "Health maintenance, screening, and healthcare encounters"
        }
    }
185
+
186
def wait_for_server(self, max_wait_time=60):
    """Poll the backend /health endpoint until it answers or time runs out.

    Sets ``self.server_ready`` and returns True on success; returns False
    after ``max_wait_time`` seconds of failed attempts.
    """
    logger.info(f"Waiting for FastAPI server at {self.api_base_url}")
    started_at = time.time()
    tries = 0

    while time.time() - started_at < max_wait_time:
        tries += 1
        try:
            reply = requests.get(f"{self.api_base_url}/health", timeout=10)
        except requests.exceptions.RequestException:
            # Connection refused while booting: back off briefly, log sparsely.
            if tries % 10 == 0:
                logger.info(f"Waiting for server... attempt {tries} ({time.time() - started_at:.1f}s)")
            time.sleep(2)
            continue

        if reply.status_code == 200:
            self.server_ready = True
            logger.info(f"FastAPI server ready after {tries} attempts ({time.time() - started_at:.1f}s)")
            return True

        logger.warning(f"Server returned status {reply.status_code}, attempt {tries}")

    logger.error(f"FastAPI server failed to start within {max_wait_time} seconds")
    return False
210
+
211
def get_server_status(self) -> Tuple[bool, str]:
    """Report backend reachability as (ok, message) for the UI banner."""
    # Never probe the network before the startup handshake succeeded.
    if not self.server_ready:
        return False, "Server starting up..."

    try:
        reply = requests.get(f"{self.api_base_url}/health", timeout=5)
    except requests.exceptions.RequestException as exc:
        return False, f"Connection Error: {str(exc)}"

    if reply.status_code == 200:
        return True, "Server Ready"
    return False, f"Server Error (Status: {reply.status_code})"
224
+
225
def test_connection(self) -> Tuple[bool, str]:
    """Backward-compatible alias: delegates straight to get_server_status()."""
    return self.get_server_status()
228
+
229
+ # Keep all your existing methods (copy from original code)
230
def extract_category_code(self, icd_code: str) -> str:
    """Reduce a full ICD-10 code to its category stem (e.g. I21.0 -> I21).

    Falls back to the cleaned-up input when it does not look like a standard
    letter-plus-digits code; empty/None input yields "".
    """
    if not icd_code:
        return ""

    cleaned = icd_code.strip().upper()
    stem = re.match(r'^([A-Z]\d{2,3})', cleaned)
    return stem.group(1) if stem else cleaned
241
+
242
def group_codes_by_category(self, results: List[Dict]) -> Dict[str, List[Dict]]:
    """Bucket search hits by their ICD-10 category stem.

    Hits whose category cannot be determined (missing/empty code) are dropped.
    """
    buckets: Dict[str, List[Dict]] = defaultdict(list)

    for hit in results:
        bucket_key = self.extract_category_code(hit.get('code', ''))
        if bucket_key:
            buckets[bucket_key].append(hit)

    return dict(buckets)
253
+
254
def get_category_info(self, category_code: str, codes_in_category: List[Dict]) -> Dict:
    """Choose the entry that best represents a category of codes.

    An entry whose code *is* the bare category wins outright; otherwise the
    highest-scoring entry seen so far is used, falling back to the first
    entry when no score beats zero.
    """
    running_best = None
    running_top = 0

    for entry in codes_in_category:
        if entry['code'] == category_code:
            return entry
        if entry['score'] > running_top:
            running_top = entry['score']
            running_best = entry

    return running_best or codes_in_category[0]
268
+
269
def get_chapter_info_for_code(self, icd_code: str) -> Optional[Dict[str, str]]:
    """Resolve an ICD-10 code to its chapter metadata via prefix matching.

    Chapters are checked in the insertion order of ``self.code_to_chapter``;
    the first chapter owning a matching prefix wins.  Returns None for empty
    input or an unrecognised code.
    """
    if not icd_code:
        return None

    normalized = icd_code.strip().upper()

    for chapter_id, chapter_data in self.code_to_chapter.items():
        if any(normalized.startswith(prefix) for prefix in chapter_data["code_ranges"]):
            return {
                "chapter_id": chapter_id,
                "title": chapter_data["title"],
                "description": chapter_data["description"],
            }

    return None
287
+
288
def search_icd10(
    self,
    query: str,
    limit: int = 10,
    score_threshold: float = 0.3,
    search_mode: str = "smart",
    target_chapters: str = "",
    detailed_analysis: bool = False,
    chapters_per_sentence: int = 2
) -> str:
    """Search ICD-10 codes using the API with enhanced error handling for Spaces.

    Returns an HTML fragment for the Gradio HTML component: either formatted
    results or a styled status/error panel.
    """
    # Guard: nothing to search for.
    if not query or not query.strip():
        return "Please enter a diagnostic query."

    # Guard: backend thread may still be booting on a cold Space.
    if not self.server_ready:
        return """
        <div style='text-align: center; padding: 20px; background: #ffeaa7; border-radius: 8px; margin: 20px 0;'>
            <h3>Server Starting Up</h3>
            <p>The FastAPI server is still initializing. Please wait a moment and try again.</p>
            <p><em>This usually takes 10-30 seconds on first load.</em></p>
        </div>
        """

    # Guard: is the backend reachable right now?
    is_connected, connection_msg = self.test_connection()
    if not is_connected:
        return f"""
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Connection Error</h3>
            <p>{connection_msg}</p>
            <p><em>Please refresh the page and try again.</em></p>
        </div>
        """

    try:
        # limit * 2: over-fetch so category grouping still has enough rows
        # after parent-category codes are filtered out downstream.
        params = {
            "q": query.strip(),
            "limit": limit * 2,
            "score_threshold": score_threshold,
            "search_mode": search_mode or "smart",
            "detailed_analysis": detailed_analysis,
            "chapters_per_sentence": chapters_per_sentence
        }

        # Only forward chapter filtering when the caller actually supplied one.
        if target_chapters and target_chapters.strip():
            params["target_chapters"] = target_chapters.strip()

        start_time = time.time()
        response = requests.get(f"{self.api_base_url}/api/search", params=params, timeout=120)
        request_time = time.time() - start_time  # NOTE(review): measured but never used — confirm intent

        if response.status_code != 200:
            # Prefer the API's JSON error body; fall back to raw response text.
            error_data = response.json() if response.headers.get('content-type', '').startswith('application/json') else {"detail": response.text}
            return f"""
            <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
                <h3>API Error ({response.status_code})</h3>
                <p>{error_data.get('detail', 'Unknown error')}</p>
            </div>
            """

        data = response.json()
        return self._format_sentence_results_with_enhanced_categories(data)

    except requests.exceptions.Timeout:
        return """
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Request Timeout</h3>
            <p>The search is taking too long. Try reducing the limit or increasing the score threshold.</p>
        </div>
        """
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error: {e}")
        return f"""
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Request Error</h3>
            <p>{str(e)}</p>
        </div>
        """
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return f"""
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Unexpected Error</h3>
            <p>{str(e)}</p>
        </div>
        """
373
+
374
def _format_sentence_results_with_enhanced_categories(self, data: Dict) -> str:
    """Format sentence-based results with enhanced category and chapter information.

    Expects ``data['sentence_results']`` as produced by the /api/search
    endpoint (each item with 'sentence_text', 'total_results' and a 'results'
    list of {code, title, score, description} dicts).  Returns a
    self-contained HTML fragment for the Gradio HTML component.
    """
    sentence_results = data.get('sentence_results', [])

    if not sentence_results:
        return "<div style='text-align: center; color: #666; padding: 20px;'>No sentence-based results available.</div>"

    # Page header.
    html = """
    <div style='margin-bottom: 20px;'>
        <h3 style='color: #2c3e50; margin-bottom: 15px;'>Results by Sentence with Enhanced Category Information</h3>
        <p style='color: #666; margin-bottom: 20px;'>
            Results are organized by sentence and grouped by ICD-10 categories with chapter context. High-scoring codes are highlighted.
        </p>
    </div>
    """

    for i, sent_result in enumerate(sentence_results, 1):
        # Group results by category
        categories = self.group_codes_by_category(sent_result['results'])

        # One card per sentence, header + body.
        html += f"""
        <div style='margin-bottom: 30px; border: 2px solid #3498db; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
            <div style='background: linear-gradient(135deg, #3498db, #2980b9); color: white; padding: 15px;'>
                <h4 style='margin: 0; font-size: 1.2em;'>
                    Sentence {i}: "{sent_result['sentence_text']}"
                </h4>
                <div style='margin-top: 8px; font-size: 0.9em; opacity: 0.9;'>
                    <span style='background-color: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 12px; margin-right: 10px;'>
                        {sent_result['total_results']} total results
                    </span>
                    <span style='background-color: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 12px;'>
                        Top 3 of {len(categories)} categories
                    </span>
                </div>
            </div>
            <div style='padding: 20px;'>
        """

        # Sort categories by highest score and limit to top 3
        sorted_categories = sorted(
            categories.items(),
            key=lambda x: max(code['score'] for code in x[1]),
            reverse=True
        )[:3]

        for category_code, codes_in_category in sorted_categories:
            # Get category information (representative entry for the header).
            category_info = self.get_category_info(category_code, codes_in_category)
            highest_score = max(code['score'] for code in codes_in_category)
            category_color = self._get_category_color(highest_score)

            # Get chapter information for this category from its first code.
            sample_code = codes_in_category[0].get('code', category_code)
            chapter_info = self.get_chapter_info_for_code(sample_code)

            # Build enhanced category header
            category_title = category_info.get('title', 'Unknown Category')
            chapter_display = ""
            chapter_tooltip = ""

            if chapter_info:
                # chapter_id looks like "chapter_9_IX": [1] = number, [2] = roman numeral.
                chapter_display = f" • Chapter {chapter_info['chapter_id'].split('_')[1]} ({chapter_info['chapter_id'].split('_')[2]})"
                chapter_tooltip = f"title='{chapter_info['description']}'"

            html += f"""
            <div style='margin-bottom: 20px; border: 1px solid {category_color}; border-radius: 8px; overflow: hidden;'>
                <div style='background-color: {category_color}; color: white; padding: 12px 15px;'>
                    <div style='display: flex; justify-content: space-between; align-items: flex-start;'>
                        <div style='flex-grow: 1;'>
                            <h5 style='margin: 0; font-size: 1em; line-height: 1.3;'>
                                <span style='display: block;'>
                                    Category {category_code}: {category_title}
                                </span>
                                {f'<span style="font-size: 0.85em; opacity: 0.9; display: block; margin-top: 4px;" {chapter_tooltip}>{chapter_display}</span>' if chapter_info else ''}
                            </h5>
                            {f'<div style="font-size: 0.8em; opacity: 0.8; margin-top: 6px; line-height: 1.2;">{chapter_info["description"]}</div>' if chapter_info else ''}
                        </div>
                        <div style='text-align: right; margin-left: 15px;'>
                            <span style='font-size: 0.8em; background-color: rgba(255,255,255,0.2); padding: 2px 6px; border-radius: 10px; display: block;'>
                                Max: {highest_score:.3f}
                            </span>
                            <span style='font-size: 0.75em; opacity: 0.8; margin-top: 2px; display: block;'>
                                {len(codes_in_category)} codes
                            </span>
                        </div>
                    </div>
                </div>
                <div style='padding: 12px;'>
            """

            # Sort codes within category by score
            sorted_codes = sorted(codes_in_category, key=lambda x: x['score'], reverse=True)

            # Filter out codes that are the same as the category code
            filtered_codes = [code for code in sorted_codes if code.get('code', '') != category_code]

            # If we filtered out all codes or have no codes, show a message
            if not filtered_codes:
                html += f"""
                <div style='margin-bottom: 8px; padding: 12px; background-color: #f8f9fa; border-radius: 6px; border-left: 4px solid #95a5a6;'>
                    <div style='color: #666; text-align: center; font-style: italic;'>
                        Category {category_code} represents the main code group. Specific subcodes available in detailed search.
                    </div>
                </div>
                """
            else:
                for j, result in enumerate(filtered_codes, 1):
                    score_color = self._get_score_color(result['score'])
                    is_high_score = result['score'] >= 0.6

                    # Add highlighting for high-scoring codes
                    highlight_style = ""
                    if is_high_score:
                        highlight_style = "box-shadow: 0 0 0 2px #f39c12; background: linear-gradient(135deg, #fff9e6, #ffffff);"

                    html += f"""
                    <div style='margin-bottom: 8px; padding: 12px; background-color: #f8f9fa; border-radius: 6px; border-left: 4px solid {score_color}; {highlight_style}'>
                        <div style='display: flex; justify-content: space-between; align-items: center;'>
                            <div style='flex-grow: 1;'>
                                <strong style='color: #2c3e50; font-size: 1em;'>
                                    {result['code']} - {result['title']}
                                    {' ⭐' if is_high_score else ''}
                                </strong>
                            </div>
                            <span style='background-color: {score_color}; color: white; padding: 3px 8px; border-radius: 4px; font-size: 0.85em; font-weight: bold;'>
                                {result['score']:.3f}
                            </span>
                        </div>
                        {f"<div style='font-size: 0.9em; color: #666; margin-top: 8px; line-height: 1.4;'>{result['description'][:250]}{'...' if len(result.get('description', '')) > 250 else ''}</div>" if result.get('description') else ""}
                    </div>
                    """

            # Close the per-category padding div and card div.
            html += "</div></div>"

        # Close the per-sentence padding div and card div.
        html += "</div></div>"

    # Enhanced legend with chapter info
    html += """
    <div style='background-color: #f8f9fa; border-radius: 8px; padding: 15px; margin-top: 20px;'>
        <h4 style='color: #2c3e50; margin-bottom: 15px;'>Enhanced Legend</h4>

        <div style='margin-bottom: 15px;'>
            <h5 style='color: #2c3e50; margin-bottom: 8px;'>Score Quality:</h5>
            <div style='display: flex; flex-wrap: wrap; gap: 15px; align-items: center;'>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #27ae60; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Excellent Match (≥0.8)</span>
                </div>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #f39c12; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Good Match (≥0.6)</span>
                </div>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #e67e22; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Fair Match (≥0.4)</span>
                </div>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #e74c3c; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Low Match (<0.4)</span>
                </div>
            </div>
        </div>

        <div>
            <h5 style='color: #2c3e50; margin-bottom: 8px;'>Features:</h5>
            <div style='display: flex; flex-wrap: wrap; gap: 20px; align-items: center; font-size: 0.9em;'>
                <span>High-scoring codes (≥0.6)</span>
                <span>Category grouping by ICD-10 structure</span>
                <span>Chapter context and descriptions</span>
                <span>Score-based category prioritization</span>
                <span>Duplicate category codes filtered</span>
            </div>
        </div>
    </div>
    """

    return html
551
+
552
def _get_score_color(self, score: float) -> str:
    """Map a similarity score onto the legend's traffic-light colour scale."""
    # Thresholds mirror the legend rendered by the results formatter.
    for floor, colour in ((0.8, "#27ae60"), (0.6, "#f39c12"), (0.4, "#e67e22")):
        if score >= floor:
            return colour
    return "#e74c3c"  # below 0.4: low match (red)
562
+
563
def _get_category_color(self, max_score: float) -> str:
    """Colour for a category header, keyed off the category's best score."""
    if max_score >= 0.8:
        return "#2ecc71"  # bright green: excellent
    if max_score >= 0.6:
        return "#3498db"  # blue: good
    if max_score >= 0.4:
        return "#9b59b6"  # purple: fair
    return "#95a5a6"      # gray: weak
573
+
574
def start_fastapi_server():
    """Run the bundled FastAPI app on localhost; failures are logged, not raised.

    Intended to run in a daemon thread so the Gradio front-end can still come
    up and display error panels if the backend dies.
    """
    try:
        logger.info("Starting FastAPI server...")
        # Port is overridable via env var for Spaces configuration.
        bind_port = int(os.environ.get("FASTAPI_PORT", "8000"))

        uvicorn.run(
            fastapi_app,
            host="127.0.0.1",
            port=bind_port,
            log_level="info",
            access_log=False,       # keep Spaces logs readable
            workers=1,              # single worker: one small Spaces container
            timeout_keep_alive=30,
        )
    except Exception as exc:
        # Deliberately swallowed: Gradio keeps running and reports the outage.
        logger.error(f"FastAPI server failed to start: {exc}")
594
+
595
def create_gradio_interface():
    """Create the Gradio interface with server status monitoring.

    Builds the full Blocks layout (header, parameter column, results column,
    example buttons, footer) and wires the search button to
    ``ICD10SearchInterface.search_icd10``.  Returns the Blocks app.
    """
    search_interface = ICD10SearchInterface()

    # Custom CSS: widen the app and animate the status banner.
    css = """
    .gradio-container {
        max-width: 1400px !important;
        margin: auto !important;
    }

    .server-status {
        transition: all 0.3s ease;
    }
    """

    with gr.Blocks(css=css, title="ICD-10 Smart Search", theme=gr.themes.Soft()) as demo:
        # Page header banner.
        gr.HTML("""
        <div style='text-align: center; margin-bottom: 30px; padding: 25px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; box-shadow: 0 4px 8px rgba(0,0,0,0.1);'>
            <h1 style='color: white; margin: 0; font-size: 2.5em;'>ICD-10 Smart Search</h1>
            <p style='color: #f1f2f6; margin: 15px 0 0 0; font-size: 1.2em;'>Advanced diagnostic code search with AI-powered sentence analysis</p>
        </div>
        """)

        # Server status indicator (re-rendered by the periodic demo.load below).
        def get_server_status():
            is_ready, msg = search_interface.get_server_status()
            if is_ready:
                return "<div class='server-status' style='text-align: center; padding: 10px; background: #00b894; color: white; border-radius: 5px; margin-bottom: 20px;'>🟢 Server Ready</div>"
            else:
                return f"<div class='server-status' style='text-align: center; padding: 10px; background: #e17055; color: white; border-radius: 5px; margin-bottom: 20px;'>🔴 {msg}</div>"

        server_status = gr.HTML(value=get_server_status())

        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<h3>Search Parameters</h3>")

                query_input = gr.Textbox(
                    label="Diagnostic Query",
                    placeholder="Enter diagnostic description (e.g., 'chest pain with shortness of breath')",
                    lines=3,
                    value=""
                )

                with gr.Accordion("Advanced Options", open=False):
                    with gr.Row():
                        limit_input = gr.Slider(
                            label="Maximum Results per Sentence",
                            minimum=5,
                            maximum=50,
                            value=15,
                            step=5,
                            info="Higher values show more codes per category"
                        )

                        score_threshold_input = gr.Slider(
                            label="Score Threshold",
                            minimum=0.1,
                            maximum=0.9,
                            value=0.2,
                            step=0.05,
                            info="Lower values include more potential matches"
                        )

                    search_mode_input = gr.Dropdown(
                        label="Search Mode",
                        choices=["smart", "all_chapters", "specific_chapters"],
                        value="smart"
                    )

                    # Only shown when search mode is "specific_chapters" (see below).
                    target_chapters_input = gr.Textbox(
                        label="Target Chapters (comma-separated)",
                        placeholder="e.g., chapter_9_IX, chapter_10_X",
                        visible=False
                    )

                    with gr.Row():
                        detailed_analysis_input = gr.Checkbox(
                            label="Include Detailed Analysis",
                            value=True
                        )

                        chapters_per_sentence_input = gr.Slider(
                            label="Chapters per Sentence",
                            minimum=1,
                            maximum=5,
                            value=3,
                            step=1
                        )

                search_button = gr.Button("Search ICD-10 Codes", variant="primary", size="lg")

                # Toggle the chapter filter textbox with the dropdown choice.
                def update_target_chapters_visibility(search_mode):
                    return gr.update(visible=(search_mode == "specific_chapters"))

                search_mode_input.change(
                    update_target_chapters_visibility,
                    inputs=search_mode_input,
                    outputs=target_chapters_input
                )

            with gr.Column(scale=2):
                gr.HTML("<h3>Enhanced Category-Grouped Results</h3>")
                sentence_results_output = gr.HTML(
                    value="<div style='text-align: center; color: #666; padding: 40px;'>Enter a diagnostic query and click search to see categorized results with chapter context.</div>"
                )

        # Example queries
        gr.HTML("<h3>Example Queries</h3>")

        example_queries = [
            "acute myocardial infarction with chest pain",
            "type 2 diabetes with diabetic nephropathy",
            "major depressive disorder with anxiety",
            "fracture of distal radius from fall",
            "acute appendicitis with peritonitis",
            "gestational diabetes in pregnancy",
            "chronic kidney disease stage 3",
            "essential hypertension with heart disease"
        ]

        # Two columns of example buttons, two buttons per column.
        with gr.Row():
            for i in range(0, len(example_queries), 2):
                with gr.Column():
                    for j in range(2):
                        if i + j < len(example_queries):
                            example_btn = gr.Button(
                                example_queries[i + j],
                                variant="secondary",
                                size="sm"
                            )
                            # Default arg binds the query at definition time,
                            # avoiding Python's late-binding closure pitfall.
                            example_btn.click(
                                lambda x=example_queries[i + j]: x,
                                outputs=query_input
                            )

        # Search functionality
        search_button.click(
            fn=search_interface.search_icd10,
            inputs=[
                query_input,
                limit_input,
                score_threshold_input,
                search_mode_input,
                target_chapters_input,
                detailed_analysis_input,
                chapters_per_sentence_input
            ],
            outputs=sentence_results_output
        )

        # Enhanced footer
        gr.HTML("""
        <div style='text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 12px; border: 1px solid #e9ecef;'>
            <p style='margin: 0; color: #666; line-height: 1.6;'>
                Powered by advanced semantic search and AI-driven sentence analysis<br>
                <strong>Features:</strong> Chapter context • Category descriptions • Score-based prioritization<br>
                <strong>Note:</strong> This tool is for research purposes only and should not replace professional medical diagnosis
            </p>
        </div>
        """)

        # Auto-refresh server status every 10 seconds
        demo.load(get_server_status, outputs=server_status, every=10)

    return demo
761
+
762
# Handle to the background FastAPI thread; assigned in the __main__ block.
server_thread = None

def graceful_shutdown():
    """Log that we are going down; extend with real cleanup when needed."""
    logger.info("Shutting down application...")

# Route SIGTERM/SIGINT (what Spaces sends on restart/stop) through the hook.
signal.signal(signal.SIGTERM, lambda signum, frame: graceful_shutdown())
signal.signal(signal.SIGINT, lambda signum, frame: graceful_shutdown())
773
+
774
# Main application entry point for Hugging Face Spaces
if __name__ == "__main__":
    logger.info("Starting ICD-10 Search Application for Hugging Face Spaces...")

    try:
        # Start FastAPI server in a background daemon thread so Gradio can
        # own the main thread (required by Spaces).
        logger.info("Initializing FastAPI server thread...")
        server_thread = threading.Thread(target=start_fastapi_server, daemon=True)
        server_thread.start()
        logger.info("FastAPI server thread started")

        # Fixed grace period before the UI starts probing /health.
        logger.info("Waiting for FastAPI server initialization...")
        time.sleep(8)  # Increased wait time for Spaces

        # Create and launch Gradio interface
        logger.info("Creating Gradio interface...")
        demo = create_gradio_interface()

        # Launch for Spaces environment.
        # NOTE(review): show_tips and prevent_thread_lock were removed from
        # launch() in newer Gradio releases — confirm the pinned gradio
        # version in requirements.txt still accepts them.
        logger.info("Launching Gradio interface for Hugging Face Spaces...")
        demo.launch(
            share=False,               # Don't create public link
            show_error=True,           # Show errors for debugging
            show_tips=False,           # Don't show Gradio tips
            quiet=False,               # Show startup info
            server_name="0.0.0.0",     # Listen on all interfaces for Spaces
            server_port=7860,          # Default Gradio port for Spaces
            prevent_thread_lock=False,
            root_path=os.environ.get("GRADIO_ROOT_PATH", "")  # Support for Spaces routing
        )

    except Exception as e:
        logger.error(f"Application failed to start: {e}")
        sys.exit(1)
chapter_retrieval_system_v2.py ADDED
@@ -0,0 +1,865 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import pprint
import re
import time
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Optional, Tuple, Set

import numpy as np
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import (
    VectorParams,
    Distance,
    PointStruct,
    Filter,
    FieldCondition,
    MatchValue,
)
from sentence_transformers import SentenceTransformer
13
+
14
+ # Load environment variables
15
+ load_dotenv()
16
+
17
+
18
+ class MultiCollectionChapterRetrieval:
19
+ def __init__(self, use_cloud: bool = True):
20
+ """
21
+ Initialize with Qdrant Cloud or local connection
22
+
23
+ Args:
24
+ use_cloud: If True, connects to Qdrant Cloud using environment variables
25
+ """
26
+ if use_cloud:
27
+ self.client = self._create_cloud_client()
28
+ else:
29
+ self.client = QdrantClient("http://localhost:6333")
30
+
31
+ self.encoder = None
32
+
33
+ # ICD-10 Chapter mapping (all 22 chapters)
34
+ self.chapter_info = {
35
+ "chapter_1_I": "Certain infectious and parasitic diseases",
36
+ "chapter_2_II": "Neoplasms",
37
+ "chapter_3_III": "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism",
38
+ "chapter_4_IV": "Endocrine, nutritional and metabolic diseases",
39
+ "chapter_5_V": "Mental and behavioural disorders",
40
+ "chapter_6_VI": "Diseases of the nervous system",
41
+ "chapter_7_VII": "Diseases of the eye and adnexa",
42
+ "chapter_8_VIII": "Diseases of the ear and mastoid process",
43
+ "chapter_9_IX": "Diseases of the circulatory system",
44
+ "chapter_10_X": "Diseases of the respiratory system",
45
+ "chapter_11_XI": "Diseases of the digestive system",
46
+ "chapter_12_XII": "Diseases of the skin and subcutaneous tissue",
47
+ "chapter_13_XIII": "Diseases of the musculoskeletal system and connective tissue",
48
+ "chapter_14_XIV": "Diseases of the genitourinary system",
49
+ "chapter_15_XV": "Pregnancy, childbirth and the puerperium",
50
+ "chapter_16_XVI": "Certain conditions originating in the perinatal period",
51
+ "chapter_17_XVII": "Congenital malformations, deformations and chromosomal abnormalities",
52
+ "chapter_18_XVIII": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
53
+ "chapter_19_XIX": "Injury, poisoning and certain other consequences of external causes",
54
+ "chapter_20_XX": "External causes of morbidity and mortality",
55
+ "chapter_21_XXI": "Factors influencing health status and contact with health services",
56
+ "chapter_22_XXII": "Codes for special purposes"
57
+ }
58
+
59
+ # Cache for collection names
60
+ self._chapter_collections = None
61
+
62
+ def _create_cloud_client(self) -> QdrantClient:
63
+ """Create Qdrant Cloud client with authentication"""
64
+ qdrant_url = os.getenv('QDRANT_URL')
65
+ qdrant_api_key = os.getenv('QDRANT_API_KEY')
66
+
67
+ if not qdrant_url or not qdrant_api_key:
68
+ raise ValueError(
69
+ "Qdrant Cloud credentials not found in environment variables.\n"
70
+ "Please set QDRANT_URL and QDRANT_API_KEY in your .env file:\n"
71
+ "QDRANT_URL=https://your-cluster-id.region.aws.cloud.qdrant.io:6333\n"
72
+ "QDRANT_API_KEY=your-api-key-here"
73
+ )
74
+
75
+ print(f"🔗 Connecting to Qdrant Cloud: {qdrant_url}")
76
+
77
+ try:
78
+ client = QdrantClient(
79
+ url=qdrant_url,
80
+ api_key=qdrant_api_key,
81
+ timeout=60, # Increased timeout for cloud
82
+ # Optional: Add additional cloud-specific settings
83
+ prefer_grpc=True, # Use gRPC for better performance
84
+ )
85
+
86
+ # Test connection
87
+ collections = client.get_collections()
88
+ print(f"✅ Connected successfully! Found {len(collections.collections)} collections")
89
+
90
+
91
+ return client
92
+
93
+ except Exception as e:
94
+ print(f"❌ Failed to connect to Qdrant Cloud: {e}")
95
+ print("Please check your QDRANT_URL and QDRANT_API_KEY in the .env file")
96
+ raise
97
+
98
+ def split_into_sentences(self, text: str) -> List[str]:
99
+ """Split text into sentences using simple rules"""
100
+ import re
101
+
102
+ # Simple sentence splitting - you can enhance this with nltk or spacy if needed
103
+ sentences = re.split(r'[.!?]+', text)
104
+ sentences = [s.strip() for s in sentences if s.strip()]
105
+ return sentences
106
+
107
+ def load_encoder(self, model_name: str = "all-MiniLM-L6-v2"):
108
+ """Load the sentence transformer model"""
109
+ if self.encoder is None:
110
+ print(f"📥 Loading encoder: {model_name}")
111
+ self.encoder = SentenceTransformer(model_name)
112
+ print(f"✅ Encoder loaded successfully")
113
+
114
+ def encode_query(self, query: str) -> List[float]:
115
+ """Encode diagnostic string to vector"""
116
+ if self.encoder is None:
117
+ self.load_encoder()
118
+ return self.encoder.encode([query])[0].tolist()
119
+
120
+ def get_chapter_collections(self) -> Dict[str, str]:
121
+ """
122
+ Get mapping of chapter_id -> collection_name
123
+ Discovers collections automatically based on naming patterns
124
+ """
125
+ if self._chapter_collections is not None:
126
+ return self._chapter_collections
127
+
128
+ try:
129
+ collections = self.client.get_collections()
130
+ chapter_collections = {}
131
+
132
+ print("🔍 Discovering chapter collections...")
133
+
134
+ for collection in collections.collections:
135
+ collection_name = collection.name
136
+
137
+ # Try to match collection names to chapters
138
+ chapter_match = None
139
+
140
+ # Pattern 1: icd10_chapter_X_Y or chapter_X_Y
141
+ pattern1 = re.search(r'chapter[_-]?(\d+)[_-]?([IVX]+)', collection_name, re.IGNORECASE)
142
+ if pattern1:
143
+ chapter_num = pattern1.group(1)
144
+ roman = pattern1.group(2)
145
+ chapter_match = f"chapter_{chapter_num}_{roman}"
146
+
147
+ # Pattern 2: Single collection with all chapters (e.g., icd10_codes_all_chapters)
148
+ elif 'all' in collection_name.lower() and ('chapter' in collection_name.lower() or 'icd' in collection_name.lower()):
149
+ print(f" 📚 Found unified collection: {collection_name}")
150
+ # For unified collections, we'll handle this differently
151
+ chapter_collections['unified_collection'] = collection_name
152
+ continue
153
+
154
+ # Pattern 3: Just the chapter part (chapter1, chapterI, etc.)
155
+ elif 'chapter' in collection_name.lower():
156
+ numbers = re.findall(r'\d+', collection_name)
157
+ romans = re.findall(r'[IVX]+', collection_name)
158
+
159
+ if numbers and romans:
160
+ chapter_match = f"chapter_{numbers[0]}_{romans[0]}"
161
+ elif numbers:
162
+ # Try to convert number to roman numeral
163
+ num = int(numbers[0])
164
+ roman_map = {1: 'I', 2: 'II', 3: 'III', 4: 'IV', 5: 'V', 6: 'VI', 7: 'VII',
165
+ 8: 'VIII', 9: 'IX', 10: 'X', 11: 'XI', 12: 'XII', 13: 'XIII',
166
+ 14: 'XIV', 15: 'XV', 16: 'XVI', 17: 'XVII', 18: 'XVIII', 19: 'XIX',
167
+ 20: 'XX', 21: 'XXI', 22: 'XXII'}
168
+ if num in roman_map:
169
+ chapter_match = f"chapter_{num}_{roman_map[num]}"
170
+
171
+ if chapter_match:
172
+ chapter_collections[chapter_match] = collection_name
173
+ print(f" ✓ {chapter_match} -> {collection_name}")
174
+
175
+ print(f"📊 Found {len(chapter_collections)} chapter collections")
176
+
177
+ # If we only found a unified collection, we'll need to handle searches differently
178
+ if len(chapter_collections) == 1 and 'unified_collection' in chapter_collections:
179
+ print("⚠️ Only unified collection found. Searches will use chapter filtering.")
180
+
181
+ self._chapter_collections = chapter_collections
182
+ return chapter_collections
183
+
184
+ except Exception as e:
185
+ print(f"❌ Error discovering collections: {e}")
186
+ return {}
187
+
188
+ def search_single_collection(
189
+ self,
190
+ collection_name: str,
191
+ query_vector: List[float],
192
+ limit: int = 20,
193
+ score_threshold: float = 0.3,
194
+ chapter_filter: Optional[str] = None
195
+ ) -> List[Dict]:
196
+ """Search a single collection and return formatted results"""
197
+ try:
198
+ # Build search parameters
199
+ search_params = {
200
+ "collection_name": collection_name,
201
+ "query_vector": query_vector,
202
+ "limit": limit,
203
+ "score_threshold": score_threshold
204
+ }
205
+
206
+ results = self.client.search(**search_params)
207
+
208
+ formatted_results = []
209
+ for result in results:
210
+ formatted_results.append({
211
+ 'collection': collection_name,
212
+ 'score': result.score,
213
+ 'id': result.id,
214
+ 'payload': result.payload
215
+ })
216
+
217
+ return formatted_results
218
+
219
+ except Exception as e:
220
+ print(f"❌ Error searching {collection_name}: {e}")
221
+ if "timeout" in str(e).lower():
222
+ print(" This might be due to network issues. Retrying with lower limit...")
223
+ try:
224
+ # Retry with reduced parameters
225
+ search_params["limit"] = min(limit, 10)
226
+ search_params["score_threshold"] = max(score_threshold, 0.5)
227
+ results = self.client.search(**search_params)
228
+
229
+ formatted_results = []
230
+ for result in results:
231
+ formatted_results.append({
232
+ 'collection': collection_name,
233
+ 'score': result.score,
234
+ 'id': result.id,
235
+ 'payload': result.payload
236
+ })
237
+ return formatted_results
238
+ except:
239
+ pass
240
+ return []
241
+
242
+ def analyze_chapters_parallel(
243
+ self,
244
+ diagnostic_string: str,
245
+ sample_size_per_chapter: int = 15,
246
+ score_threshold: float = 0.3,
247
+ max_workers: int = 4 # Reduced for cloud stability
248
+ ) -> Dict[str, Dict]:
249
+ """
250
+ Analyze all chapter collections in parallel to determine relevance
251
+ Optimized for cloud performance
252
+ """
253
+ query_vector = self.encode_query(diagnostic_string)
254
+ chapter_collections = self.get_chapter_collections()
255
+
256
+ if not chapter_collections:
257
+ print("❌ No chapter collections found!")
258
+ return {}
259
+
260
+ print(f"\n🔍 Analyzing diagnostic: '{diagnostic_string}'")
261
+
262
+ # Handle unified collection differently
263
+ # if 'unified_collection' in chapter_collections:
264
+ # return self._analyze_unified_collection(
265
+ # diagnostic_string, query_vector,
266
+ # chapter_collections['unified_collection'],
267
+ # sample_size_per_chapter, score_threshold
268
+ # )
269
+
270
+ print(f"🔄 Searching {len(chapter_collections)} collections in parallel...")
271
+
272
+ chapter_analysis = {}
273
+
274
+ def search_chapter(chapter_id: str, collection_name: str) -> Tuple[str, List[Dict]]:
275
+ """Search function for parallel execution with retry logic"""
276
+ max_retries = 2
277
+ for attempt in range(max_retries):
278
+ try:
279
+ results = self.search_single_collection(
280
+ collection_name, query_vector, sample_size_per_chapter, score_threshold
281
+ )
282
+ return chapter_id, results
283
+ except Exception as e:
284
+ if attempt < max_retries - 1:
285
+ print(f" ⚠️ Retry {attempt + 1} for {chapter_id}: {e}")
286
+ time.sleep(1) # Brief delay before retry
287
+ else:
288
+ print(f" ❌ Failed {chapter_id} after {max_retries} attempts: {e}")
289
+ return chapter_id, []
290
+
291
+ # Execute searches in parallel
292
+ start_time = time.time()
293
+
294
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
295
+ # Submit all search tasks
296
+ future_to_chapter = {
297
+ executor.submit(search_chapter, chapter_id, collection_name): chapter_id
298
+ for chapter_id, collection_name in chapter_collections.items()
299
+ if chapter_id != 'unified_collection'
300
+ }
301
+
302
+ # Collect results as they complete
303
+ for future in as_completed(future_to_chapter):
304
+ chapter_id = future_to_chapter[future]
305
+ try:
306
+ chapter_id, results = future.result(timeout=30) # 30 second timeout per search
307
+
308
+ if results:
309
+ scores = [r['score'] for r in results]
310
+
311
+ # Calculate chapter statistics
312
+ chapter_analysis[chapter_id] = {
313
+ 'collection_name': chapter_collections[chapter_id],
314
+ 'match_count': len(results),
315
+ 'max_score': max(scores),
316
+ 'avg_score': np.mean(scores),
317
+ 'median_score': np.median(scores),
318
+ 'min_score': min(scores),
319
+ 'score_std': np.std(scores),
320
+ 'top_matches': sorted(results, key=lambda x: x['score'], reverse=True)[:5],
321
+ 'all_results': results
322
+ }
323
+
324
+ # Calculate relevance score (weighted combination of metrics)
325
+ relevance = (
326
+ chapter_analysis[chapter_id]['avg_score'] * 0.4 +
327
+ chapter_analysis[chapter_id]['max_score'] * 0.3 +
328
+ min(len(results) / sample_size_per_chapter, 1.0) * 0.2 +
329
+ (1.0 / (1.0 + chapter_analysis[chapter_id]['score_std'])) * 0.1
330
+ )
331
+
332
+ chapter_analysis[chapter_id]['relevance_score'] = relevance
333
+
334
+ # print(f" ✅ {chapter_id}: {len(results)} matches, relevance: {relevance:.4f}")
335
+ # else:
336
+ # print(f" ➖ {chapter_id}: No matches above threshold")
337
+
338
+ except Exception as e:
339
+ print(f" ❌ {chapter_id}: Error - {e}")
340
+
341
+ elapsed = time.time() - start_time
342
+ print(f"⏱️ Parallel analysis completed in {elapsed:.2f} seconds")
343
+
344
+ # Sort by relevance score
345
+ sorted_analysis = dict(sorted(
346
+ chapter_analysis.items(),
347
+ key=lambda x: x[1]['relevance_score'],
348
+ reverse=True
349
+ ))
350
+
351
+ return sorted_analysis
352
+
353
+ def _analyze_unified_collection(
354
+ self,
355
+ diagnostic_string: str,
356
+ query_vector: List[float],
357
+ collection_name: str,
358
+ sample_size_per_chapter: int,
359
+ score_threshold: float
360
+ ) -> Dict[str, Dict]:
361
+ """Analyze unified collection by searching with chapter filters"""
362
+ print(f"🔄 Analyzing unified collection: {collection_name}")
363
+
364
+ chapter_analysis = {}
365
+
366
+ # Search each chapter in the unified collection
367
+ for chapter_id in self.chapter_info.keys():
368
+ try:
369
+ results = self.search_single_collection(
370
+ collection_name, query_vector, sample_size_per_chapter,
371
+ score_threshold, chapter_filter=chapter_id
372
+ )
373
+
374
+ if results:
375
+ scores = [r['score'] for r in results]
376
+
377
+ chapter_analysis[chapter_id] = {
378
+ 'collection_name': collection_name,
379
+ 'match_count': len(results),
380
+ 'max_score': max(scores),
381
+ 'avg_score': np.mean(scores),
382
+ 'median_score': np.median(scores),
383
+ 'min_score': min(scores),
384
+ 'score_std': np.std(scores),
385
+ 'top_matches': sorted(results, key=lambda x: x['score'], reverse=True)[:5],
386
+ 'all_results': results
387
+ }
388
+
389
+ # Calculate relevance score
390
+ relevance = (
391
+ chapter_analysis[chapter_id]['avg_score'] * 0.4 +
392
+ chapter_analysis[chapter_id]['max_score'] * 0.3 +
393
+ min(len(results) / sample_size_per_chapter, 1.0) * 0.2 +
394
+ (1.0 / (1.0 + chapter_analysis[chapter_id]['score_std'])) * 0.1
395
+ )
396
+
397
+ chapter_analysis[chapter_id]['relevance_score'] = relevance
398
+ print(f" ✅ {chapter_id}: {len(results)} matches, relevance: {relevance:.4f}")
399
+ else:
400
+ print(f" ➖ {chapter_id}: No matches above threshold")
401
+
402
+ # Small delay to avoid overwhelming the cloud service
403
+ time.sleep(0.1)
404
+
405
+ except Exception as e:
406
+ print(f" ❌ {chapter_id}: Error - {e}")
407
+
408
+ # Sort by relevance score
409
+ return dict(sorted(
410
+ chapter_analysis.items(),
411
+ key=lambda x: x[1]['relevance_score'],
412
+ reverse=True
413
+ ))
414
+
415
+ def get_top_chapters(
416
+ self,
417
+ diagnostic_string: str,
418
+ top_n: int = 5,
419
+ min_relevance: float = 0.1
420
+ ) -> List[Tuple[str, float, str]]:
421
+ """
422
+ Get top N most relevant chapters for a diagnostic string
423
+ Returns: [(chapter_id, relevance_score, description)]
424
+ """
425
+ analysis = self.analyze_chapters_parallel(diagnostic_string)
426
+
427
+ top_chapters = []
428
+ for chapter_id, stats in analysis.items():
429
+ relevance = stats['relevance_score']
430
+
431
+ if relevance >= min_relevance and len(top_chapters) < top_n:
432
+ description = self.chapter_info.get(chapter_id, "Unknown chapter")
433
+ top_chapters.append((chapter_id, relevance, description))
434
+
435
+ return top_chapters
436
+
437
    def search_targeted_chapters(
        self,
        diagnostic_string: str,
        target_chapters: Optional[List[str]] = None,
        results_per_chapter: int = 10,  # Keep for backward compatibility
        results_per_sentence: int = 3,
        chapters_per_sentence: int = 2  # New parameter: how many top chapters to search per sentence
    ) -> Dict[str, Dict[str, List[Dict]]]:
        """
        Search only specific chapters or auto-identify top chapters for each sentence individually.

        The diagnostic string is split into sentences; each sentence is then
        searched either in its own most-relevant chapters (when
        ``target_chapters`` is None) or in the caller-supplied chapters.

        Args:
            diagnostic_string: Free-form diagnostic text (may contain several sentences).
            target_chapters: Explicit chapter_ids to search; None enables
                per-sentence auto-identification via get_top_chapters.
            results_per_chapter: Unused; retained for backward compatibility.
            results_per_sentence: Hits to return per sentence per chapter.
            chapters_per_sentence: In auto mode, how many top chapters to search per sentence.

        Returns:
            {chapter_id: {sentence_key: {'text', 'chapter_relevance', 'results'}}}
            where 'chapter_relevance' is None in pre-specified mode. The
            verbose prints throughout are intentional debug tracing.
        """
        print(f"\n=== STARTING search_targeted_chapters ===")
        print(f"Input parameters:")
        print(f" diagnostic_string: '{diagnostic_string[:100]}{'...' if len(diagnostic_string) > 100 else ''}'")
        print(f" target_chapters: {target_chapters}")
        print(f" results_per_sentence: {results_per_sentence}")
        print(f" chapters_per_sentence: {chapters_per_sentence}")

        # Split input into sentences first
        print(f"\n--- SENTENCE SPLITTING ---")
        sentences = self.split_into_sentences(diagnostic_string)
        print(f"Split into {len(sentences)} sentences:")
        for i, sentence in enumerate(sentences):
            print(f" [{i+1}]: '{sentence}'")

        print(f"\n--- GETTING CHAPTER COLLECTIONS ---")
        chapter_collections = self.get_chapter_collections()
        print(f"Available chapter collections: {len(chapter_collections)} total")
        print(f"Chapter IDs: {list(chapter_collections.keys())}")

        results = {}

        if target_chapters is None:
            # Auto mode: each sentence gets its own shortlist of chapters.
            print(f"\n=== AUTO-IDENTIFICATION MODE ===")
            print("Auto-identifying most relevant chapters for each sentence individually...")

            for i, sentence in enumerate(sentences):
                if sentence.strip():  # Skip empty sentences
                    sentence_key = f"sentence_{i+1}"
                    print(f"\n--- Processing sentence {i+1} ---")
                    print(f"Sentence: '{sentence}'")
                    print(f"Sentence key: {sentence_key}")

                    # Get top chapters specifically for THIS sentence
                    print(f"Getting top {chapters_per_sentence} chapters for this sentence...")
                    try:
                        sentence_top_chapters = self.get_top_chapters(
                            sentence,
                            top_n=chapters_per_sentence,
                            min_relevance=0.05
                        )
                        print(f"Found {len(sentence_top_chapters)} relevant chapters:")
                        for j, (ch_id, rel, desc) in enumerate(sentence_top_chapters):
                            print(f" [{j+1}] {ch_id}: {rel:.4f} - {desc}")
                    except Exception as e:
                        print(f"ERROR in get_top_chapters: {e}")
                        sentence_top_chapters = []

                    # Search only the relevant chapters for this specific sentence
                    print(f"Searching in {len(sentence_top_chapters)} selected chapters...")
                    for chapter_id, relevance, description in sentence_top_chapters:
                        print(f"\n >> Searching chapter: {chapter_id} (relevance: {relevance:.4f})")

                        if chapter_id in chapter_collections:
                            collection_name = chapter_collections[chapter_id]
                            print(f" Collection name: {collection_name}")

                            # Initialize chapter in results if not exists
                            if chapter_id not in results:
                                results[chapter_id] = {}
                                print(f" Initialized results dict for chapter {chapter_id}")

                            # Search this sentence in this specific chapter
                            try:
                                print(f" Encoding query for sentence...")
                                query_vector = self.encode_query(sentence)
                                print(f" Query vector shape: {getattr(query_vector, 'shape', 'N/A')}")

                                print(f" Searching collection '{collection_name}' for top {results_per_sentence} results...")
                                sentence_results = self.search_single_collection(
                                    collection_name, query_vector, results_per_sentence
                                )
                                print(f" Raw search returned {len(sentence_results) if sentence_results else 0} results")

                            except Exception as e:
                                print(f" ERROR during search: {e}")
                                sentence_results = []

                            if sentence_results:
                                results[chapter_id][sentence_key] = {
                                    'text': sentence,
                                    'chapter_relevance': relevance,
                                    'results': sentence_results
                                }
                                print(f" ✓ Stored {len(sentence_results)} results for {chapter_id}[{sentence_key}]")

                                # Debug: show top result scores
                                if sentence_results:
                                    top_scores = [r.get('score', 'N/A') for r in sentence_results[:3]]
                                    print(f" Top 3 scores: {top_scores}")
                            else:
                                print(f" ✗ No results above threshold for {chapter_id}")
                        else:
                            print(f" ERROR: Chapter {chapter_id} collection not found in available collections")
                else:
                    print(f"\n--- Skipping empty sentence {i+1} ---")

        else:
            # Explicit mode: every sentence is searched in every valid target chapter.
            print(f"\n=== PRE-SPECIFIED CHAPTERS MODE ===")
            print(f"Using pre-specified chapters: {target_chapters}")

            # Validate chapters exist
            valid_chapters = []
            invalid_chapters = []
            for chapter_id in target_chapters:
                if chapter_id in chapter_collections:
                    valid_chapters.append(chapter_id)
                else:
                    invalid_chapters.append(chapter_id)

            print(f"Valid chapters: {valid_chapters}")
            if invalid_chapters:
                print(f"WARNING: Invalid chapters (will be skipped): {invalid_chapters}")

            for chapter_id in valid_chapters:
                collection_name = chapter_collections[chapter_id]
                print(f"\n--- Searching chapter: {chapter_id} ---")
                print(f"Collection name: {collection_name}")

                chapter_results = {}

                # Search each sentence in this chapter
                for i, sentence in enumerate(sentences):
                    if sentence.strip():  # Skip empty sentences
                        sentence_key = f"sentence_{i+1}"
                        print(f"\n >> Processing sentence {i+1} in {chapter_id}")
                        print(f" Sentence: '{sentence}'")

                        try:
                            print(f" Encoding query...")
                            query_vector = self.encode_query(sentence)
                            print(f" Query vector shape: {getattr(query_vector, 'shape', 'N/A')}")

                            print(f" Searching for top {results_per_sentence} results...")
                            sentence_results = self.search_single_collection(
                                collection_name, query_vector, results_per_sentence
                            )
                            print(f" Found {len(sentence_results) if sentence_results else 0} results")

                        except Exception as e:
                            print(f" ERROR during search: {e}")
                            sentence_results = []

                        if sentence_results:
                            chapter_results[sentence_key] = {
                                'text': sentence,
                                'chapter_relevance': None,  # Not calculated for pre-specified chapters
                                'results': sentence_results
                            }
                            print(f" ✓ Stored results for sentence {i+1}")

                            # Debug: show top result scores
                            top_scores = [r.get('score', 'N/A') for r in sentence_results[:3]]
                            print(f" Top 3 scores: {top_scores}")
                        else:
                            print(f" ✗ No results found for sentence {i+1}")
                    else:
                        print(f" >> Skipping empty sentence {i+1}")

                if chapter_results:
                    results[chapter_id] = chapter_results
                    print(f"\n ✓ Chapter {chapter_id}: Stored results for {len(chapter_results)} sentences")
                else:
                    print(f"\n ✗ Chapter {chapter_id}: No results found")

        # Final summary
        print(f"\n=== SEARCH COMPLETE ===")
        print(f"Results summary:")
        total_results = 0
        for chapter_id, chapter_data in results.items():
            sentence_count = len(chapter_data)
            result_count = sum(len(sent_data.get('results', [])) for sent_data in chapter_data.values())
            total_results += result_count
            print(f" {chapter_id}: {sentence_count} sentences, {result_count} total results")

        print(f"Grand total: {len(results)} chapters, {total_results} results")
        print(f"=== END search_targeted_chapters ===\n")

        return results
627
+
628
+ def format_chapter_analysis(self, diagnostic_string: str, detailed: bool = True) -> str:
629
+ """Format comprehensive chapter analysis"""
630
+ analysis = self.analyze_chapters_parallel(diagnostic_string)
631
+
632
+ if not analysis:
633
+ return "❌ No relevant chapters found."
634
+
635
+ output = []
636
+ output.append(f"\n{'='*90}")
637
+ output.append(f"📊 CHAPTER RELEVANCE ANALYSIS")
638
+ output.append(f"🔍 Diagnostic: '{diagnostic_string}'")
639
+ output.append(f"{'='*90}")
640
+
641
+ for i, (chapter_id, stats) in enumerate(analysis.items(), 1):
642
+ if stats['relevance_score'] < 0.05: # Skip very low relevance
643
+ continue
644
+
645
+ description = self.chapter_info.get(chapter_id, "Unknown chapter")
646
+
647
+ output.append(f"\n{i}. 📚 {chapter_id.upper()}")
648
+ output.append(f" 🏷️ Collection: {stats['collection_name']}")
649
+ output.append(f" 📖 Description: {description}")
650
+ output.append(f" ⭐ Relevance Score: {stats['relevance_score']:.4f}")
651
+ output.append(f" 📊 Statistics:")
652
+ output.append(f" • Matches: {stats['match_count']}")
653
+ output.append(f" • Max Score: {stats['max_score']:.4f}")
654
+ output.append(f" • Avg Score: {stats['avg_score']:.4f}")
655
+ output.append(f" • Score Range: {stats['min_score']:.4f} - {stats['max_score']:.4f}")
656
+
657
+ if detailed:
658
+ output.append(f"\n 🎯 Top Matches:")
659
+ for j, match in enumerate(stats['top_matches'][:3], 1):
660
+ code = match['payload'].get('code', 'N/A')
661
+ title = match['payload'].get('title', 'N/A')
662
+ score = match['score']
663
+ output.append(f" {j}. {code} - {title}")
664
+ output.append(f" 💯 Similarity: {score:.4f}")
665
+
666
+ output.append("-" * 90)
667
+
668
+ return "\n".join(output)
669
+
670
+
671
+ # Convenience functions for multi-collection setup
672
def analyze_diagnostic_chapters(diagnostic_string: str, detailed: bool = True, use_cloud: bool = True) -> str:
    """
    Convenience entry point: build a retriever and return the formatted
    chapter-relevance report for a diagnostic string.
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)
    return retriever.format_chapter_analysis(diagnostic_string, detailed)
678
+
679
def get_relevant_chapters(diagnostic_string: str, top_n: int = 5, use_cloud: bool = True) -> List[str]:
    """
    Return the IDs of the most relevant chapters for a diagnostic string,
    e.g. ['chapter_9_IX', 'chapter_10_X', ...].
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)
    # get_top_chapters yields (chapter_id, relevance, description) tuples.
    return [
        chapter_id
        for chapter_id, _relevance, _description in retriever.get_top_chapters(diagnostic_string, top_n)
    ]
687
+
688
def smart_diagnostic_search(
    diagnostic_string: str,
    auto_select_chapters: bool = True,
    target_chapters: List[str] = None,
    results_per_sentence: int = 3,
    use_cloud: bool = True
) -> Dict[str, Dict[str, List[Dict]]]:
    """
    Intelligent diagnostic search that processes each sentence separately.
    Optimized for Qdrant Cloud.

    Args:
        diagnostic_string: Free-form diagnostic text.
        auto_select_chapters: Retained for backward compatibility only — both
            branches of the old conditional issued the identical call, so the
            flag never affected behavior. Chapter auto-selection is actually
            controlled by ``target_chapters is None`` inside
            search_targeted_chapters.
        target_chapters: Explicit chapter_ids, or None for auto-identification.
        results_per_sentence: Hits to return per sentence per chapter.
        use_cloud: Connect to Qdrant Cloud (True) or a local instance.

    Returns:
        {chapter_id: {sentence_key: {'text', 'chapter_relevance', 'results'}}}
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)

    # The previous if/else on auto_select_chapters was dead code (identical
    # branches); collapsed to a single call with unchanged behavior.
    return retriever.search_targeted_chapters(
        diagnostic_string, target_chapters, results_per_sentence=results_per_sentence
    )
709
+
710
def format_smart_search_results(
    diagnostic_string: str,
    search_results: Dict[str, Dict[str, List[Dict]]],
    use_cloud: bool = True
) -> str:
    """Format the results produced by sentence-based smart_diagnostic_search."""
    if not search_results:
        return "❌ No results found."

    # A retriever instance is needed for its chapter_info lookup table.
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)

    report = [
        f"\n{'='*90}",
        f"🔍 SENTENCE-BASED DIAGNOSTIC SEARCH RESULTS",
        f"🎯 Query: '{diagnostic_string}'",
        f"{'='*90}",
    ]

    # Tally totals up front for the summary line.
    total_sentences = sum(len(chapter_results) for chapter_results in search_results.values())
    total_results = sum(
        len(sentence_data['results'])
        for chapter_results in search_results.values()
        for sentence_data in chapter_results.values()
    )

    report.append(f"📊 Total results: {total_results} across {len(search_results)} chapters and {total_sentences} sentences")

    for chapter_id, chapter_data in search_results.items():
        description = retriever.chapter_info.get(chapter_id, "Unknown chapter")

        report.append(f"\n📚 {chapter_id.upper()}")
        report.append(f" 📖 {description}")
        report.append(f" 📝 {len(chapter_data)} sentences processed")
        report.append("-" * 60)

        for sentence_key, sentence_data in chapter_data.items():
            sentence_text = sentence_data['text']
            hits = sentence_data['results']

            report.append(f"\n 🔍 {sentence_key.replace('_', ' ').title()}: \"{sentence_text}\"")
            report.append(f" 🎯 Top {len(hits)} matches:")
            report.append("")

            for rank, hit in enumerate(hits, 1):
                payload = hit['payload']
                code = payload.get('code', 'N/A')
                title = payload.get('title', 'N/A')
                score = hit['score']

                report.append(f" {rank}. {code} - {title}")
                report.append(f" 💯 Score: {score:.4f}")

                # Show a (truncated) description when one is available.
                desc = payload.get('description', '')
                if desc:
                    desc_preview = desc[:100] + "..." if len(desc) > 100 else desc
                    report.append(f" 📄 {desc_preview}")

                report.append("")

        report.append("=" * 90)

    return "\n".join(report)
774
+
775
+ # Example usage
776
def example_multi_collection_analysis(use_cloud: bool = True):
    """Demonstrate the multi-collection chapter analysis on sample diagnostics."""
    test_cases = [
        "severe chest pain with shortness of breath",
        "type 2 diabetes with kidney complications",
        "depression and anxiety disorder",
        "broken wrist from falling",
        "acute appendicitis with fever",
        "skin cancer melanoma",
        "pregnancy complications in third trimester"
    ]

    for diagnostic in test_cases:
        print(f"\n{'='*100}")
        print(f"🔍 ANALYZING: {diagnostic}")
        print(f"{'='*100}")

        try:
            # 1) Full relevance report across chapters.
            analysis = analyze_diagnostic_chapters(diagnostic, detailed=False, use_cloud=use_cloud)
            print(analysis)

            # 2) Shortlist of the best-matching chapters.
            top_chapters = get_relevant_chapters(diagnostic, top_n=3, use_cloud=use_cloud)
            print(f"\n🏆 Top 3 relevant chapters: {top_chapters}")

            # 3) Sentence-level search inside those chapters.
            search_results = smart_diagnostic_search(
                diagnostic,
                results_per_sentence=5,
                use_cloud=use_cloud
            )
            formatted_results = format_smart_search_results(
                diagnostic,
                search_results,
                use_cloud=use_cloud
            )
            print(formatted_results)

        except Exception as e:
            print(f"❌ Error processing '{diagnostic}': {e}")
            continue
819
+
820
def test_cloud_connection():
    """Smoke-test Qdrant Cloud connectivity and basic search.

    Returns:
        True when the connection works and chapter collections are visible;
        False otherwise.
    """
    print("🧪 Testing Qdrant Cloud Connection...")

    try:
        retriever = MultiCollectionChapterRetrieval(use_cloud=True)

        test_query = "heart disease"
        print(f"\n🔬 Testing with query: '{test_query}'")

        collections = retriever.get_chapter_collections()
        print(f"📊 Available collections: {len(collections)}")

        # Guard clause: nothing to search against.
        if not collections:
            print("⚠️ No collections found")
            return False

        top_chapters = retriever.get_top_chapters(test_query, top_n=3)
        print(f"🎯 Top chapters for '{test_query}': {[ch[0] for ch in top_chapters]}")

        print("✅ Cloud connection test successful!")
        return True

    except Exception as e:
        print(f"❌ Cloud connection test failed: {e}")
        return False
849
+
850
if __name__ == "__main__":
    # Verify cloud connectivity before running the (slow) demo analyses.
    if test_cloud_connection():
        print("\n" + "=" * 100)
        print("🚀 Running example analysis with Qdrant Cloud...")
        print("=" * 100)
        example_multi_collection_analysis(use_cloud=True)
    else:
        print("❌ Skipping examples due to connection issues")

    # Or use directly:
    # chapters = get_relevant_chapters("heart attack symptoms", use_cloud=True)
    # results = smart_diagnostic_search("heart attack symptoms", use_cloud=True)
    # print(format_smart_search_results("heart attack symptoms", results, use_cloud=True))
requirements.txt ADDED
File without changes
service_v2.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Query
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import JSONResponse
4
+ from pydantic import BaseModel
5
+ from typing import List, Optional, Dict, Any
6
+ import time
7
+ import logging
8
+ import pprint
9
+
10
+ # Import your existing neural searcher and the new multi-collection system
11
+ # from neural_searcher import NeuralSearcher
12
+ from chapter_retrieval_system_v2 import MultiCollectionChapterRetrieval
13
+
14
+ # Configure logging
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
+ app = FastAPI(
19
+ title="ICD-10 Multi-Collection Search API",
20
+ description="Advanced ICD-10 code search with intelligent chapter detection",
21
+ version="2.0.0"
22
+ )
23
+
24
+ # Add CORS middleware for web frontend integration
25
+ app.add_middleware(
26
+ CORSMiddleware,
27
+ allow_origins=["*"], # Configure this properly for production
28
+ allow_credentials=True,
29
+ allow_methods=["*"],
30
+ allow_headers=["*"],
31
+ )
32
+
33
+ # Initialize systems
34
+ try:
35
+ # Initialize the multi-collection chapter retrieval system
36
+ chapter_retriever = MultiCollectionChapterRetrieval()
37
+
38
+ # Keep your original neural searcher for backward compatibility
39
+ # You might not need this if switching fully to multi-collection approach
40
+ # neural_searcher = NeuralSearcher(collection_name="icd10_codes_chapter_3")
41
+
42
+ logger.info("Successfully initialized search systems")
43
+ except Exception as e:
44
+ logger.error(f"Failed to initialize search systems: {e}")
45
+ chapter_retriever = None
46
+ # neural_searcher = None
47
+
48
+ # Pydantic models for request/response validation
49
+ class SearchRequest(BaseModel):
50
+ query: str
51
+ limit: Optional[int] = 10
52
+ score_threshold: Optional[float] = 0.3
53
+ search_mode: Optional[str] = "smart" # "smart", "all_chapters", "specific_chapters"
54
+ target_chapters: Optional[List[str]] = None
55
+ detailed_analysis: Optional[bool] = False
56
+ chapters_per_sentence: Optional[int] = 2 # NEW: How many chapters to search per sentence
57
+
58
+
59
+
60
+ class ChapterInfo(BaseModel):
61
+ chapter_id: str
62
+ collection_name: str
63
+ relevance_score: float
64
+ description: str
65
+ match_count: int
66
+ avg_score: float
67
+ max_score: float
68
+
69
+ class SearchResult(BaseModel):
70
+ code: str
71
+ title: str
72
+ description: Optional[str] = None
73
+ score: float
74
+ chapter_id: Optional[str] = None
75
+ collection: str
76
+ source_sentence: Optional[str] = None # NEW: Track which sentence generated this result
77
+ sentence_key: Optional[str] = None # NEW: Track sentence identifier
78
+
79
+ class SentenceResults(BaseModel):
80
+ sentence_text: str
81
+ sentence_key: str
82
+ results: List[SearchResult]
83
+ total_results: int
84
+
85
+ class SearchResponse(BaseModel):
86
+ query: str
87
+ total_results: int
88
+ search_time: float
89
+ search_mode: str
90
+ relevant_chapters: List[ChapterInfo]
91
+ results: List[SearchResult] # Keep for backward compatibility
92
+ sentence_results: Optional[List[SentenceResults]] = None # NEW: Results grouped by sentence
93
+
94
+
95
+ class ChapterAnalysisResponse(BaseModel):
96
+ query: str
97
+ analysis_time: float
98
+ chapters: List[ChapterInfo]
99
+
100
+ # Health check endpoint
101
@app.get("/health")
def health_check():
    """Liveness probe: 503 while the retrieval system failed to initialize, else healthy."""
    # chapter_retriever is set to None when module-level initialization fails.
    if chapter_retriever is None:
        raise HTTPException(status_code=503, detail="Search system not initialized")
    return {"status": "healthy", "timestamp": time.time()}
107
+
108
+ # Chapter analysis endpoint
109
@app.get("/api/analyze-chapters", response_model=ChapterAnalysisResponse)
def analyze_chapters(
    q: str = Query(..., description="Diagnostic query string"),
    detailed: bool = Query(False, description="Include detailed chapter statistics")
):
    """
    Analyze which ICD-10 chapters are most relevant for a diagnostic query.

    Returns relevance statistics for every chapter whose score clears a
    small cutoff, along with the wall-clock analysis time.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Chapter retrieval system not available")

    if not q or not q.strip():
        raise HTTPException(status_code=400, detail="Query parameter 'q' is required")

    try:
        started = time.time()

        # NOTE(review): `detailed` is accepted but not currently used here.
        analysis = chapter_retriever.analyze_chapters_parallel(
            q.strip(),
            sample_size_per_chapter=15,
            score_threshold=0.2,
        )

        elapsed = time.time() - started

        # Keep only chapters with non-negligible relevance.
        chapters = [
            ChapterInfo(
                chapter_id=chapter_id,
                collection_name=stats['collection_name'],
                relevance_score=stats['relevance_score'],
                description=chapter_retriever.chapter_info.get(chapter_id, "Unknown chapter"),
                match_count=stats['match_count'],
                avg_score=stats['avg_score'],
                max_score=stats['max_score'],
            )
            for chapter_id, stats in analysis.items()
            if stats['relevance_score'] > 0.05
        ]

        return ChapterAnalysisResponse(
            query=q,
            analysis_time=elapsed,
            chapters=chapters,
        )

    except Exception as e:
        logger.error(f"Error in chapter analysis: {e}")
        raise HTTPException(status_code=500, detail=f"Chapter analysis failed: {str(e)}")
159
+
160
+ # Smart search endpoint (main search functionality)
161
@app.post("/api/search", response_model=SearchResponse)
def search_smart(request: SearchRequest):
    """
    Advanced search with intelligent chapter detection and targeted searching.

    Thin POST wrapper; the shared implementation lives in _perform_search.
    """
    return _perform_search(request)
167
+
168
@app.get("/api/search", response_model=SearchResponse)
def search_smart_get(
    q: str = Query(..., description="Diagnostic query string"),
    limit: int = Query(10, ge=1, le=100, description="Maximum number of results"),
    score_threshold: float = Query(0.3, ge=0.0, le=1.0, description="Minimum similarity score"),
    search_mode: str = Query("smart", description="Search mode: smart, all_chapters, specific_chapters"),
    target_chapters: Optional[str] = Query(None, description="Comma-separated list of target chapters (for specific_chapters mode)"),
    detailed_analysis: bool = Query(False, description="Include detailed chapter analysis"),
    chapters_per_sentence: int = Query(2, ge=1, le=5, description="Number of chapters to search per sentence")
):
    """
    Advanced search with intelligent chapter detection (GET version).

    Mirrors the POST endpoint; `target_chapters` arrives as a comma-separated
    string and is split into a list before dispatching to _perform_search.
    """
    # "a, b ,c" -> ["a", "b", "c"]; absent/blank -> None (no chapter restriction).
    parsed_chapters = (
        [chunk.strip() for chunk in target_chapters.split(",") if chunk.strip()]
        if target_chapters
        else None
    )

    return _perform_search(
        SearchRequest(
            query=q,
            limit=limit,
            score_threshold=score_threshold,
            search_mode=search_mode,
            target_chapters=parsed_chapters,
            detailed_analysis=detailed_analysis,
            chapters_per_sentence=chapters_per_sentence,
        )
    )
197
+
198
def _perform_search(request: SearchRequest) -> SearchResponse:
    """
    Internal search logic shared by the GET and POST /api/search endpoints.

    In "smart" mode the retriever splits the query into sentences, searches
    each sentence against its most relevant chapters, and the results are
    returned both grouped per sentence (``sentence_results``) and flattened
    (``results``, kept for backward compatibility).

    Raises:
        HTTPException: 503 when the retriever is unavailable, 400 for a
            missing query or unknown search mode, 500 for unexpected errors.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Search system not available")

    if not request.query or not request.query.strip():
        raise HTTPException(status_code=400, detail="Query is required")

    try:
        start_time = time.time()
        query = request.query.strip()

        # Initialize response data.
        relevant_chapters = []
        results = []
        sentence_results = []  # Results grouped by sentence
        # FIX: initialize up front so the flattening loop below doesn't raise
        # NameError when search_mode is not "smart".
        all_results = []

        if request.search_mode == "smart":
            # Smart search: auto-identify chapters then search them sentence by sentence
            logger.info(f"Performing sentence-based smart search for: '{query}'")

            # First, analyze chapters if detailed analysis is requested
            if request.detailed_analysis:
                analysis = chapter_retriever.analyze_chapters_parallel(query)
                for chapter_id, stats in analysis.items():
                    if stats['relevance_score'] > 0.1:
                        chapter_info = ChapterInfo(
                            chapter_id=chapter_id,
                            collection_name=stats['collection_name'],
                            relevance_score=stats['relevance_score'],
                            description=chapter_retriever.chapter_info.get(chapter_id, "Unknown"),
                            match_count=stats['match_count'],
                            avg_score=stats['avg_score'],
                            max_score=stats['max_score']
                        )
                        relevant_chapters.append(chapter_info)

            # Perform sentence-based targeted search
            search_results = chapter_retriever.search_targeted_chapters(
                query,
                target_chapters=request.target_chapters,
                results_per_sentence=request.limit,  # Use full limit per sentence
                chapters_per_sentence=request.chapters_per_sentence
            )

            # Group raw hits by the sentence that produced them, while also
            # keeping a flattened list for backward compatibility.
            sentence_result_map = {}

            for chapter_id, chapter_data in search_results.items():
                for sentence_key, sentence_data in chapter_data.items():
                    sentence_text = sentence_data['text']

                    # Initialize sentence entry if not exists
                    if sentence_key not in sentence_result_map:
                        sentence_result_map[sentence_key] = {
                            'text': sentence_text,
                            'results': []
                        }

                    # Enrich each raw hit with provenance metadata.
                    for result in sentence_data['results']:
                        enriched_result = {
                            **result,
                            'chapter_id': chapter_id,
                            'source_sentence': sentence_text,
                            'sentence_key': sentence_key
                        }

                        sentence_result_map[sentence_key]['results'].append(enriched_result)
                        all_results.append(enriched_result)

            # Build per-sentence result objects.
            for sentence_key, sentence_data in sentence_result_map.items():
                # Sort sentence results by score
                sentence_data['results'].sort(key=lambda x: x['score'], reverse=True)

                # Apply score threshold and limit per sentence
                filtered_sentence_results = [
                    r for r in sentence_data['results']
                    if r['score'] >= request.score_threshold
                ][:request.limit]

                # Convert to SearchResult objects
                sentence_search_results = []
                for result in filtered_sentence_results:
                    payload = result['payload']
                    search_result = SearchResult(
                        code=payload.get('code', 'N/A'),
                        title=payload.get('title', 'N/A'),
                        description=payload.get('description'),
                        score=result['score'],
                        chapter_id=result.get('chapter_id'),
                        collection=result['collection'],
                        source_sentence=result.get('source_sentence'),
                        sentence_key=result.get('sentence_key')
                    )
                    sentence_search_results.append(search_result)

                # Only include sentences that produced at least one result.
                if sentence_search_results:
                    sentence_result_obj = SentenceResults(
                        sentence_text=sentence_data['text'],
                        sentence_key=sentence_key,
                        results=sentence_search_results,
                        total_results=len(sentence_search_results)
                    )
                    sentence_results.append(sentence_result_obj)

            # Order sentences by the mean score of their results.
            sentence_results.sort(
                key=lambda x: sum(r.score for r in x.results) / len(x.results) if x.results else 0,
                reverse=True
            )

            # Process flattened results for backward compatibility
            all_results.sort(key=lambda x: x['score'], reverse=True)
            all_results = all_results[:request.limit]

        elif request.search_mode == "all_chapters":
            # Not yet implemented for the sentence-based pipeline; returns empty results.
            logger.info("All chapters search mode - using original logic")

        elif request.search_mode == "specific_chapters":
            # Not yet implemented for the sentence-based pipeline; returns empty results.
            logger.info("Specific chapters search mode - using original logic")

        else:
            raise HTTPException(status_code=400, detail=f"Unknown search mode: {request.search_mode}")

        # Convert flattened results to response format (for backward compatibility)
        for result in all_results:
            if result['score'] >= request.score_threshold:
                payload = result['payload']
                search_result = SearchResult(
                    code=payload.get('code', 'N/A'),
                    title=payload.get('title', 'N/A'),
                    description=payload.get('description'),
                    score=result['score'],
                    chapter_id=result.get('chapter_id'),
                    collection=result['collection'],
                    source_sentence=result.get('source_sentence'),
                    sentence_key=result.get('sentence_key')
                )
                results.append(search_result)

        search_time = time.time() - start_time

        logger.info(f"Sentence-based search completed: {len(results)} total results, {len(sentence_results)} sentences in {search_time:.3f}s")

        # Debug output
        logger.info(f"Sentence results breakdown:")
        for sent_result in sentence_results:
            logger.info(f"  '{sent_result.sentence_text}': {sent_result.total_results} results")

        return SearchResponse(
            query=query,
            total_results=len(results),
            search_time=search_time,
            search_mode=request.search_mode,
            relevant_chapters=relevant_chapters,
            results=results,  # Flattened results for backward compatibility
            sentence_results=sentence_results  # Results organized by sentence
        )

    except HTTPException:
        # FIX: let deliberate HTTP errors (e.g. the 400 for an unknown mode)
        # propagate instead of being converted into a generic 500 below.
        raise
    except Exception as e:
        logger.error(f"Search error: {e}")
        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")
374
+
375
+
376
+
377
+
378
+ # Backward compatibility endpoint (your original endpoint)
379
+ # @app.get("/api/search/legacy")
380
+ # def search_legacy(q: str):
381
+ # """
382
+ # Legacy search endpoint for backward compatibility
383
+ # Uses your original neural searcher
384
+ # """
385
+ # # if not neural_searcher:
386
+ # # raise HTTPException(status_code=503, detail="Legacy search system not available")
387
+
388
+ # if not q or not q.strip():
389
+ # raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
390
+
391
+ # try:
392
+ # result = neural_searcher.search(text=q.strip())
393
+ # return {"result": result}
394
+ # except Exception as e:
395
+ # logger.error(f"Legacy search error: {e}")
396
+ # raise HTTPException(status_code=500, detail=f"Legacy search failed: {str(e)}")
397
+
398
+ # Get available chapters
399
@app.get("/api/chapters")
def get_available_chapters():
    """
    Get list of available ICD-10 chapters and their descriptions.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Chapter system not available")

    try:
        chapter_collections = chapter_retriever.get_chapter_collections()

        # Pair each chapter with its backing collection and description.
        chapters = [
            {
                "chapter_id": chapter_id,
                "collection_name": collection_name,
                "description": chapter_retriever.chapter_info.get(chapter_id, "Unknown chapter"),
            }
            for chapter_id, collection_name in chapter_collections.items()
        ]

        return {
            "total_chapters": len(chapters),
            "chapters": chapters,
        }
    except Exception as e:
        logger.error(f"Error getting chapters: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get chapters: {str(e)}")
426
+
427
+ # Get search suggestions/autocomplete (optional enhancement)
428
@app.get("/api/suggest")
def get_search_suggestions(
    q: str = Query(..., min_length=2, description="Partial query for suggestions"),
    limit: int = Query(5, ge=1, le=20, description="Maximum number of suggestions")
):
    """
    Get search suggestions based on partial query.

    Simple substring match over a hard-coded vocabulary; swap in a real
    autocomplete index for production-quality suggestions.
    """
    common_terms = [
        "chest pain", "shortness of breath", "diabetes", "hypertension",
        "pneumonia", "fracture", "depression", "anxiety", "fever",
        "headache", "abdominal pain", "nausea", "vomiting", "infection",
        "cancer", "tumor", "heart attack", "stroke", "asthma"
    ]

    needle = q.lower().strip()
    matches = [term for term in common_terms if needle in term.lower()]

    return {"suggestions": matches[:limit]}
451
+
452
if __name__ == "__main__":
    import uvicorn

    # Development entry point: serve on all interfaces with access logging.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
        log_level="info",
        access_log=True
    )