Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- app.py +808 -0
- chapter_retrieval_system_v2.py +865 -0
- requirements.txt +0 -0
- service_v2.py +462 -0
app.py
ADDED
|
@@ -0,0 +1,808 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import requests
|
| 3 |
+
import time
|
| 4 |
+
import re
|
| 5 |
+
import threading
|
| 6 |
+
import uvicorn
|
| 7 |
+
import logging
|
| 8 |
+
import os
|
| 9 |
+
import signal
|
| 10 |
+
import sys
|
| 11 |
+
from typing import Dict, List, Optional, Tuple
|
| 12 |
+
from collections import defaultdict
|
| 13 |
+
|
| 14 |
+
# Import your backend modules
|
| 15 |
+
from service_v2 import app as fastapi_app
|
| 16 |
+
from chapter_retrieval_system_v2 import MultiCollectionChapterRetrieval
|
| 17 |
+
|
| 18 |
+
# Configure logging for Spaces
|
| 19 |
+
logging.basicConfig(
|
| 20 |
+
level=logging.INFO,
|
| 21 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 22 |
+
)
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
class ICD10SearchInterface:
|
| 26 |
+
    def __init__(self, api_base_url: str = "http://127.0.0.1:8000"):
        """Initialize the interface with API base URL.

        Args:
            api_base_url: Root URL of the local FastAPI backend; any trailing
                slash is stripped so paths can be appended with f-strings.
        """
        self.api_base_url = api_base_url.rstrip('/')
        # Set to True by wait_for_server() once /health answers 200.
        self.server_ready = False
        self.max_retries = 30  # Increased for Spaces startup time

        # ICD-10 code to chapter mapping
        # Static prefix table used to annotate results with chapter context.
        self.code_to_chapter = self._build_code_to_chapter_mapping()
|
| 34 |
+
|
| 35 |
+
def _build_code_to_chapter_mapping(self) -> Dict[str, Dict[str, str]]:
|
| 36 |
+
"""Build mapping from ICD-10 code ranges to chapters"""
|
| 37 |
+
return {
|
| 38 |
+
# Chapter I: Certain infectious and parasitic diseases (A00-B99)
|
| 39 |
+
"chapter_1_I": {
|
| 40 |
+
"title": "Certain infectious and parasitic diseases",
|
| 41 |
+
"code_ranges": ["A", "B"],
|
| 42 |
+
"description": "Infectious diseases, parasitic diseases, and related conditions"
|
| 43 |
+
},
|
| 44 |
+
|
| 45 |
+
# Chapter II: Neoplasms (C00-D49)
|
| 46 |
+
"chapter_2_II": {
|
| 47 |
+
"title": "Neoplasms",
|
| 48 |
+
"code_ranges": ["C", "D"],
|
| 49 |
+
"description": "Malignant neoplasms, benign neoplasms, and neoplasms of uncertain behavior"
|
| 50 |
+
},
|
| 51 |
+
|
| 52 |
+
# Chapter III: Diseases of blood and blood-forming organs (D50-D89)
|
| 53 |
+
"chapter_3_III": {
|
| 54 |
+
"title": "Diseases of the blood and blood-forming organs",
|
| 55 |
+
"code_ranges": ["D5", "D6", "D7", "D8"],
|
| 56 |
+
"description": "Anemias, coagulation defects, and other blood disorders"
|
| 57 |
+
},
|
| 58 |
+
|
| 59 |
+
# Chapter IV: Endocrine, nutritional and metabolic diseases (E00-E89)
|
| 60 |
+
"chapter_4_IV": {
|
| 61 |
+
"title": "Endocrine, nutritional and metabolic diseases",
|
| 62 |
+
"code_ranges": ["E"],
|
| 63 |
+
"description": "Diabetes, thyroid disorders, nutritional deficiencies, and metabolic disorders"
|
| 64 |
+
},
|
| 65 |
+
|
| 66 |
+
# Chapter V: Mental and behavioural disorders (F01-F99)
|
| 67 |
+
"chapter_5_V": {
|
| 68 |
+
"title": "Mental and behavioural disorders",
|
| 69 |
+
"code_ranges": ["F"],
|
| 70 |
+
"description": "Mental disorders, substance abuse, and behavioral conditions"
|
| 71 |
+
},
|
| 72 |
+
|
| 73 |
+
# Chapter VI: Diseases of the nervous system (G00-G99)
|
| 74 |
+
"chapter_6_VI": {
|
| 75 |
+
"title": "Diseases of the nervous system",
|
| 76 |
+
"code_ranges": ["G"],
|
| 77 |
+
"description": "Neurological disorders, epilepsy, migraines, and nervous system diseases"
|
| 78 |
+
},
|
| 79 |
+
|
| 80 |
+
# Chapter VII: Diseases of the eye and adnexa (H00-H59)
|
| 81 |
+
"chapter_7_VII": {
|
| 82 |
+
"title": "Diseases of the eye and adnexa",
|
| 83 |
+
"code_ranges": ["H0", "H1", "H2", "H3", "H4", "H5"],
|
| 84 |
+
"description": "Eye diseases, visual disorders, and related conditions"
|
| 85 |
+
},
|
| 86 |
+
|
| 87 |
+
# Chapter VIII: Diseases of the ear and mastoid process (H60-H95)
|
| 88 |
+
"chapter_8_VIII": {
|
| 89 |
+
"title": "Diseases of the ear and mastoid process",
|
| 90 |
+
"code_ranges": ["H6", "H7", "H8", "H9"],
|
| 91 |
+
"description": "Hearing disorders, ear infections, and mastoid conditions"
|
| 92 |
+
},
|
| 93 |
+
|
| 94 |
+
# Chapter IX: Diseases of the circulatory system (I00-I99)
|
| 95 |
+
"chapter_9_IX": {
|
| 96 |
+
"title": "Diseases of the circulatory system",
|
| 97 |
+
"code_ranges": ["I"],
|
| 98 |
+
"description": "Heart disease, hypertension, stroke, and vascular disorders"
|
| 99 |
+
},
|
| 100 |
+
|
| 101 |
+
# Chapter X: Diseases of the respiratory system (J00-J99)
|
| 102 |
+
"chapter_10_X": {
|
| 103 |
+
"title": "Diseases of the respiratory system",
|
| 104 |
+
"code_ranges": ["J"],
|
| 105 |
+
"description": "Pneumonia, asthma, COPD, and other respiratory conditions"
|
| 106 |
+
},
|
| 107 |
+
|
| 108 |
+
# Chapter XI: Diseases of the digestive system (K00-K95)
|
| 109 |
+
"chapter_11_XI": {
|
| 110 |
+
"title": "Diseases of the digestive system",
|
| 111 |
+
"code_ranges": ["K"],
|
| 112 |
+
"description": "Gastrointestinal disorders, liver disease, and digestive conditions"
|
| 113 |
+
},
|
| 114 |
+
|
| 115 |
+
# Chapter XII: Diseases of the skin and subcutaneous tissue (L00-L99)
|
| 116 |
+
"chapter_12_XII": {
|
| 117 |
+
"title": "Diseases of the skin and subcutaneous tissue",
|
| 118 |
+
"code_ranges": ["L"],
|
| 119 |
+
"description": "Skin infections, dermatitis, and subcutaneous tissue disorders"
|
| 120 |
+
},
|
| 121 |
+
|
| 122 |
+
# Chapter XIII: Diseases of the musculoskeletal system (M00-M99)
|
| 123 |
+
"chapter_13_XIII": {
|
| 124 |
+
"title": "Diseases of the musculoskeletal system and connective tissue",
|
| 125 |
+
"code_ranges": ["M"],
|
| 126 |
+
"description": "Arthritis, bone disorders, muscle diseases, and connective tissue conditions"
|
| 127 |
+
},
|
| 128 |
+
|
| 129 |
+
# Chapter XIV: Diseases of the genitourinary system (N00-N99)
|
| 130 |
+
"chapter_14_XIV": {
|
| 131 |
+
"title": "Diseases of the genitourinary system",
|
| 132 |
+
"code_ranges": ["N"],
|
| 133 |
+
"description": "Kidney disease, urinary disorders, and reproductive system conditions"
|
| 134 |
+
},
|
| 135 |
+
|
| 136 |
+
# Chapter XV: Pregnancy, childbirth and the puerperium (O00-O9A)
|
| 137 |
+
"chapter_15_XV": {
|
| 138 |
+
"title": "Pregnancy, childbirth and the puerperium",
|
| 139 |
+
"code_ranges": ["O"],
|
| 140 |
+
"description": "Pregnancy complications, delivery issues, and postpartum conditions"
|
| 141 |
+
},
|
| 142 |
+
|
| 143 |
+
# Chapter XVI: Certain conditions originating in the perinatal period (P00-P96)
|
| 144 |
+
"chapter_16_XVI": {
|
| 145 |
+
"title": "Certain conditions originating in the perinatal period",
|
| 146 |
+
"code_ranges": ["P"],
|
| 147 |
+
"description": "Newborn conditions and perinatal complications"
|
| 148 |
+
},
|
| 149 |
+
|
| 150 |
+
# Chapter XVII: Congenital malformations (Q00-Q99)
|
| 151 |
+
"chapter_17_XVII": {
|
| 152 |
+
"title": "Congenital malformations, deformations and chromosomal abnormalities",
|
| 153 |
+
"code_ranges": ["Q"],
|
| 154 |
+
"description": "Birth defects and chromosomal disorders"
|
| 155 |
+
},
|
| 156 |
+
|
| 157 |
+
# Chapter XVIII: Symptoms, signs and abnormal findings (R00-R99)
|
| 158 |
+
"chapter_18_XVIII": {
|
| 159 |
+
"title": "Symptoms, signs and abnormal clinical and laboratory findings",
|
| 160 |
+
"code_ranges": ["R"],
|
| 161 |
+
"description": "Symptoms and signs not elsewhere classified"
|
| 162 |
+
},
|
| 163 |
+
|
| 164 |
+
# Chapter XIX: Injury, poisoning and external causes (S00-T88)
|
| 165 |
+
"chapter_19_XIX": {
|
| 166 |
+
"title": "Injury, poisoning and certain other consequences of external causes",
|
| 167 |
+
"code_ranges": ["S", "T"],
|
| 168 |
+
"description": "Injuries, poisoning, and external cause consequences"
|
| 169 |
+
},
|
| 170 |
+
|
| 171 |
+
# Chapter XX: External causes of morbidity (V01-Y99)
|
| 172 |
+
"chapter_20_XX": {
|
| 173 |
+
"title": "External causes of morbidity",
|
| 174 |
+
"code_ranges": ["V", "W", "X", "Y"],
|
| 175 |
+
"description": "External causes of injury and poisoning"
|
| 176 |
+
},
|
| 177 |
+
|
| 178 |
+
# Chapter XXI: Factors influencing health status (Z00-Z99)
|
| 179 |
+
"chapter_21_XXI": {
|
| 180 |
+
"title": "Factors influencing health status and contact with health services",
|
| 181 |
+
"code_ranges": ["Z"],
|
| 182 |
+
"description": "Health maintenance, screening, and healthcare encounters"
|
| 183 |
+
}
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
def wait_for_server(self, max_wait_time=60):
|
| 187 |
+
"""Wait for FastAPI server to be ready with enhanced logging"""
|
| 188 |
+
logger.info(f"Waiting for FastAPI server at {self.api_base_url}")
|
| 189 |
+
start_time = time.time()
|
| 190 |
+
attempt = 0
|
| 191 |
+
|
| 192 |
+
while time.time() - start_time < max_wait_time:
|
| 193 |
+
attempt += 1
|
| 194 |
+
try:
|
| 195 |
+
response = requests.get(f"{self.api_base_url}/health", timeout=10)
|
| 196 |
+
if response.status_code == 200:
|
| 197 |
+
self.server_ready = True
|
| 198 |
+
logger.info(f"FastAPI server ready after {attempt} attempts ({time.time() - start_time:.1f}s)")
|
| 199 |
+
return True
|
| 200 |
+
else:
|
| 201 |
+
logger.warning(f"Server returned status {response.status_code}, attempt {attempt}")
|
| 202 |
+
except requests.exceptions.RequestException as e:
|
| 203 |
+
if attempt % 10 == 0: # Log every 10 attempts
|
| 204 |
+
logger.info(f"Waiting for server... attempt {attempt} ({time.time() - start_time:.1f}s)")
|
| 205 |
+
time.sleep(2)
|
| 206 |
+
continue
|
| 207 |
+
|
| 208 |
+
logger.error(f"FastAPI server failed to start within {max_wait_time} seconds")
|
| 209 |
+
return False
|
| 210 |
+
|
| 211 |
+
def get_server_status(self) -> Tuple[bool, str]:
|
| 212 |
+
"""Get current server status for UI display"""
|
| 213 |
+
if not self.server_ready:
|
| 214 |
+
return False, "Server starting up..."
|
| 215 |
+
|
| 216 |
+
try:
|
| 217 |
+
response = requests.get(f"{self.api_base_url}/health", timeout=5)
|
| 218 |
+
if response.status_code == 200:
|
| 219 |
+
return True, "Server Ready"
|
| 220 |
+
else:
|
| 221 |
+
return False, f"Server Error (Status: {response.status_code})"
|
| 222 |
+
except requests.exceptions.RequestException as e:
|
| 223 |
+
return False, f"Connection Error: {str(e)}"
|
| 224 |
+
|
| 225 |
+
    def test_connection(self) -> Tuple[bool, str]:
        """Test if the API is accessible.

        Thin alias for get_server_status(), kept so callers have a
        descriptive name for the pre-search connectivity check.
        """
        return self.get_server_status()
|
| 228 |
+
|
| 229 |
+
# Keep all your existing methods (copy from original code)
|
| 230 |
+
def extract_category_code(self, icd_code: str) -> str:
|
| 231 |
+
"""Extract the main category code from ICD-10 code (e.g., I21.0 -> I21)"""
|
| 232 |
+
if not icd_code:
|
| 233 |
+
return ""
|
| 234 |
+
|
| 235 |
+
code = icd_code.strip().upper()
|
| 236 |
+
match = re.match(r'^([A-Z]\d{2,3})', code)
|
| 237 |
+
if match:
|
| 238 |
+
return match.group(1)
|
| 239 |
+
|
| 240 |
+
return code
|
| 241 |
+
|
| 242 |
+
def group_codes_by_category(self, results: List[Dict]) -> Dict[str, List[Dict]]:
|
| 243 |
+
"""Group ICD-10 codes by their main category"""
|
| 244 |
+
categories = defaultdict(list)
|
| 245 |
+
|
| 246 |
+
for result in results:
|
| 247 |
+
code = result.get('code', '')
|
| 248 |
+
category = self.extract_category_code(code)
|
| 249 |
+
if category:
|
| 250 |
+
categories[category].append(result)
|
| 251 |
+
|
| 252 |
+
return dict(categories)
|
| 253 |
+
|
| 254 |
+
def get_category_info(self, category_code: str, codes_in_category: List[Dict]) -> Dict:
|
| 255 |
+
"""Get information about a category from its codes"""
|
| 256 |
+
category_result = None
|
| 257 |
+
max_score = 0
|
| 258 |
+
|
| 259 |
+
for code_info in codes_in_category:
|
| 260 |
+
if code_info['code'] == category_code:
|
| 261 |
+
category_result = code_info
|
| 262 |
+
break
|
| 263 |
+
if code_info['score'] > max_score:
|
| 264 |
+
max_score = code_info['score']
|
| 265 |
+
category_result = code_info
|
| 266 |
+
|
| 267 |
+
return category_result or codes_in_category[0]
|
| 268 |
+
|
| 269 |
+
def get_chapter_info_for_code(self, icd_code: str) -> Optional[Dict[str, str]]:
|
| 270 |
+
"""Get chapter information for a given ICD-10 code"""
|
| 271 |
+
if not icd_code:
|
| 272 |
+
return None
|
| 273 |
+
|
| 274 |
+
code = icd_code.strip().upper()
|
| 275 |
+
|
| 276 |
+
# Check each chapter's code ranges
|
| 277 |
+
for chapter_id, chapter_data in self.code_to_chapter.items():
|
| 278 |
+
for code_prefix in chapter_data["code_ranges"]:
|
| 279 |
+
if code.startswith(code_prefix):
|
| 280 |
+
return {
|
| 281 |
+
"chapter_id": chapter_id,
|
| 282 |
+
"title": chapter_data["title"],
|
| 283 |
+
"description": chapter_data["description"]
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
return None
|
| 287 |
+
|
| 288 |
+
    def search_icd10(
        self,
        query: str,
        limit: int = 10,
        score_threshold: float = 0.3,
        search_mode: str = "smart",
        target_chapters: str = "",
        detailed_analysis: bool = False,
        chapters_per_sentence: int = 2
    ) -> str:
        """Search ICD-10 codes using the API with enhanced error handling for Spaces.

        Returns an HTML fragment in every case: formatted results on success,
        or a styled error panel for startup/connection/API/timeout failures.
        """
        if not query or not query.strip():
            return "Please enter a diagnostic query."

        # Startup guard: on a Spaces cold start the backend may not be up yet.
        if not self.server_ready:
            return """
            <div style='text-align: center; padding: 20px; background: #ffeaa7; border-radius: 8px; margin: 20px 0;'>
                <h3>Server Starting Up</h3>
                <p>The FastAPI server is still initializing. Please wait a moment and try again.</p>
                <p><em>This usually takes 10-30 seconds on first load.</em></p>
            </div>
            """

        is_connected, connection_msg = self.test_connection()
        if not is_connected:
            return f"""
            <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
                <h3>Connection Error</h3>
                <p>{connection_msg}</p>
                <p><em>Please refresh the page and try again.</em></p>
            </div>
            """

        try:
            params = {
                "q": query.strip(),
                # Over-fetch (2x) so category grouping/filtering still yields
                # enough entries to display.
                "limit": limit * 2,
                "score_threshold": score_threshold,
                "search_mode": search_mode or "smart",
                "detailed_analysis": detailed_analysis,
                "chapters_per_sentence": chapters_per_sentence
            }

            # Optional comma-separated chapter filter, only sent when non-blank.
            if target_chapters and target_chapters.strip():
                params["target_chapters"] = target_chapters.strip()

            start_time = time.time()
            response = requests.get(f"{self.api_base_url}/api/search", params=params, timeout=120)
            # NOTE(review): request_time is currently unused — presumably kept
            # for future logging/metrics; confirm before removing.
            request_time = time.time() - start_time

            if response.status_code != 200:
                # Backend errors may be JSON ({"detail": ...}) or plain text.
                error_data = response.json() if response.headers.get('content-type', '').startswith('application/json') else {"detail": response.text}
                return f"""
                <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
                    <h3>API Error ({response.status_code})</h3>
                    <p>{error_data.get('detail', 'Unknown error')}</p>
                </div>
                """

            data = response.json()
            return self._format_sentence_results_with_enhanced_categories(data)

        except requests.exceptions.Timeout:
            return """
            <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
                <h3>Request Timeout</h3>
                <p>The search is taking too long. Try reducing the limit or increasing the score threshold.</p>
            </div>
            """
        except requests.exceptions.RequestException as e:
            logger.error(f"Request error: {e}")
            return f"""
            <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
                <h3>Request Error</h3>
                <p>{str(e)}</p>
            </div>
            """
        except Exception as e:
            # Last-resort catch so the UI always gets renderable HTML back.
            logger.error(f"Unexpected error: {e}")
            return f"""
            <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
                <h3>Unexpected Error</h3>
                <p>{str(e)}</p>
            </div>
            """
|
| 373 |
+
|
| 374 |
+
    def _format_sentence_results_with_enhanced_categories(self, data: Dict) -> str:
        """Format sentence-based results with enhanced category and chapter information.

        Expects data['sentence_results'] as a list of dicts with
        'sentence_text', 'total_results', and 'results' (each result having
        'code', 'title', 'score', optional 'description') — NOTE(review):
        schema inferred from usage below; confirm against service_v2.
        Returns one HTML string for the whole results panel.
        """
        sentence_results = data.get('sentence_results', [])

        if not sentence_results:
            return "<div style='text-align: center; color: #666; padding: 20px;'>No sentence-based results available.</div>"

        # Panel intro/header.
        html = """
        <div style='margin-bottom: 20px;'>
            <h3 style='color: #2c3e50; margin-bottom: 15px;'>Results by Sentence with Enhanced Category Information</h3>
            <p style='color: #666; margin-bottom: 20px;'>
                Results are organized by sentence and grouped by ICD-10 categories with chapter context. High-scoring codes are highlighted.
            </p>
        </div>
        """

        # One bordered card per input sentence.
        for i, sent_result in enumerate(sentence_results, 1):
            # Group results by category
            categories = self.group_codes_by_category(sent_result['results'])

            html += f"""
            <div style='margin-bottom: 30px; border: 2px solid #3498db; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
                <div style='background: linear-gradient(135deg, #3498db, #2980b9); color: white; padding: 15px;'>
                    <h4 style='margin: 0; font-size: 1.2em;'>
                        Sentence {i}: "{sent_result['sentence_text']}"
                    </h4>
                    <div style='margin-top: 8px; font-size: 0.9em; opacity: 0.9;'>
                        <span style='background-color: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 12px; margin-right: 10px;'>
                            {sent_result['total_results']} total results
                        </span>
                        <span style='background-color: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 12px;'>
                            Top 3 of {len(categories)} categories
                        </span>
                    </div>
                </div>
                <div style='padding: 20px;'>
            """

            # Sort categories by highest score and limit to top 3
            sorted_categories = sorted(
                categories.items(),
                key=lambda x: max(code['score'] for code in x[1]),
                reverse=True
            )[:3]

            for category_code, codes_in_category in sorted_categories:
                # Get category information
                category_info = self.get_category_info(category_code, codes_in_category)
                highest_score = max(code['score'] for code in codes_in_category)
                category_color = self._get_category_color(highest_score)

                # Get chapter information for this category
                # (first code in the bucket is representative — all share a prefix)
                sample_code = codes_in_category[0].get('code', category_code)
                chapter_info = self.get_chapter_info_for_code(sample_code)

                # Build enhanced category header
                category_title = category_info.get('title', 'Unknown Category')
                chapter_display = ""
                chapter_tooltip = ""

                if chapter_info:
                    # chapter_id is 'chapter_<n>_<roman>'; show both pieces.
                    chapter_display = f" • Chapter {chapter_info['chapter_id'].split('_')[1]} ({chapter_info['chapter_id'].split('_')[2]})"
                    chapter_tooltip = f"title='{chapter_info['description']}'"

                html += f"""
                <div style='margin-bottom: 20px; border: 1px solid {category_color}; border-radius: 8px; overflow: hidden;'>
                    <div style='background-color: {category_color}; color: white; padding: 12px 15px;'>
                        <div style='display: flex; justify-content: space-between; align-items: flex-start;'>
                            <div style='flex-grow: 1;'>
                                <h5 style='margin: 0; font-size: 1em; line-height: 1.3;'>
                                    <span style='display: block;'>
                                        Category {category_code}: {category_title}
                                    </span>
                                    {f'<span style="font-size: 0.85em; opacity: 0.9; display: block; margin-top: 4px;" {chapter_tooltip}>{chapter_display}</span>' if chapter_info else ''}
                                </h5>
                                {f'<div style="font-size: 0.8em; opacity: 0.8; margin-top: 6px; line-height: 1.2;">{chapter_info["description"]}</div>' if chapter_info else ''}
                            </div>
                            <div style='text-align: right; margin-left: 15px;'>
                                <span style='font-size: 0.8em; background-color: rgba(255,255,255,0.2); padding: 2px 6px; border-radius: 10px; display: block;'>
                                    Max: {highest_score:.3f}
                                </span>
                                <span style='font-size: 0.75em; opacity: 0.8; margin-top: 2px; display: block;'>
                                    {len(codes_in_category)} codes
                                </span>
                            </div>
                        </div>
                    </div>
                    <div style='padding: 12px;'>
                """

                # Sort codes within category by score
                sorted_codes = sorted(codes_in_category, key=lambda x: x['score'], reverse=True)

                # Filter out codes that are the same as the category code
                filtered_codes = [code for code in sorted_codes if code.get('code', '') != category_code]

                # If we filtered out all codes or have no codes, show a message
                if not filtered_codes:
                    html += f"""
                    <div style='margin-bottom: 8px; padding: 12px; background-color: #f8f9fa; border-radius: 6px; border-left: 4px solid #95a5a6;'>
                        <div style='color: #666; text-align: center; font-style: italic;'>
                            Category {category_code} represents the main code group. Specific subcodes available in detailed search.
                        </div>
                    </div>
                    """
                else:
                    for j, result in enumerate(filtered_codes, 1):
                        score_color = self._get_score_color(result['score'])
                        is_high_score = result['score'] >= 0.6

                        # Add highlighting for high-scoring codes
                        highlight_style = ""
                        if is_high_score:
                            highlight_style = "box-shadow: 0 0 0 2px #f39c12; background: linear-gradient(135deg, #fff9e6, #ffffff);"

                        html += f"""
                        <div style='margin-bottom: 8px; padding: 12px; background-color: #f8f9fa; border-radius: 6px; border-left: 4px solid {score_color}; {highlight_style}'>
                            <div style='display: flex; justify-content: space-between; align-items: center;'>
                                <div style='flex-grow: 1;'>
                                    <strong style='color: #2c3e50; font-size: 1em;'>
                                        {result['code']} - {result['title']}
                                        {' ⭐' if is_high_score else ''}
                                    </strong>
                                </div>
                                <span style='background-color: {score_color}; color: white; padding: 3px 8px; border-radius: 4px; font-size: 0.85em; font-weight: bold;'>
                                    {result['score']:.3f}
                                </span>
                            </div>
                            {f"<div style='font-size: 0.9em; color: #666; margin-top: 8px; line-height: 1.4;'>{result['description'][:250]}{'...' if len(result.get('description', '')) > 250 else ''}</div>" if result.get('description') else ""}
                        </div>
                        """

                # Close the category's inner padding div and its border div.
                html += "</div></div>"

            # Close the sentence card's padding div and its border div.
            html += "</div></div>"

        # Enhanced legend with chapter info
        html += """
        <div style='background-color: #f8f9fa; border-radius: 8px; padding: 15px; margin-top: 20px;'>
            <h4 style='color: #2c3e50; margin-bottom: 15px;'>Enhanced Legend</h4>

            <div style='margin-bottom: 15px;'>
                <h5 style='color: #2c3e50; margin-bottom: 8px;'>Score Quality:</h5>
                <div style='display: flex; flex-wrap: wrap; gap: 15px; align-items: center;'>
                    <div style='display: flex; align-items: center;'>
                        <div style='width: 20px; height: 20px; background-color: #27ae60; border-radius: 3px; margin-right: 8px;'></div>
                        <span style='font-size: 0.9em;'>Excellent Match (≥0.8)</span>
                    </div>
                    <div style='display: flex; align-items: center;'>
                        <div style='width: 20px; height: 20px; background-color: #f39c12; border-radius: 3px; margin-right: 8px;'></div>
                        <span style='font-size: 0.9em;'>Good Match (≥0.6)</span>
                    </div>
                    <div style='display: flex; align-items: center;'>
                        <div style='width: 20px; height: 20px; background-color: #e67e22; border-radius: 3px; margin-right: 8px;'></div>
                        <span style='font-size: 0.9em;'>Fair Match (≥0.4)</span>
                    </div>
                    <div style='display: flex; align-items: center;'>
                        <div style='width: 20px; height: 20px; background-color: #e74c3c; border-radius: 3px; margin-right: 8px;'></div>
                        <span style='font-size: 0.9em;'>Low Match (<0.4)</span>
                    </div>
                </div>
            </div>

            <div>
                <h5 style='color: #2c3e50; margin-bottom: 8px;'>Features:</h5>
                <div style='display: flex; flex-wrap: wrap; gap: 20px; align-items: center; font-size: 0.9em;'>
                    <span>High-scoring codes (≥0.6)</span>
                    <span>Category grouping by ICD-10 structure</span>
                    <span>Chapter context and descriptions</span>
                    <span>Score-based category prioritization</span>
                    <span>Duplicate category codes filtered</span>
                </div>
            </div>
        </div>
        """

        return html
|
| 551 |
+
|
| 552 |
+
def _get_score_color(self, score: float) -> str:
|
| 553 |
+
"""Get color based on similarity score"""
|
| 554 |
+
if score >= 0.8:
|
| 555 |
+
return "#27ae60" # Green
|
| 556 |
+
elif score >= 0.6:
|
| 557 |
+
return "#f39c12" # Orange
|
| 558 |
+
elif score >= 0.4:
|
| 559 |
+
return "#e67e22" # Dark orange
|
| 560 |
+
else:
|
| 561 |
+
return "#e74c3c" # Red
|
| 562 |
+
|
| 563 |
+
def _get_category_color(self, max_score: float) -> str:
|
| 564 |
+
"""Get category header color based on highest score in category"""
|
| 565 |
+
if max_score >= 0.8:
|
| 566 |
+
return "#2ecc71" # Bright green
|
| 567 |
+
elif max_score >= 0.6:
|
| 568 |
+
return "#3498db" # Blue
|
| 569 |
+
elif max_score >= 0.4:
|
| 570 |
+
return "#9b59b6" # Purple
|
| 571 |
+
else:
|
| 572 |
+
return "#95a5a6" # Gray
|
| 573 |
+
|
| 574 |
+
def start_fastapi_server():
    """Run the background FastAPI server (blocking; intended for a daemon thread).

    The port defaults to 8000 and can be overridden via the FASTAPI_PORT
    environment variable. Startup failures are logged and swallowed so the
    Gradio UI can still come up and surface the error to the user.
    """
    try:
        logger.info("Starting FastAPI server...")
        # Port is configurable through the environment for Spaces deployments.
        server_port = int(os.environ.get("FASTAPI_PORT", "8000"))

        server_options = {
            "host": "127.0.0.1",        # internal only; the Gradio UI talks to it locally
            "port": server_port,
            "log_level": "info",
            "access_log": False,        # reduce log noise on Spaces
            "workers": 1,               # single worker for Spaces
            "timeout_keep_alive": 30,
        }
        uvicorn.run(fastapi_app, **server_options)
    except Exception as e:
        logger.error(f"FastAPI server failed to start: {e}")
        # Deliberately not re-raised: Gradio keeps running and reports the outage.
|
| 594 |
+
|
| 595 |
+
def create_gradio_interface():
    """Create the Gradio interface with server status monitoring.

    Builds a two-column Blocks UI: search parameters on the left (query box
    plus an "Advanced Options" accordion), category-grouped HTML results on
    the right, followed by example-query buttons and a footer. Returns the
    un-launched gr.Blocks app.
    """
    # Backend facade; get_server_status() and search_icd10() are defined on it.
    search_interface = ICD10SearchInterface()

    css = """
    .gradio-container {
        max-width: 1400px !important;
        margin: auto !important;
    }

    .server-status {
        transition: all 0.3s ease;
    }
    """

    with gr.Blocks(css=css, title="ICD-10 Smart Search", theme=gr.themes.Soft()) as demo:
        gr.HTML("""
        <div style='text-align: center; margin-bottom: 30px; padding: 25px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; box-shadow: 0 4px 8px rgba(0,0,0,0.1);'>
            <h1 style='color: white; margin: 0; font-size: 2.5em;'>ICD-10 Smart Search</h1>
            <p style='color: #f1f2f6; margin: 15px 0 0 0; font-size: 1.2em;'>Advanced diagnostic code search with AI-powered sentence analysis</p>
        </div>
        """)

        # Server status indicator: rendered green/red banner depending on the
        # FastAPI backend's readiness (polled again via demo.load below).
        def get_server_status():
            is_ready, msg = search_interface.get_server_status()
            if is_ready:
                return "<div class='server-status' style='text-align: center; padding: 10px; background: #00b894; color: white; border-radius: 5px; margin-bottom: 20px;'>🟢 Server Ready</div>"
            else:
                return f"<div class='server-status' style='text-align: center; padding: 10px; background: #e17055; color: white; border-radius: 5px; margin-bottom: 20px;'>🔴 {msg}</div>"

        server_status = gr.HTML(value=get_server_status())

        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<h3>Search Parameters</h3>")

                query_input = gr.Textbox(
                    label="Diagnostic Query",
                    placeholder="Enter diagnostic description (e.g., 'chest pain with shortness of breath')",
                    lines=3,
                    value=""
                )

                with gr.Accordion("Advanced Options", open=False):
                    with gr.Row():
                        limit_input = gr.Slider(
                            label="Maximum Results per Sentence",
                            minimum=5,
                            maximum=50,
                            value=15,
                            step=5,
                            info="Higher values show more codes per category"
                        )

                        score_threshold_input = gr.Slider(
                            label="Score Threshold",
                            minimum=0.1,
                            maximum=0.9,
                            value=0.2,
                            step=0.05,
                            info="Lower values include more potential matches"
                        )

                    search_mode_input = gr.Dropdown(
                        label="Search Mode",
                        choices=["smart", "all_chapters", "specific_chapters"],
                        value="smart"
                    )

                    # Hidden unless search_mode == "specific_chapters" (see change handler).
                    target_chapters_input = gr.Textbox(
                        label="Target Chapters (comma-separated)",
                        placeholder="e.g., chapter_9_IX, chapter_10_X",
                        visible=False
                    )

                    with gr.Row():
                        detailed_analysis_input = gr.Checkbox(
                            label="Include Detailed Analysis",
                            value=True
                        )

                        chapters_per_sentence_input = gr.Slider(
                            label="Chapters per Sentence",
                            minimum=1,
                            maximum=5,
                            value=3,
                            step=1
                        )

                search_button = gr.Button("Search ICD-10 Codes", variant="primary", size="lg")

                def update_target_chapters_visibility(search_mode):
                    # Only the "specific_chapters" mode needs the manual chapter list.
                    return gr.update(visible=(search_mode == "specific_chapters"))

                search_mode_input.change(
                    update_target_chapters_visibility,
                    inputs=search_mode_input,
                    outputs=target_chapters_input
                )

            with gr.Column(scale=2):
                gr.HTML("<h3>Enhanced Category-Grouped Results</h3>")
                sentence_results_output = gr.HTML(
                    value="<div style='text-align: center; color: #666; padding: 40px;'>Enter a diagnostic query and click search to see categorized results with chapter context.</div>"
                )

        # Example queries
        gr.HTML("<h3>Example Queries</h3>")

        example_queries = [
            "acute myocardial infarction with chest pain",
            "type 2 diabetes with diabetic nephropathy",
            "major depressive disorder with anxiety",
            "fracture of distal radius from fall",
            "acute appendicitis with peritonitis",
            "gestational diabetes in pregnancy",
            "chronic kidney disease stage 3",
            "essential hypertension with heart disease"
        ]

        # Lay out the 8 examples as 4 columns of 2 buttons each.
        with gr.Row():
            for i in range(0, len(example_queries), 2):
                with gr.Column():
                    for j in range(2):
                        if i + j < len(example_queries):
                            example_btn = gr.Button(
                                example_queries[i + j],
                                variant="secondary",
                                size="sm"
                            )
                            # Default arg binds the CURRENT query at lambda
                            # creation (avoids the late-binding closure pitfall).
                            example_btn.click(
                                lambda x=example_queries[i + j]: x,
                                outputs=query_input
                            )

        # Search functionality
        search_button.click(
            fn=search_interface.search_icd10,
            inputs=[
                query_input,
                limit_input,
                score_threshold_input,
                search_mode_input,
                target_chapters_input,
                detailed_analysis_input,
                chapters_per_sentence_input
            ],
            outputs=sentence_results_output
        )

        # Enhanced footer
        gr.HTML("""
        <div style='text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 12px; border: 1px solid #e9ecef;'>
            <p style='margin: 0; color: #666; line-height: 1.6;'>
                Powered by advanced semantic search and AI-driven sentence analysis<br>
                <strong>Features:</strong> Chapter context • Category descriptions • Score-based prioritization<br>
                <strong>Note:</strong> This tool is for research purposes only and should not replace professional medical diagnosis
            </p>
        </div>
        """)

        # Auto-refresh server status every 10 seconds.
        # NOTE(review): the `every=` kwarg on Blocks.load is version-sensitive
        # across Gradio releases — confirm against the pinned Gradio version.
        demo.load(get_server_status, outputs=server_status, every=10)

    return demo
|
| 761 |
+
|
| 762 |
+
# Global variable to track server thread
|
| 763 |
+
server_thread = None
|
| 764 |
+
|
| 765 |
+
def graceful_shutdown():
    """Handle graceful shutdown.

    Currently only logs the shutdown; Spaces sends SIGTERM when a Space is
    paused or rebuilt, and the signal handlers registered at module level
    route here.
    """
    logger.info("Shutting down application...")
    # Add any cleanup code here if needed (close clients, flush logs, ...)
|
| 769 |
+
|
| 770 |
+
# Signal handlers for graceful shutdown
|
| 771 |
+
signal.signal(signal.SIGTERM, lambda signum, frame: graceful_shutdown())
|
| 772 |
+
signal.signal(signal.SIGINT, lambda signum, frame: graceful_shutdown())
|
| 773 |
+
|
| 774 |
+
# Main application entry point for Hugging Face Spaces
|
| 775 |
+
if __name__ == "__main__":
    logger.info("Starting ICD-10 Search Application for Hugging Face Spaces...")

    try:
        # Start FastAPI server in background thread.
        # daemon=True: the thread dies automatically with the Gradio process.
        logger.info("Initializing FastAPI server thread...")
        server_thread = threading.Thread(target=start_fastapi_server, daemon=True)
        server_thread.start()
        logger.info("FastAPI server thread started")

        # Give server time to start (increased for Spaces).
        # NOTE(review): a fixed sleep is a startup race; polling the backend's
        # health endpoint would be more robust — the UI's status banner is the
        # only fallback if 8 s is not enough.
        logger.info("Waiting for FastAPI server initialization...")
        time.sleep(8)  # Increased wait time for Spaces

        # Create and launch Gradio interface
        logger.info("Creating Gradio interface...")
        demo = create_gradio_interface()

        # Launch for Spaces environment
        logger.info("Launching Gradio interface for Hugging Face Spaces...")
        demo.launch(
            share=False,  # Don't create public link
            show_error=True,  # Show errors for debugging
            # NOTE(review): `show_tips` was removed in Gradio 4.x and raises
            # TypeError there — confirm the version pinned in requirements.txt.
            show_tips=False,  # Don't show Gradio tips
            quiet=False,  # Show startup info
            server_name="0.0.0.0",  # Listen on all interfaces for Spaces
            server_port=7860,  # Default Gradio port for Spaces
            prevent_thread_lock=False,  # block here so the process stays alive
            root_path=os.environ.get("GRADIO_ROOT_PATH", "")  # Support for Spaces routing
        )

    except Exception as e:
        logger.error(f"Application failed to start: {e}")
        sys.exit(1)  # non-zero exit so the Spaces runtime reports the failure
|
chapter_retrieval_system_v2.py
ADDED
|
@@ -0,0 +1,865 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from qdrant_client import QdrantClient
|
| 2 |
+
from qdrant_client.models import VectorParams, Distance, PointStruct
|
| 3 |
+
import numpy as np
|
| 4 |
+
from typing import List, Dict, Optional, Tuple, Set
|
| 5 |
+
from collections import Counter, defaultdict
|
| 6 |
+
from sentence_transformers import SentenceTransformer
|
| 7 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 8 |
+
import time
|
| 9 |
+
import re
|
| 10 |
+
import pprint
|
| 11 |
+
import os
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
|
| 14 |
+
# Load environment variables
|
| 15 |
+
load_dotenv()
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class MultiCollectionChapterRetrieval:
|
| 19 |
+
def __init__(self, use_cloud: bool = True):
    """
    Initialize with Qdrant Cloud or local connection

    Args:
        use_cloud: If True, connects to Qdrant Cloud using environment variables
                   (QDRANT_URL / QDRANT_API_KEY); otherwise uses localhost:6333.
    """
    if use_cloud:
        self.client = self._create_cloud_client()
    else:
        self.client = QdrantClient("http://localhost:6333")

    # SentenceTransformer instance; created lazily by load_encoder().
    self.encoder = None

    # ICD-10 Chapter mapping (all 22 chapters): chapter_id -> human description.
    # Keys follow the "chapter_<num>_<roman>" scheme used by collection discovery.
    self.chapter_info = {
        "chapter_1_I": "Certain infectious and parasitic diseases",
        "chapter_2_II": "Neoplasms",
        "chapter_3_III": "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism",
        "chapter_4_IV": "Endocrine, nutritional and metabolic diseases",
        "chapter_5_V": "Mental and behavioural disorders",
        "chapter_6_VI": "Diseases of the nervous system",
        "chapter_7_VII": "Diseases of the eye and adnexa",
        "chapter_8_VIII": "Diseases of the ear and mastoid process",
        "chapter_9_IX": "Diseases of the circulatory system",
        "chapter_10_X": "Diseases of the respiratory system",
        "chapter_11_XI": "Diseases of the digestive system",
        "chapter_12_XII": "Diseases of the skin and subcutaneous tissue",
        "chapter_13_XIII": "Diseases of the musculoskeletal system and connective tissue",
        "chapter_14_XIV": "Diseases of the genitourinary system",
        "chapter_15_XV": "Pregnancy, childbirth and the puerperium",
        "chapter_16_XVI": "Certain conditions originating in the perinatal period",
        "chapter_17_XVII": "Congenital malformations, deformations and chromosomal abnormalities",
        "chapter_18_XVIII": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
        "chapter_19_XIX": "Injury, poisoning and certain other consequences of external causes",
        "chapter_20_XX": "External causes of morbidity and mortality",
        "chapter_21_XXI": "Factors influencing health status and contact with health services",
        "chapter_22_XXII": "Codes for special purposes"
    }

    # Cache for collection names; filled once by get_chapter_collections().
    self._chapter_collections = None
|
| 61 |
+
|
| 62 |
+
def _create_cloud_client(self) -> QdrantClient:
    """Build an authenticated Qdrant Cloud client from environment variables.

    Reads QDRANT_URL and QDRANT_API_KEY, verifies the connection by listing
    collections, and raises (ValueError for missing credentials, or the
    underlying client error) on failure.
    """
    url = os.getenv('QDRANT_URL')
    key = os.getenv('QDRANT_API_KEY')

    # Fail fast with setup instructions when credentials are absent.
    if not url or not key:
        raise ValueError(
            "Qdrant Cloud credentials not found in environment variables.\n"
            "Please set QDRANT_URL and QDRANT_API_KEY in your .env file:\n"
            "QDRANT_URL=https://your-cluster-id.region.aws.cloud.qdrant.io:6333\n"
            "QDRANT_API_KEY=your-api-key-here"
        )

    print(f"🔗 Connecting to Qdrant Cloud: {url}")

    try:
        cloud_client = QdrantClient(
            url=url,
            api_key=key,
            timeout=60,        # generous timeout for cloud round-trips
            prefer_grpc=True,  # gRPC gives better throughput than REST
        )
        # Smoke-test the connection before handing the client back.
        collections = cloud_client.get_collections()
        print(f"✅ Connected successfully! Found {len(collections.collections)} collections")
        return cloud_client
    except Exception as e:
        print(f"❌ Failed to connect to Qdrant Cloud: {e}")
        print("Please check your QDRANT_URL and QDRANT_API_KEY in the .env file")
        raise
|
| 97 |
+
|
| 98 |
+
def split_into_sentences(self, text: str) -> List[str]:
    """Split free text into sentences on terminal punctuation.

    Splits on runs of '.', '!' or '?'; surrounding whitespace is stripped
    and empty fragments are discarded. Simple rule-based splitting — swap
    in nltk or spacy if abbreviation-aware segmentation is needed.

    FIX: the original re-imported `re` locally on every call even though
    the module already imports it at the top of the file.

    Args:
        text: Raw diagnostic text; may be empty.

    Returns:
        List of non-empty sentence strings (terminal punctuation removed).
    """
    return [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]
|
| 106 |
+
|
| 107 |
+
def load_encoder(self, model_name: str = "all-MiniLM-L6-v2"):
    """Lazily initialise the sentence-transformer encoder.

    No-op when an encoder is already loaded; otherwise downloads/loads the
    named model and stores it on self.encoder.
    """
    if self.encoder is not None:
        return  # already loaded — nothing to do
    print(f"📥 Loading encoder: {model_name}")
    self.encoder = SentenceTransformer(model_name)
    print(f"✅ Encoder loaded successfully")
|
| 113 |
+
|
| 114 |
+
def encode_query(self, query: str) -> List[float]:
    """Encode a diagnostic string into an embedding vector.

    Loads the encoder on first use, then returns the embedding as a plain
    Python list of floats (Qdrant's expected query_vector format).
    """
    if self.encoder is None:
        self.load_encoder()
    embedding = self.encoder.encode([query])[0]
    return embedding.tolist()
|
| 119 |
+
|
| 120 |
+
def get_chapter_collections(self) -> Dict[str, str]:
    """
    Get mapping of chapter_id -> collection_name

    Discovers collections automatically based on naming patterns. The result
    is memoized in self._chapter_collections, so Qdrant is only queried once
    per instance. On any error an empty dict is returned (and NOT cached, so
    a later call will retry discovery).
    """
    if self._chapter_collections is not None:
        return self._chapter_collections  # cached from a previous discovery

    try:
        collections = self.client.get_collections()
        chapter_collections = {}

        print("🔍 Discovering chapter collections...")

        for collection in collections.collections:
            collection_name = collection.name

            # Try to match collection names to chapters
            chapter_match = None

            # Pattern 1: icd10_chapter_X_Y or chapter_X_Y
            # (arabic number immediately followed by a roman numeral, e.g. "chapter_9_IX")
            pattern1 = re.search(r'chapter[_-]?(\d+)[_-]?([IVX]+)', collection_name, re.IGNORECASE)
            if pattern1:
                chapter_num = pattern1.group(1)
                roman = pattern1.group(2)
                chapter_match = f"chapter_{chapter_num}_{roman}"

            # Pattern 2: Single collection with all chapters (e.g., icd10_codes_all_chapters)
            elif 'all' in collection_name.lower() and ('chapter' in collection_name.lower() or 'icd' in collection_name.lower()):
                print(f" 📚 Found unified collection: {collection_name}")
                # For unified collections, we'll handle this differently
                chapter_collections['unified_collection'] = collection_name
                continue

            # Pattern 3: Just the chapter part (chapter1, chapterI, etc.)
            elif 'chapter' in collection_name.lower():
                # NOTE(review): findall takes the FIRST digit/roman run anywhere
                # in the name — a name like "icd10_chapter2" would yield
                # numbers[0] == "10" (from "icd10"), not "2". Confirm this
                # heuristic against the actual collection names in the cluster.
                numbers = re.findall(r'\d+', collection_name)
                romans = re.findall(r'[IVX]+', collection_name)

                if numbers and romans:
                    chapter_match = f"chapter_{numbers[0]}_{romans[0]}"
                elif numbers:
                    # Try to convert number to roman numeral
                    num = int(numbers[0])
                    roman_map = {1: 'I', 2: 'II', 3: 'III', 4: 'IV', 5: 'V', 6: 'VI', 7: 'VII',
                                 8: 'VIII', 9: 'IX', 10: 'X', 11: 'XI', 12: 'XII', 13: 'XIII',
                                 14: 'XIV', 15: 'XV', 16: 'XVI', 17: 'XVII', 18: 'XVIII', 19: 'XIX',
                                 20: 'XX', 21: 'XXI', 22: 'XXII'}
                    if num in roman_map:
                        chapter_match = f"chapter_{num}_{roman_map[num]}"

            if chapter_match:
                chapter_collections[chapter_match] = collection_name
                print(f" ✓ {chapter_match} -> {collection_name}")

        print(f"📊 Found {len(chapter_collections)} chapter collections")

        # If we only found a unified collection, we'll need to handle searches differently
        if len(chapter_collections) == 1 and 'unified_collection' in chapter_collections:
            print("⚠️ Only unified collection found. Searches will use chapter filtering.")

        self._chapter_collections = chapter_collections
        return chapter_collections

    except Exception as e:
        print(f"❌ Error discovering collections: {e}")
        return {}
|
| 187 |
+
|
| 188 |
+
def search_single_collection(
    self,
    collection_name: str,
    query_vector: List[float],
    limit: int = 20,
    score_threshold: float = 0.3,
    chapter_filter: Optional[str] = None
) -> List[Dict]:
    """Search a single collection and return formatted results.

    Args:
        collection_name: Qdrant collection to query.
        query_vector: Pre-computed query embedding.
        limit: Maximum number of hits to return.
        score_threshold: Minimum similarity score for a hit.
        chapter_filter: TODO(review) — accepted but NOT applied: the original
            implementation never added it to the search request, so unified-
            collection chapter filtering silently returned unfiltered results.
            Implementing it requires knowing the chapter payload key; confirm
            the ingestion schema before wiring it in.

    Returns:
        List of dicts with 'collection', 'score', 'id' and 'payload' keys;
        empty list on failure (errors are printed, never raised).

    Fixes vs. original: the retry path used a bare ``except: pass`` (which
    also swallowed SystemExit/KeyboardInterrupt) and duplicated the
    result-formatting loop.
    """

    def _format(raw_results) -> List[Dict]:
        # Normalise Qdrant scored points into plain dicts for downstream code.
        return [
            {
                'collection': collection_name,
                'score': result.score,
                'id': result.id,
                'payload': result.payload
            }
            for result in raw_results
        ]

    search_params = {
        "collection_name": collection_name,
        "query_vector": query_vector,
        "limit": limit,
        "score_threshold": score_threshold
    }

    try:
        return _format(self.client.search(**search_params))
    except Exception as e:
        print(f"❌ Error searching {collection_name}: {e}")
        if "timeout" in str(e).lower():
            print(" This might be due to network issues. Retrying with lower limit...")
            # Retry once with cheaper parameters (smaller page, higher threshold).
            search_params["limit"] = min(limit, 10)
            search_params["score_threshold"] = max(score_threshold, 0.5)
            try:
                return _format(self.client.search(**search_params))
            except Exception as retry_error:  # narrowed from bare `except:`
                print(f"❌ Retry failed for {collection_name}: {retry_error}")
        return []
|
| 241 |
+
|
| 242 |
+
def analyze_chapters_parallel(
    self,
    diagnostic_string: str,
    sample_size_per_chapter: int = 15,
    score_threshold: float = 0.3,
    max_workers: int = 4  # Reduced for cloud stability
) -> Dict[str, Dict]:
    """
    Analyze all chapter collections in parallel to determine relevance.
    Optimized for cloud performance.

    Returns a dict keyed by chapter_id, sorted by descending
    'relevance_score'; each value carries match statistics plus the raw
    search hits. Chapters with no hits above the threshold are omitted.
    """
    query_vector = self.encode_query(diagnostic_string)
    chapter_collections = self.get_chapter_collections()

    if not chapter_collections:
        print("❌ No chapter collections found!")
        return {}

    print(f"\n🔍 Analyzing diagnostic: '{diagnostic_string}'")

    # Handle unified collection differently.
    # NOTE(review): this delegation is currently disabled; with only a
    # unified collection present, the parallel path below skips it entirely.
    # if 'unified_collection' in chapter_collections:
    #     return self._analyze_unified_collection(
    #         diagnostic_string, query_vector,
    #         chapter_collections['unified_collection'],
    #         sample_size_per_chapter, score_threshold
    #     )

    print(f"🔄 Searching {len(chapter_collections)} collections in parallel...")

    chapter_analysis = {}

    def search_chapter(chapter_id: str, collection_name: str) -> Tuple[str, List[Dict]]:
        """Search function for parallel execution with retry logic."""
        max_retries = 2
        for attempt in range(max_retries):
            try:
                results = self.search_single_collection(
                    collection_name, query_vector, sample_size_per_chapter, score_threshold
                )
                return chapter_id, results
            except Exception as e:
                if attempt < max_retries - 1:
                    print(f" ⚠️ Retry {attempt + 1} for {chapter_id}: {e}")
                    time.sleep(1)  # Brief delay before retry
                else:
                    print(f" ❌ Failed {chapter_id} after {max_retries} attempts: {e}")
                    return chapter_id, []

    # Execute searches in parallel
    start_time = time.time()

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit all search tasks (the unified pseudo-entry is excluded —
        # it is not a per-chapter collection).
        future_to_chapter = {
            executor.submit(search_chapter, chapter_id, collection_name): chapter_id
            for chapter_id, collection_name in chapter_collections.items()
            if chapter_id != 'unified_collection'
        }

        # Collect results as they complete
        for future in as_completed(future_to_chapter):
            chapter_id = future_to_chapter[future]
            try:
                chapter_id, results = future.result(timeout=30)  # 30 second timeout per search

                if results:
                    scores = [r['score'] for r in results]

                    # Calculate chapter statistics
                    chapter_analysis[chapter_id] = {
                        'collection_name': chapter_collections[chapter_id],
                        'match_count': len(results),
                        'max_score': max(scores),
                        'avg_score': np.mean(scores),
                        'median_score': np.median(scores),
                        'min_score': min(scores),
                        'score_std': np.std(scores),
                        'top_matches': sorted(results, key=lambda x: x['score'], reverse=True)[:5],
                        'all_results': results
                    }

                    # Calculate relevance score (weighted combination of metrics):
                    # average quality 40%, best hit 30%, hit-count fill ratio 20%,
                    # score consistency (low std dev) 10%.
                    relevance = (
                        chapter_analysis[chapter_id]['avg_score'] * 0.4 +
                        chapter_analysis[chapter_id]['max_score'] * 0.3 +
                        min(len(results) / sample_size_per_chapter, 1.0) * 0.2 +
                        (1.0 / (1.0 + chapter_analysis[chapter_id]['score_std'])) * 0.1
                    )

                    chapter_analysis[chapter_id]['relevance_score'] = relevance

                # print(f" ✅ {chapter_id}: {len(results)} matches, relevance: {relevance:.4f}")
                # else:
                # print(f" ➖ {chapter_id}: No matches above threshold")

            except Exception as e:
                print(f" ❌ {chapter_id}: Error - {e}")

    elapsed = time.time() - start_time
    print(f"⏱️ Parallel analysis completed in {elapsed:.2f} seconds")

    # Sort by relevance score (descending) so callers can take the head.
    sorted_analysis = dict(sorted(
        chapter_analysis.items(),
        key=lambda x: x[1]['relevance_score'],
        reverse=True
    ))

    return sorted_analysis
|
| 352 |
+
|
| 353 |
+
def _analyze_unified_collection(
    self,
    diagnostic_string: str,
    query_vector: List[float],
    collection_name: str,
    sample_size_per_chapter: int,
    score_threshold: float
) -> Dict[str, Dict]:
    """Analyze unified collection by searching with chapter filters.

    Iterates all 22 known chapters sequentially against the single
    collection, computing the same per-chapter statistics and relevance
    score as analyze_chapters_parallel, and returns the dict sorted by
    descending relevance.

    NOTE(review): search_single_collection currently accepts but does NOT
    apply `chapter_filter`, so every iteration here effectively searches
    the whole collection — the per-chapter breakdown is not real until
    that filter is implemented.
    """
    print(f"🔄 Analyzing unified collection: {collection_name}")

    chapter_analysis = {}

    # Search each chapter in the unified collection
    for chapter_id in self.chapter_info.keys():
        try:
            results = self.search_single_collection(
                collection_name, query_vector, sample_size_per_chapter,
                score_threshold, chapter_filter=chapter_id
            )

            if results:
                scores = [r['score'] for r in results]

                chapter_analysis[chapter_id] = {
                    'collection_name': collection_name,
                    'match_count': len(results),
                    'max_score': max(scores),
                    'avg_score': np.mean(scores),
                    'median_score': np.median(scores),
                    'min_score': min(scores),
                    'score_std': np.std(scores),
                    'top_matches': sorted(results, key=lambda x: x['score'], reverse=True)[:5],
                    'all_results': results
                }

                # Calculate relevance score — same weighting as the parallel
                # path: avg 40%, max 30%, fill ratio 20%, consistency 10%.
                relevance = (
                    chapter_analysis[chapter_id]['avg_score'] * 0.4 +
                    chapter_analysis[chapter_id]['max_score'] * 0.3 +
                    min(len(results) / sample_size_per_chapter, 1.0) * 0.2 +
                    (1.0 / (1.0 + chapter_analysis[chapter_id]['score_std'])) * 0.1
                )

                chapter_analysis[chapter_id]['relevance_score'] = relevance
                print(f" ✅ {chapter_id}: {len(results)} matches, relevance: {relevance:.4f}")
            else:
                print(f" ➖ {chapter_id}: No matches above threshold")

            # Small delay to avoid overwhelming the cloud service
            time.sleep(0.1)

        except Exception as e:
            print(f" ❌ {chapter_id}: Error - {e}")

    # Sort by relevance score (descending)
    return dict(sorted(
        chapter_analysis.items(),
        key=lambda x: x[1]['relevance_score'],
        reverse=True
    ))
|
| 414 |
+
|
| 415 |
+
def get_top_chapters(
    self,
    diagnostic_string: str,
    top_n: int = 5,
    min_relevance: float = 0.1
) -> List[Tuple[str, float, str]]:
    """
    Get top N most relevant chapters for a diagnostic string.

    Args:
        diagnostic_string: Free-text diagnostic description to analyze.
        top_n: Maximum number of chapters to return.
        min_relevance: Chapters scoring below this relevance are skipped.

    Returns:
        [(chapter_id, relevance_score, description)] in descending relevance
        order (analyze_chapters_parallel returns chapters pre-sorted).
    """
    analysis = self.analyze_chapters_parallel(diagnostic_string)

    top_chapters = []
    for chapter_id, stats in analysis.items():
        # analysis is sorted by relevance descending, so once top_n entries
        # are collected no later chapter can qualify — stop scanning early
        # instead of iterating the remaining chapters uselessly.
        if len(top_chapters) >= top_n:
            break

        relevance = stats['relevance_score']
        if relevance >= min_relevance:
            description = self.chapter_info.get(chapter_id, "Unknown chapter")
            top_chapters.append((chapter_id, relevance, description))

    return top_chapters
|
| 436 |
+
|
| 437 |
+
def search_targeted_chapters(
    self,
    diagnostic_string: str,
    target_chapters: List[str] = None,
    results_per_chapter: int = 10,  # Keep for backward compatibility
    results_per_sentence: int = 3,
    chapters_per_sentence: int = 2  # New parameter: how many top chapters to search per sentence
) -> Dict[str, Dict[str, List[Dict]]]:
    """
    Search only specific chapters or auto-identify top chapters for each sentence individually.
    Now searches only the most relevant chapters for each specific sentence.

    Args:
        diagnostic_string: Free-text diagnostic; split into sentences, each
            searched independently against the vector collections.
        target_chapters: Explicit chapter IDs to search. When None, the most
            relevant chapters are auto-identified per sentence via
            get_top_chapters.
        results_per_chapter: Unused; retained for backward compatibility.
        results_per_sentence: Max hits returned per sentence per chapter.
        chapters_per_sentence: In auto mode, how many top chapters to search
            for each sentence.

    Returns:
        {chapter_id: {sentence_key: {'text': sentence,
                                     'chapter_relevance': float or None,
                                     'results': [hit dicts]}}}
        Only chapters/sentences that produced hits are included.
    """
    print(f"\n=== STARTING search_targeted_chapters ===")
    print(f"Input parameters:")
    print(f" diagnostic_string: '{diagnostic_string[:100]}{'...' if len(diagnostic_string) > 100 else ''}'")
    print(f" target_chapters: {target_chapters}")
    print(f" results_per_sentence: {results_per_sentence}")
    print(f" chapters_per_sentence: {chapters_per_sentence}")

    # Split input into sentences first
    print(f"\n--- SENTENCE SPLITTING ---")
    sentences = self.split_into_sentences(diagnostic_string)
    print(f"Split into {len(sentences)} sentences:")
    for i, sentence in enumerate(sentences):
        print(f" [{i+1}]: '{sentence}'")

    print(f"\n--- GETTING CHAPTER COLLECTIONS ---")
    chapter_collections = self.get_chapter_collections()
    print(f"Available chapter collections: {len(chapter_collections)} total")
    print(f"Chapter IDs: {list(chapter_collections.keys())}")

    results = {}

    if target_chapters is None:
        # Auto mode: pick the best chapters PER SENTENCE, not for the whole query.
        print(f"\n=== AUTO-IDENTIFICATION MODE ===")
        print("Auto-identifying most relevant chapters for each sentence individually...")

        for i, sentence in enumerate(sentences):
            if sentence.strip():  # Skip empty sentences
                sentence_key = f"sentence_{i+1}"
                print(f"\n--- Processing sentence {i+1} ---")
                print(f"Sentence: '{sentence}'")
                print(f"Sentence key: {sentence_key}")

                # Get top chapters specifically for THIS sentence
                print(f"Getting top {chapters_per_sentence} chapters for this sentence...")
                try:
                    sentence_top_chapters = self.get_top_chapters(
                        sentence,
                        top_n=chapters_per_sentence,
                        min_relevance=0.05
                    )
                    print(f"Found {len(sentence_top_chapters)} relevant chapters:")
                    for j, (ch_id, rel, desc) in enumerate(sentence_top_chapters):
                        print(f" [{j+1}] {ch_id}: {rel:.4f} - {desc}")
                except Exception as e:
                    # Chapter selection failure degrades to "no chapters" for
                    # this sentence rather than aborting the whole search.
                    print(f"ERROR in get_top_chapters: {e}")
                    sentence_top_chapters = []

                # Search only the relevant chapters for this specific sentence
                print(f"Searching in {len(sentence_top_chapters)} selected chapters...")
                for chapter_id, relevance, description in sentence_top_chapters:
                    print(f"\n >> Searching chapter: {chapter_id} (relevance: {relevance:.4f})")

                    if chapter_id in chapter_collections:
                        collection_name = chapter_collections[chapter_id]
                        print(f" Collection name: {collection_name}")

                        # Initialize chapter in results if not exists
                        if chapter_id not in results:
                            results[chapter_id] = {}
                            print(f" Initialized results dict for chapter {chapter_id}")

                        # Search this sentence in this specific chapter
                        try:
                            print(f" Encoding query for sentence...")
                            query_vector = self.encode_query(sentence)
                            print(f" Query vector shape: {getattr(query_vector, 'shape', 'N/A')}")

                            print(f" Searching collection '{collection_name}' for top {results_per_sentence} results...")
                            sentence_results = self.search_single_collection(
                                collection_name, query_vector, results_per_sentence
                            )
                            print(f" Raw search returned {len(sentence_results) if sentence_results else 0} results")

                        except Exception as e:
                            # Search errors are per-chapter; other chapters proceed.
                            print(f" ERROR during search: {e}")
                            sentence_results = []

                        if sentence_results:
                            results[chapter_id][sentence_key] = {
                                'text': sentence,
                                'chapter_relevance': relevance,
                                'results': sentence_results
                            }
                            print(f" ✓ Stored {len(sentence_results)} results for {chapter_id}[{sentence_key}]")

                            # Debug: show top result scores
                            if sentence_results:
                                top_scores = [r.get('score', 'N/A') for r in sentence_results[:3]]
                                print(f" Top 3 scores: {top_scores}")
                        else:
                            print(f" ✗ No results above threshold for {chapter_id}")
                    else:
                        print(f" ERROR: Chapter {chapter_id} collection not found in available collections")
            else:
                print(f"\n--- Skipping empty sentence {i+1} ---")

    else:
        # Pre-specified mode: every sentence is searched in every valid
        # target chapter; no per-sentence relevance is computed.
        print(f"\n=== PRE-SPECIFIED CHAPTERS MODE ===")
        print(f"Using pre-specified chapters: {target_chapters}")

        # Validate chapters exist
        valid_chapters = []
        invalid_chapters = []
        for chapter_id in target_chapters:
            if chapter_id in chapter_collections:
                valid_chapters.append(chapter_id)
            else:
                invalid_chapters.append(chapter_id)

        print(f"Valid chapters: {valid_chapters}")
        if invalid_chapters:
            print(f"WARNING: Invalid chapters (will be skipped): {invalid_chapters}")

        for chapter_id in valid_chapters:
            collection_name = chapter_collections[chapter_id]
            print(f"\n--- Searching chapter: {chapter_id} ---")
            print(f"Collection name: {collection_name}")

            chapter_results = {}

            # Search each sentence in this chapter
            for i, sentence in enumerate(sentences):
                if sentence.strip():  # Skip empty sentences
                    sentence_key = f"sentence_{i+1}"
                    print(f"\n >> Processing sentence {i+1} in {chapter_id}")
                    print(f" Sentence: '{sentence}'")

                    try:
                        print(f" Encoding query...")
                        query_vector = self.encode_query(sentence)
                        print(f" Query vector shape: {getattr(query_vector, 'shape', 'N/A')}")

                        print(f" Searching for top {results_per_sentence} results...")
                        sentence_results = self.search_single_collection(
                            collection_name, query_vector, results_per_sentence
                        )
                        print(f" Found {len(sentence_results) if sentence_results else 0} results")

                    except Exception as e:
                        print(f" ERROR during search: {e}")
                        sentence_results = []

                    if sentence_results:
                        chapter_results[sentence_key] = {
                            'text': sentence,
                            'chapter_relevance': None,  # Not calculated for pre-specified chapters
                            'results': sentence_results
                        }
                        print(f" ✓ Stored results for sentence {i+1}")

                        # Debug: show top result scores
                        top_scores = [r.get('score', 'N/A') for r in sentence_results[:3]]
                        print(f" Top 3 scores: {top_scores}")
                    else:
                        print(f" ✗ No results found for sentence {i+1}")
                else:
                    print(f" >> Skipping empty sentence {i+1}")

            if chapter_results:
                results[chapter_id] = chapter_results
                print(f"\n ✓ Chapter {chapter_id}: Stored results for {len(chapter_results)} sentences")
            else:
                print(f"\n ✗ Chapter {chapter_id}: No results found")

    # Final summary
    print(f"\n=== SEARCH COMPLETE ===")
    print(f"Results summary:")
    total_results = 0
    for chapter_id, chapter_data in results.items():
        sentence_count = len(chapter_data)
        result_count = sum(len(sent_data.get('results', [])) for sent_data in chapter_data.values())
        total_results += result_count
        print(f" {chapter_id}: {sentence_count} sentences, {result_count} total results")

    print(f"Grand total: {len(results)} chapters, {total_results} results")
    print(f"=== END search_targeted_chapters ===\n")

    return results
|
| 627 |
+
|
| 628 |
+
def format_chapter_analysis(self, diagnostic_string: str, detailed: bool = True) -> str:
    """Render a human-readable chapter relevance report.

    Runs analyze_chapters_parallel on the diagnostic string and formats
    per-chapter statistics (plus, when *detailed* is true, the top three
    matches per chapter) into one multi-line string.
    """
    analysis = self.analyze_chapters_parallel(diagnostic_string)
    if not analysis:
        return "❌ No relevant chapters found."

    bar = "=" * 90
    report = [
        f"\n{bar}",
        "📊 CHAPTER RELEVANCE ANALYSIS",
        f"🔍 Diagnostic: '{diagnostic_string}'",
        bar,
    ]

    for rank, (chapter_id, stats) in enumerate(analysis.items(), 1):
        # Chapters with negligible relevance are left out of the report.
        if stats['relevance_score'] < 0.05:
            continue

        description = self.chapter_info.get(chapter_id, "Unknown chapter")

        report.append(f"\n{rank}. 📚 {chapter_id.upper()}")
        report.append(f" 🏷️ Collection: {stats['collection_name']}")
        report.append(f" 📖 Description: {description}")
        report.append(f" ⭐ Relevance Score: {stats['relevance_score']:.4f}")
        report.append(f" 📊 Statistics:")
        report.append(f" • Matches: {stats['match_count']}")
        report.append(f" • Max Score: {stats['max_score']:.4f}")
        report.append(f" • Avg Score: {stats['avg_score']:.4f}")
        report.append(f" • Score Range: {stats['min_score']:.4f} - {stats['max_score']:.4f}")

        if detailed:
            report.append(f"\n 🎯 Top Matches:")
            for pos, match in enumerate(stats['top_matches'][:3], 1):
                payload = match['payload']
                report.append(f" {pos}. {payload.get('code', 'N/A')} - {payload.get('title', 'N/A')}")
                report.append(f" 💯 Similarity: {match['score']:.4f}")

        report.append("-" * 90)

    return "\n".join(report)
|
| 669 |
+
|
| 670 |
+
|
| 671 |
+
# Convenience functions for multi-collection setup
|
| 672 |
+
def analyze_diagnostic_chapters(diagnostic_string: str, detailed: bool = True, use_cloud: bool = True) -> str:
    """Analyze which ICD-10 chapters are most relevant for a diagnostic.

    Builds a fresh MultiCollectionChapterRetrieval instance and returns its
    formatted chapter relevance report for *diagnostic_string*.
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)
    report = retriever.format_chapter_analysis(diagnostic_string, detailed)
    return report
|
| 678 |
+
|
| 679 |
+
def get_relevant_chapters(diagnostic_string: str, top_n: int = 5, use_cloud: bool = True) -> List[str]:
    """Return the IDs of the most relevant chapters for a diagnostic string.

    Example return value: ['chapter_9_IX', 'chapter_10_X', ...]
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)
    ranked = retriever.get_top_chapters(diagnostic_string, top_n)
    # Each ranked entry is (chapter_id, relevance, description); keep the IDs.
    return [entry[0] for entry in ranked]
|
| 687 |
+
|
| 688 |
+
def smart_diagnostic_search(
    diagnostic_string: str,
    auto_select_chapters: bool = True,
    target_chapters: List[str] = None,
    results_per_sentence: int = 3,
    use_cloud: bool = True
) -> Dict[str, Dict[str, List[Dict]]]:
    """
    Intelligent diagnostic search that processes each sentence separately.
    Optimized for Qdrant Cloud.

    Args:
        diagnostic_string: Free-text diagnostic to search.
        auto_select_chapters: Retained for backward compatibility but has no
            effect — chapter auto-selection is actually controlled by
            target_chapters (None triggers per-sentence auto-identification
            inside search_targeted_chapters).
        target_chapters: Explicit chapter IDs, or None for auto mode.
        results_per_sentence: Max hits per sentence per chapter.
        use_cloud: Whether to connect to Qdrant Cloud.

    Returns:
        {chapter_id: {sentence_key: {'text', 'chapter_relevance', 'results'}}}
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)

    # Both branches of the original if/else on auto_select_chapters were
    # byte-identical, so a single call is equivalent; auto vs. targeted
    # behavior is decided by target_chapters inside search_targeted_chapters.
    return retriever.search_targeted_chapters(
        diagnostic_string, target_chapters, results_per_sentence=results_per_sentence
    )
|
| 709 |
+
|
| 710 |
+
def format_smart_search_results(
    diagnostic_string: str,
    search_results: Dict[str, Dict[str, List[Dict]]],  # Updated parameter type
    use_cloud: bool = True
) -> str:
    """Format the results from sentence-based smart_diagnostic_search.

    Args:
        diagnostic_string: The original query, echoed in the report header.
        search_results: Output of smart_diagnostic_search /
            search_targeted_chapters:
            {chapter_id: {sentence_key: {'text', 'chapter_relevance', 'results'}}}.
        use_cloud: Passed to MultiCollectionChapterRetrieval, which is
            instantiated only to resolve chapter descriptions.

    Returns:
        A multi-line, emoji-annotated report string, or an error message
        when search_results is empty.
    """

    if not search_results:
        return "❌ No results found."

    # NOTE(review): a full retriever is constructed just to read
    # chapter_info descriptions — consider passing the mapping in instead.
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)

    output = []
    output.append(f"\n{'='*90}")
    output.append(f"🔍 SENTENCE-BASED DIAGNOSTIC SEARCH RESULTS")
    output.append(f"🎯 Query: '{diagnostic_string}'")
    output.append(f"{'='*90}")

    # Count total results
    total_results = 0
    total_sentences = 0
    for chapter_results in search_results.values():
        total_sentences += len(chapter_results)
        for sentence_data in chapter_results.values():
            total_results += len(sentence_data['results'])

    output.append(f"📊 Total results: {total_results} across {len(search_results)} chapters and {total_sentences} sentences")

    for chapter_id, chapter_data in search_results.items():
        description = retriever.chapter_info.get(chapter_id, "Unknown chapter")

        output.append(f"\n📚 {chapter_id.upper()}")
        output.append(f" 📖 {description}")
        output.append(f" 📝 {len(chapter_data)} sentences processed")
        output.append("-" * 60)

        for sentence_key, sentence_data in chapter_data.items():
            sentence_text = sentence_data['text']
            results = sentence_data['results']

            output.append(f"\n 🔍 {sentence_key.replace('_', ' ').title()}: \"{sentence_text}\"")
            output.append(f" 🎯 Top {len(results)} matches:")
            output.append("")

            for i, result in enumerate(results, 1):
                payload = result['payload']
                code = payload.get('code', 'N/A')
                title = payload.get('title', 'N/A')
                score = result['score']

                output.append(f" {i}. {code} - {title}")
                output.append(f" 💯 Score: {score:.4f}")

                # Show description if available
                desc = payload.get('description', '')
                if desc:
                    # Truncate long descriptions to keep the report compact.
                    desc_preview = desc[:100] + "..." if len(desc) > 100 else desc
                    output.append(f" 📄 {desc_preview}")

                output.append("")

    output.append("=" * 90)

    return "\n".join(output)
|
| 774 |
+
|
| 775 |
+
# Example usage
|
| 776 |
+
def example_multi_collection_analysis(use_cloud: bool = True):
    """Demonstrate the multi-collection chapter analysis pipeline end to end.

    Walks a set of sample diagnostics through chapter relevance analysis,
    top-chapter selection, and sentence-based smart search, printing each
    stage's output. Errors on one diagnostic do not stop the rest.
    """
    sample_diagnostics = [
        "severe chest pain with shortness of breath",
        "type 2 diabetes with kidney complications",
        "depression and anxiety disorder",
        "broken wrist from falling",
        "acute appendicitis with fever",
        "skin cancer melanoma",
        "pregnancy complications in third trimester",
    ]

    banner = "=" * 100
    for query in sample_diagnostics:
        print(f"\n{banner}")
        print(f"🔍 ANALYZING: {query}")
        print(f"{banner}")

        try:
            # Step 1: Analyze chapter relevance
            print(analyze_diagnostic_chapters(query, detailed=False, use_cloud=use_cloud))

            # Step 2: Get top relevant chapters
            top_chapters = get_relevant_chapters(query, top_n=3, use_cloud=use_cloud)
            print(f"\n🏆 Top 3 relevant chapters: {top_chapters}")

            # Step 3: Smart search in those chapters
            hits = smart_diagnostic_search(
                query,
                results_per_sentence=5,
                use_cloud=use_cloud
            )
            print(format_smart_search_results(query, hits, use_cloud=use_cloud))

        except Exception as e:
            print(f"❌ Error processing '{query}': {e}")
            continue
|
| 819 |
+
|
| 820 |
+
def test_cloud_connection():
    """Smoke-test the Qdrant Cloud connection and basic search plumbing.

    Returns True when collections are reachable and a trial ranking
    succeeds, False on any failure (errors are printed, not raised).
    """
    print("🧪 Testing Qdrant Cloud Connection...")

    try:
        client = MultiCollectionChapterRetrieval(use_cloud=True)

        # Trial query used for the end-to-end check.
        test_query = "heart disease"
        print(f"\n🔬 Testing with query: '{test_query}'")

        available = client.get_chapter_collections()
        print(f"📊 Available collections: {len(available)}")

        if not available:
            print("⚠️ No collections found")
            return False

        ranked = client.get_top_chapters(test_query, top_n=3)
        print(f"🎯 Top chapters for '{test_query}': {[ch[0] for ch in ranked]}")

        print("✅ Cloud connection test successful!")
        return True

    except Exception as e:
        print(f"❌ Cloud connection test failed: {e}")
        return False
|
| 849 |
+
|
| 850 |
+
if __name__ == "__main__":
    # Verify cloud connectivity before launching the (slow) example suite.
    if not test_cloud_connection():
        print("❌ Skipping examples due to connection issues")
    else:
        banner = "=" * 100
        print("\n" + banner)
        print("🚀 Running example analysis with Qdrant Cloud...")
        print(banner)
        example_multi_collection_analysis(use_cloud=True)

    # Or use directly:
    # chapters = get_relevant_chapters("heart attack symptoms", use_cloud=True)
    # results = smart_diagnostic_search("heart attack symptoms", use_cloud=True)
    # print(format_smart_search_results("heart attack symptoms", results, use_cloud=True))
|
requirements.txt
ADDED
|
File without changes
|
service_v2.py
ADDED
|
@@ -0,0 +1,462 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, HTTPException, Query
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from fastapi.responses import JSONResponse
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from typing import List, Optional, Dict, Any
|
| 6 |
+
import time
|
| 7 |
+
import logging
|
| 8 |
+
import pprint
|
| 9 |
+
|
| 10 |
+
# Import your existing neural searcher and the new multi-collection system
|
| 11 |
+
# from neural_searcher import NeuralSearcher
|
| 12 |
+
from chapter_retrieval_system_v2 import MultiCollectionChapterRetrieval
|
| 13 |
+
|
| 14 |
+
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="ICD-10 Multi-Collection Search API",
    description="Advanced ICD-10 code search with intelligent chapter detection",
    version="2.0.0"
)

# Add CORS middleware for web frontend integration
# NOTE(review): wildcard origins combined with allow_credentials=True is
# very permissive — restrict allow_origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure this properly for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize systems
# On failure, chapter_retriever stays None; endpoints check for this and
# answer 503 instead of crashing at import time.
try:
    # Initialize the multi-collection chapter retrieval system
    chapter_retriever = MultiCollectionChapterRetrieval()

    # Keep your original neural searcher for backward compatibility
    # You might not need this if switching fully to multi-collection approach
    # neural_searcher = NeuralSearcher(collection_name="icd10_codes_chapter_3")

    logger.info("Successfully initialized search systems")
except Exception as e:
    logger.error(f"Failed to initialize search systems: {e}")
    chapter_retriever = None
    # neural_searcher = None
|
| 47 |
+
|
| 48 |
+
# Pydantic models for request/response validation
|
| 49 |
+
class SearchRequest(BaseModel):
    """Request body for the POST /api/search endpoint."""
    query: str
    limit: Optional[int] = 10
    score_threshold: Optional[float] = 0.3
    search_mode: Optional[str] = "smart"  # "smart", "all_chapters", "specific_chapters"
    # Explicit chapter IDs; None lets the backend auto-identify chapters.
    target_chapters: Optional[List[str]] = None
    detailed_analysis: Optional[bool] = False
    chapters_per_sentence: Optional[int] = 2  # NEW: How many chapters to search per sentence
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class ChapterInfo(BaseModel):
    """Relevance statistics for one ICD-10 chapter, as computed by
    analyze_chapters_parallel."""
    chapter_id: str
    collection_name: str
    relevance_score: float
    description: str
    match_count: int
    avg_score: float
    max_score: float
|
| 68 |
+
|
| 69 |
+
class SearchResult(BaseModel):
    """A single ICD-10 code hit returned by a vector search."""
    code: str
    title: str
    description: Optional[str] = None
    score: float
    chapter_id: Optional[str] = None
    collection: str
    source_sentence: Optional[str] = None  # NEW: Track which sentence generated this result
    sentence_key: Optional[str] = None  # NEW: Track sentence identifier
|
| 78 |
+
|
| 79 |
+
class SentenceResults(BaseModel):
    """Search hits grouped under the input sentence that produced them."""
    sentence_text: str
    sentence_key: str
    results: List[SearchResult]
    total_results: int
|
| 84 |
+
|
| 85 |
+
class SearchResponse(BaseModel):
    """Response body for the /api/search endpoints."""
    query: str
    total_results: int
    search_time: float
    search_mode: str
    relevant_chapters: List[ChapterInfo]
    results: List[SearchResult]  # Keep for backward compatibility
    sentence_results: Optional[List[SentenceResults]] = None  # NEW: Results grouped by sentence
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
class ChapterAnalysisResponse(BaseModel):
    """Response body for the /api/analyze-chapters endpoint."""
    query: str
    analysis_time: float
    chapters: List[ChapterInfo]
|
| 99 |
+
|
| 100 |
+
# Health check endpoint
|
| 101 |
+
@app.get("/health")
def health_check():
    """Liveness probe.

    Returns a healthy status with the current timestamp, or raises 503 when
    the search system failed to initialize at startup.
    """
    if chapter_retriever is None:
        raise HTTPException(status_code=503, detail="Search system not initialized")
    status_payload = {"status": "healthy", "timestamp": time.time()}
    return status_payload
|
| 107 |
+
|
| 108 |
+
# Chapter analysis endpoint
|
| 109 |
+
@app.get("/api/analyze-chapters", response_model=ChapterAnalysisResponse)
def analyze_chapters(
    q: str = Query(..., description="Diagnostic query string"),
    detailed: bool = Query(False, description="Include detailed chapter statistics")
):
    """
    Analyze which ICD-10 chapters are most relevant for a diagnostic query.

    Returns per-chapter relevance statistics (relevance above 0.05 only),
    plus the wall-clock analysis time. Raises 503 when the retrieval system
    is unavailable, 400 on an empty query, 500 on analysis failure.

    NOTE(review): the `detailed` query parameter is currently accepted but
    never used in the body — confirm whether it should alter the response.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Chapter retrieval system not available")

    if not q or not q.strip():
        raise HTTPException(status_code=400, detail="Query parameter 'q' is required")

    try:
        start_time = time.time()

        # Perform chapter analysis
        analysis = chapter_retriever.analyze_chapters_parallel(
            q.strip(),
            sample_size_per_chapter=15,
            score_threshold=0.2
        )

        analysis_time = time.time() - start_time

        # Convert to response format
        chapters = []
        for chapter_id, stats in analysis.items():
            if stats['relevance_score'] > 0.05:  # Filter very low relevance
                chapter_info = ChapterInfo(
                    chapter_id=chapter_id,
                    collection_name=stats['collection_name'],
                    relevance_score=stats['relevance_score'],
                    description=chapter_retriever.chapter_info.get(chapter_id, "Unknown chapter"),
                    match_count=stats['match_count'],
                    avg_score=stats['avg_score'],
                    max_score=stats['max_score']
                )
                chapters.append(chapter_info)

        return ChapterAnalysisResponse(
            query=q,
            analysis_time=analysis_time,
            chapters=chapters
        )

    except Exception as e:
        # Any analysis failure surfaces as a 500 with the underlying message.
        logger.error(f"Error in chapter analysis: {e}")
        raise HTTPException(status_code=500, detail=f"Chapter analysis failed: {str(e)}")
|
| 159 |
+
|
| 160 |
+
# Smart search endpoint (main search functionality)
|
| 161 |
+
@app.post("/api/search", response_model=SearchResponse)
def search_smart(request: SearchRequest):
    """
    Advanced search with intelligent chapter detection and targeted searching.

    Thin POST wrapper: validation and search logic live in _perform_search,
    which is shared with the GET variant of this endpoint.
    """
    return _perform_search(request)
|
| 167 |
+
|
| 168 |
+
@app.get("/api/search", response_model=SearchResponse)
def search_smart_get(
    q: str = Query(..., description="Diagnostic query string"),
    limit: int = Query(10, ge=1, le=100, description="Maximum number of results"),
    score_threshold: float = Query(0.3, ge=0.0, le=1.0, description="Minimum similarity score"),
    search_mode: str = Query("smart", description="Search mode: smart, all_chapters, specific_chapters"),
    target_chapters: Optional[str] = Query(None, description="Comma-separated list of target chapters (for specific_chapters mode)"),
    detailed_analysis: bool = Query(False, description="Include detailed chapter analysis"),
    chapters_per_sentence: int = Query(2, ge=1, le=5, description="Number of chapters to search per sentence")
):
    """
    Advanced search with intelligent chapter detection (GET version).

    Converts the query parameters into a SearchRequest and delegates to the
    shared _perform_search implementation used by the POST endpoint.
    """
    # The comma-separated chapter string becomes a list (or stays None).
    chapter_list = None
    if target_chapters:
        chapter_list = [part.strip() for part in target_chapters.split(",") if part.strip()]

    payload = SearchRequest(
        query=q,
        limit=limit,
        score_threshold=score_threshold,
        search_mode=search_mode,
        target_chapters=chapter_list,
        detailed_analysis=detailed_analysis,
        chapters_per_sentence=chapters_per_sentence,
    )
    return _perform_search(payload)
|
| 197 |
+
|
| 198 |
+
def _perform_search(request: SearchRequest) -> SearchResponse:
    """Internal search logic - UPDATED to return top responses for each sentence.

    Shared by the GET and POST /api/search endpoints.  In "smart" mode the
    retriever searches each sentence of the query against its most relevant
    chapters; results are returned both grouped per sentence
    (``sentence_results``) and flattened/score-sorted (``results``) for
    backward compatibility.  The "all_chapters" and "specific_chapters"
    modes are currently placeholders that return empty result lists.

    Raises:
        HTTPException(503): the retrieval system is not initialized.
        HTTPException(400): empty query, or unknown ``search_mode``.
        HTTPException(500): any unexpected failure during the search.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Search system not available")

    if not request.query or not request.query.strip():
        raise HTTPException(status_code=400, detail="Query is required")

    try:
        start_time = time.time()
        query = request.query.strip()

        # Initialize response data
        relevant_chapters = []
        results = []
        sentence_results = []  # Results grouped by query sentence
        # FIX: initialize for every mode -- previously this was assigned only
        # inside the "smart" branch, so the flattening loop further below
        # raised NameError (surfacing as a 500) for the other search modes.
        all_results = []  # Flattened results for backward compatibility

        if request.search_mode == "smart":
            # Smart search: auto-identify chapters then search them sentence by sentence
            logger.info(f"Performing sentence-based smart search for: '{query}'")

            # First, analyze chapters if detailed analysis is requested
            if request.detailed_analysis:
                analysis = chapter_retriever.analyze_chapters_parallel(query)
                for chapter_id, stats in analysis.items():
                    # Only surface chapters with a non-trivial relevance signal
                    if stats['relevance_score'] > 0.1:
                        chapter_info = ChapterInfo(
                            chapter_id=chapter_id,
                            collection_name=stats['collection_name'],
                            relevance_score=stats['relevance_score'],
                            description=chapter_retriever.chapter_info.get(chapter_id, "Unknown"),
                            match_count=stats['match_count'],
                            avg_score=stats['avg_score'],
                            max_score=stats['max_score']
                        )
                        relevant_chapters.append(chapter_info)

            # Perform sentence-based targeted search
            search_results = chapter_retriever.search_targeted_chapters(
                query,
                target_chapters=request.target_chapters,
                results_per_sentence=request.limit,  # Use full limit per sentence
                chapters_per_sentence=request.chapters_per_sentence
            )

            # Group results by sentence instead of flattening immediately
            sentence_result_map = {}  # sentence_key -> {'text': ..., 'results': [...]}

            for chapter_id, chapter_data in search_results.items():
                for sentence_key, sentence_data in chapter_data.items():
                    sentence_text = sentence_data['text']

                    # Initialize sentence entry if not exists
                    if sentence_key not in sentence_result_map:
                        sentence_result_map[sentence_key] = {
                            'text': sentence_text,
                            'results': []
                        }

                    # Add results for this sentence, enriched with provenance
                    # metadata so each hit remembers where it came from.
                    for result in sentence_data['results']:
                        enriched_result = {
                            **result,
                            'chapter_id': chapter_id,
                            'source_sentence': sentence_text,
                            'sentence_key': sentence_key
                        }

                        # Sentence-specific bucket
                        sentence_result_map[sentence_key]['results'].append(enriched_result)
                        # Flattened list for backward compatibility
                        all_results.append(enriched_result)

            # Build per-sentence result objects
            for sentence_key, sentence_data in sentence_result_map.items():
                # Sort sentence results by score (best first)
                sentence_data['results'].sort(key=lambda x: x['score'], reverse=True)

                # Apply score threshold and limit per sentence
                filtered_sentence_results = [
                    r for r in sentence_data['results']
                    if r['score'] >= request.score_threshold
                ][:request.limit]

                # Convert to SearchResult objects
                sentence_search_results = []
                for result in filtered_sentence_results:
                    payload = result['payload']
                    search_result = SearchResult(
                        code=payload.get('code', 'N/A'),
                        title=payload.get('title', 'N/A'),
                        description=payload.get('description'),
                        score=result['score'],
                        chapter_id=result.get('chapter_id'),
                        collection=result['collection'],
                        source_sentence=result.get('source_sentence'),
                        sentence_key=result.get('sentence_key')
                    )
                    sentence_search_results.append(search_result)

                # Only include sentences that actually produced results
                if sentence_search_results:
                    sentence_result_obj = SentenceResults(
                        sentence_text=sentence_data['text'],
                        sentence_key=sentence_key,
                        results=sentence_search_results,
                        total_results=len(sentence_search_results)
                    )
                    sentence_results.append(sentence_result_obj)

            # Sort sentences by the average score of their surviving results
            sentence_results.sort(
                key=lambda x: sum(r.score for r in x.results) / len(x.results) if x.results else 0,
                reverse=True
            )

            # Process flattened results for backward compatibility
            all_results.sort(key=lambda x: x['score'], reverse=True)
            all_results = all_results[:request.limit]

        elif request.search_mode == "all_chapters":
            # Placeholder: sentence-based logic could be mirrored here
            logger.info("All chapters search mode - using original logic")

        elif request.search_mode == "specific_chapters":
            # Placeholder: sentence-based logic could be mirrored here
            logger.info("Specific chapters search mode - using original logic")

        else:
            raise HTTPException(status_code=400, detail=f"Unknown search mode: {request.search_mode}")

        # Convert flattened results to response format (for backward compatibility)
        for result in all_results:
            if result['score'] >= request.score_threshold:
                payload = result['payload']
                search_result = SearchResult(
                    code=payload.get('code', 'N/A'),
                    title=payload.get('title', 'N/A'),
                    description=payload.get('description'),
                    score=result['score'],
                    chapter_id=result.get('chapter_id'),
                    collection=result['collection'],
                    source_sentence=result.get('source_sentence'),
                    sentence_key=result.get('sentence_key')
                )
                results.append(search_result)

        search_time = time.time() - start_time

        logger.info(f"Sentence-based search completed: {len(results)} total results, {len(sentence_results)} sentences in {search_time:.3f}s")

        # Debug output
        logger.info(f"Sentence results breakdown:")
        for sent_result in sentence_results:
            logger.info(f"  '{sent_result.sentence_text}': {sent_result.total_results} results")

        return SearchResponse(
            query=query,
            total_results=len(results),
            search_time=search_time,
            search_mode=request.search_mode,
            relevant_chapters=relevant_chapters,
            results=results,  # Flattened results for backward compatibility
            sentence_results=sentence_results  # Results organized by sentence
        )

    except HTTPException:
        # FIX: let deliberate HTTP errors (e.g. the 400 for an unknown mode)
        # propagate with their intended status instead of being re-wrapped
        # as a generic 500 by the handler below.
        raise
    except Exception as e:
        logger.error(f"Search error: {e}")
        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
|
| 377 |
+
|
| 378 |
+
# Backward compatibility endpoint (your original endpoint)
|
| 379 |
+
# @app.get("/api/search/legacy")
|
| 380 |
+
# def search_legacy(q: str):
|
| 381 |
+
# """
|
| 382 |
+
# Legacy search endpoint for backward compatibility
|
| 383 |
+
# Uses your original neural searcher
|
| 384 |
+
# """
|
| 385 |
+
# # if not neural_searcher:
|
| 386 |
+
# # raise HTTPException(status_code=503, detail="Legacy search system not available")
|
| 387 |
+
|
| 388 |
+
# if not q or not q.strip():
|
| 389 |
+
# raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
|
| 390 |
+
|
| 391 |
+
# try:
|
| 392 |
+
# result = neural_searcher.search(text=q.strip())
|
| 393 |
+
# return {"result": result}
|
| 394 |
+
# except Exception as e:
|
| 395 |
+
# logger.error(f"Legacy search error: {e}")
|
| 396 |
+
# raise HTTPException(status_code=500, detail=f"Legacy search failed: {str(e)}")
|
| 397 |
+
|
| 398 |
+
# Get available chapters
@app.get("/api/chapters")
def get_available_chapters():
    """
    Return every ICD-10 chapter known to the retrieval system, each with
    its backing collection name and human-readable description.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Chapter system not available")

    try:
        # Build one entry per chapter; fall back to a generic description
        # when the retriever has no text for a chapter id.
        chapters = [
            {
                "chapter_id": chapter_id,
                "collection_name": collection_name,
                "description": chapter_retriever.chapter_info.get(chapter_id, "Unknown chapter"),
            }
            for chapter_id, collection_name in chapter_retriever.get_chapter_collections().items()
        ]

        return {
            "total_chapters": len(chapters),
            "chapters": chapters
        }
    except Exception as e:
        logger.error(f"Error getting chapters: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get chapters: {str(e)}")
|
| 426 |
+
|
| 427 |
+
# Get search suggestions/autocomplete (optional enhancement)
@app.get("/api/suggest")
def get_search_suggestions(
    q: str = Query(..., min_length=2, description="Partial query for suggestions"),
    limit: int = Query(5, ge=1, le=20, description="Maximum number of suggestions")
):
    """
    Return up to ``limit`` suggestions whose text contains the partial
    query ``q`` (case-insensitive substring match).

    This is a simple keyword-list implementation; a production version
    would likely draw suggestions from the index itself.
    """
    common_terms = [
        "chest pain", "shortness of breath", "diabetes", "hypertension",
        "pneumonia", "fracture", "depression", "anxiety", "fever",
        "headache", "abdominal pain", "nausea", "vomiting", "infection",
        "cancer", "tumor", "heart attack", "stroke", "asthma"
    ]

    needle = q.lower().strip()

    # Collect matches in list order, stopping once the limit is reached.
    matches = []
    for term in common_terms:
        if needle in term.lower():
            matches.append(term)
            if len(matches) == limit:
                break

    return {"suggestions": matches}
|
| 451 |
+
|
| 452 |
+
if __name__ == "__main__":
    # Serve the API directly when this module is executed as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info", access_log=True)
|