Axcel1 commited on
Commit
0ee5e7e
·
verified ·
1 Parent(s): 2dd2e39

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +808 -0
  2. chapter_retrieval_system_v2.py +865 -0
  3. requirements.txt +0 -0
  4. service_v2.py +462 -0
app.py ADDED
@@ -0,0 +1,808 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import time
4
+ import re
5
+ import threading
6
+ import uvicorn
7
+ import logging
8
+ import os
9
+ import signal
10
+ import sys
11
+ from typing import Dict, List, Optional, Tuple
12
+ from collections import defaultdict
13
+
14
+ # Import your backend modules
15
+ from service_v2 import app as fastapi_app
16
+ from chapter_retrieval_system_v2 import MultiCollectionChapterRetrieval
17
+
18
# Configure logging so records surface in the Hugging Face Spaces console.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)
24
+
25
class ICD10SearchInterface:
    """Bridges the Gradio UI to the local FastAPI ICD-10 search service."""

    def __init__(self, api_base_url: str = "http://127.0.0.1:8000"):
        """Remember the backend location and prime the chapter lookup table."""
        # Normalise the base URL so later f-strings can append "/path" safely.
        self.api_base_url = api_base_url.rstrip('/')
        # Flipped to True once /health answers; checked before every search.
        self.server_ready = False
        # Generous retry budget for slow Spaces cold starts.
        self.max_retries = 30
        # Static ICD-10 code-prefix -> chapter metadata table.
        self.code_to_chapter = self._build_code_to_chapter_mapping()
34
+
35
def _build_code_to_chapter_mapping(self) -> Dict[str, Dict[str, str]]:
    """Build mapping from ICD-10 code prefixes to chapter metadata.

    Each entry maps an internal chapter id to its title, description, and the
    list of code *prefixes* that belong to it.  ``get_chapter_info_for_code``
    matches codes against these prefixes in insertion order, so prefixes must
    not overlap between chapters.

    Bug fix: Chapter II previously claimed the bare prefix "D", which matched
    every D50-D89 blood-disorder code before Chapter III could.  Chapter II
    covers C00-D49, so it now lists only "C" and "D0" through "D4".

    Returns:
        Dict keyed by chapter id, each value holding "title", "code_ranges"
        (list of code prefixes) and "description".
    """
    return {
        # Chapter I: Certain infectious and parasitic diseases (A00-B99)
        "chapter_1_I": {
            "title": "Certain infectious and parasitic diseases",
            "code_ranges": ["A", "B"],
            "description": "Infectious diseases, parasitic diseases, and related conditions"
        },

        # Chapter II: Neoplasms (C00-D49) — D5x-D8x belong to Chapter III.
        "chapter_2_II": {
            "title": "Neoplasms",
            "code_ranges": ["C", "D0", "D1", "D2", "D3", "D4"],
            "description": "Malignant neoplasms, benign neoplasms, and neoplasms of uncertain behavior"
        },

        # Chapter III: Diseases of blood and blood-forming organs (D50-D89)
        "chapter_3_III": {
            "title": "Diseases of the blood and blood-forming organs",
            "code_ranges": ["D5", "D6", "D7", "D8"],
            "description": "Anemias, coagulation defects, and other blood disorders"
        },

        # Chapter IV: Endocrine, nutritional and metabolic diseases (E00-E89)
        "chapter_4_IV": {
            "title": "Endocrine, nutritional and metabolic diseases",
            "code_ranges": ["E"],
            "description": "Diabetes, thyroid disorders, nutritional deficiencies, and metabolic disorders"
        },

        # Chapter V: Mental and behavioural disorders (F01-F99)
        "chapter_5_V": {
            "title": "Mental and behavioural disorders",
            "code_ranges": ["F"],
            "description": "Mental disorders, substance abuse, and behavioral conditions"
        },

        # Chapter VI: Diseases of the nervous system (G00-G99)
        "chapter_6_VI": {
            "title": "Diseases of the nervous system",
            "code_ranges": ["G"],
            "description": "Neurological disorders, epilepsy, migraines, and nervous system diseases"
        },

        # Chapter VII: Diseases of the eye and adnexa (H00-H59)
        "chapter_7_VII": {
            "title": "Diseases of the eye and adnexa",
            "code_ranges": ["H0", "H1", "H2", "H3", "H4", "H5"],
            "description": "Eye diseases, visual disorders, and related conditions"
        },

        # Chapter VIII: Diseases of the ear and mastoid process (H60-H95)
        "chapter_8_VIII": {
            "title": "Diseases of the ear and mastoid process",
            "code_ranges": ["H6", "H7", "H8", "H9"],
            "description": "Hearing disorders, ear infections, and mastoid conditions"
        },

        # Chapter IX: Diseases of the circulatory system (I00-I99)
        "chapter_9_IX": {
            "title": "Diseases of the circulatory system",
            "code_ranges": ["I"],
            "description": "Heart disease, hypertension, stroke, and vascular disorders"
        },

        # Chapter X: Diseases of the respiratory system (J00-J99)
        "chapter_10_X": {
            "title": "Diseases of the respiratory system",
            "code_ranges": ["J"],
            "description": "Pneumonia, asthma, COPD, and other respiratory conditions"
        },

        # Chapter XI: Diseases of the digestive system (K00-K95)
        "chapter_11_XI": {
            "title": "Diseases of the digestive system",
            "code_ranges": ["K"],
            "description": "Gastrointestinal disorders, liver disease, and digestive conditions"
        },

        # Chapter XII: Diseases of the skin and subcutaneous tissue (L00-L99)
        "chapter_12_XII": {
            "title": "Diseases of the skin and subcutaneous tissue",
            "code_ranges": ["L"],
            "description": "Skin infections, dermatitis, and subcutaneous tissue disorders"
        },

        # Chapter XIII: Diseases of the musculoskeletal system (M00-M99)
        "chapter_13_XIII": {
            "title": "Diseases of the musculoskeletal system and connective tissue",
            "code_ranges": ["M"],
            "description": "Arthritis, bone disorders, muscle diseases, and connective tissue conditions"
        },

        # Chapter XIV: Diseases of the genitourinary system (N00-N99)
        "chapter_14_XIV": {
            "title": "Diseases of the genitourinary system",
            "code_ranges": ["N"],
            "description": "Kidney disease, urinary disorders, and reproductive system conditions"
        },

        # Chapter XV: Pregnancy, childbirth and the puerperium (O00-O9A)
        "chapter_15_XV": {
            "title": "Pregnancy, childbirth and the puerperium",
            "code_ranges": ["O"],
            "description": "Pregnancy complications, delivery issues, and postpartum conditions"
        },

        # Chapter XVI: Certain conditions originating in the perinatal period (P00-P96)
        "chapter_16_XVI": {
            "title": "Certain conditions originating in the perinatal period",
            "code_ranges": ["P"],
            "description": "Newborn conditions and perinatal complications"
        },

        # Chapter XVII: Congenital malformations (Q00-Q99)
        "chapter_17_XVII": {
            "title": "Congenital malformations, deformations and chromosomal abnormalities",
            "code_ranges": ["Q"],
            "description": "Birth defects and chromosomal disorders"
        },

        # Chapter XVIII: Symptoms, signs and abnormal findings (R00-R99)
        "chapter_18_XVIII": {
            "title": "Symptoms, signs and abnormal clinical and laboratory findings",
            "code_ranges": ["R"],
            "description": "Symptoms and signs not elsewhere classified"
        },

        # Chapter XIX: Injury, poisoning and external causes (S00-T88)
        "chapter_19_XIX": {
            "title": "Injury, poisoning and certain other consequences of external causes",
            "code_ranges": ["S", "T"],
            "description": "Injuries, poisoning, and external cause consequences"
        },

        # Chapter XX: External causes of morbidity (V01-Y99)
        "chapter_20_XX": {
            "title": "External causes of morbidity",
            "code_ranges": ["V", "W", "X", "Y"],
            "description": "External causes of injury and poisoning"
        },

        # Chapter XXI: Factors influencing health status (Z00-Z99)
        "chapter_21_XXI": {
            "title": "Factors influencing health status and contact with health services",
            "code_ranges": ["Z"],
            "description": "Health maintenance, screening, and healthcare encounters"
        }
    }
185
+
186
def wait_for_server(self, max_wait_time=60):
    """Poll the backend /health endpoint until it answers or time runs out.

    Sets ``self.server_ready`` and returns True on success; returns False
    after ``max_wait_time`` seconds of failed attempts.
    """
    logger.info(f"Waiting for FastAPI server at {self.api_base_url}")
    started_at = time.time()
    tries = 0

    while time.time() - started_at < max_wait_time:
        tries += 1
        try:
            reply = requests.get(f"{self.api_base_url}/health", timeout=10)
        except requests.exceptions.RequestException:
            # Connection refused while booting: back off briefly, log sparsely.
            if tries % 10 == 0:
                logger.info(f"Waiting for server... attempt {tries} ({time.time() - started_at:.1f}s)")
            time.sleep(2)
            continue

        if reply.status_code == 200:
            self.server_ready = True
            logger.info(f"FastAPI server ready after {tries} attempts ({time.time() - started_at:.1f}s)")
            return True

        logger.warning(f"Server returned status {reply.status_code}, attempt {tries}")

    logger.error(f"FastAPI server failed to start within {max_wait_time} seconds")
    return False
210
+
211
def get_server_status(self) -> Tuple[bool, str]:
    """Report backend reachability as (ok, message) for the UI banner."""
    # Never probe the network before the startup handshake succeeded.
    if not self.server_ready:
        return False, "Server starting up..."

    try:
        reply = requests.get(f"{self.api_base_url}/health", timeout=5)
    except requests.exceptions.RequestException as exc:
        return False, f"Connection Error: {str(exc)}"

    if reply.status_code == 200:
        return True, "Server Ready"
    return False, f"Server Error (Status: {reply.status_code})"
224
+
225
def test_connection(self) -> Tuple[bool, str]:
    """Backward-compatible alias: delegates straight to get_server_status()."""
    return self.get_server_status()
228
+
229
+ # Keep all your existing methods (copy from original code)
230
def extract_category_code(self, icd_code: str) -> str:
    """Reduce a full ICD-10 code to its category stem (e.g. I21.0 -> I21).

    Falls back to the cleaned-up input when it does not look like a standard
    letter-plus-digits code; empty/None input yields "".
    """
    if not icd_code:
        return ""

    cleaned = icd_code.strip().upper()
    stem = re.match(r'^([A-Z]\d{2,3})', cleaned)
    return stem.group(1) if stem else cleaned
241
+
242
def group_codes_by_category(self, results: List[Dict]) -> Dict[str, List[Dict]]:
    """Bucket search hits by their ICD-10 category stem.

    Hits whose category cannot be determined (missing/empty code) are dropped.
    """
    buckets: Dict[str, List[Dict]] = defaultdict(list)

    for hit in results:
        bucket_key = self.extract_category_code(hit.get('code', ''))
        if bucket_key:
            buckets[bucket_key].append(hit)

    return dict(buckets)
253
+
254
def get_category_info(self, category_code: str, codes_in_category: List[Dict]) -> Dict:
    """Choose the entry that best represents a category of codes.

    An entry whose code *is* the bare category wins outright; otherwise the
    highest-scoring entry seen so far is used, falling back to the first
    entry when no score beats zero.
    """
    running_best = None
    running_top = 0

    for entry in codes_in_category:
        if entry['code'] == category_code:
            return entry
        if entry['score'] > running_top:
            running_top = entry['score']
            running_best = entry

    return running_best or codes_in_category[0]
268
+
269
def get_chapter_info_for_code(self, icd_code: str) -> Optional[Dict[str, str]]:
    """Resolve an ICD-10 code to its chapter metadata via prefix matching.

    Chapters are checked in the insertion order of ``self.code_to_chapter``;
    the first chapter owning a matching prefix wins.  Returns None for empty
    input or an unrecognised code.
    """
    if not icd_code:
        return None

    normalized = icd_code.strip().upper()

    for chapter_id, chapter_data in self.code_to_chapter.items():
        if any(normalized.startswith(prefix) for prefix in chapter_data["code_ranges"]):
            return {
                "chapter_id": chapter_id,
                "title": chapter_data["title"],
                "description": chapter_data["description"],
            }

    return None
287
+
288
def search_icd10(
    self,
    query: str,
    limit: int = 10,
    score_threshold: float = 0.3,
    search_mode: str = "smart",
    target_chapters: str = "",
    detailed_analysis: bool = False,
    chapters_per_sentence: int = 2
) -> str:
    """Search ICD-10 codes using the API with enhanced error handling for Spaces.

    Returns an HTML fragment for the Gradio HTML component: either formatted
    results or a styled status/error panel.
    """
    # Guard: nothing to search for.
    if not query or not query.strip():
        return "Please enter a diagnostic query."

    # Guard: backend thread may still be booting on a cold Space.
    if not self.server_ready:
        return """
        <div style='text-align: center; padding: 20px; background: #ffeaa7; border-radius: 8px; margin: 20px 0;'>
            <h3>Server Starting Up</h3>
            <p>The FastAPI server is still initializing. Please wait a moment and try again.</p>
            <p><em>This usually takes 10-30 seconds on first load.</em></p>
        </div>
        """

    # Guard: is the backend reachable right now?
    is_connected, connection_msg = self.test_connection()
    if not is_connected:
        return f"""
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Connection Error</h3>
            <p>{connection_msg}</p>
            <p><em>Please refresh the page and try again.</em></p>
        </div>
        """

    try:
        # limit * 2: over-fetch so category grouping still has enough rows
        # after parent-category codes are filtered out downstream.
        params = {
            "q": query.strip(),
            "limit": limit * 2,
            "score_threshold": score_threshold,
            "search_mode": search_mode or "smart",
            "detailed_analysis": detailed_analysis,
            "chapters_per_sentence": chapters_per_sentence
        }

        # Only forward chapter filtering when the caller actually supplied one.
        if target_chapters and target_chapters.strip():
            params["target_chapters"] = target_chapters.strip()

        start_time = time.time()
        response = requests.get(f"{self.api_base_url}/api/search", params=params, timeout=120)
        request_time = time.time() - start_time  # NOTE(review): measured but never used — confirm intent

        if response.status_code != 200:
            # Prefer the API's JSON error body; fall back to raw response text.
            error_data = response.json() if response.headers.get('content-type', '').startswith('application/json') else {"detail": response.text}
            return f"""
            <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
                <h3>API Error ({response.status_code})</h3>
                <p>{error_data.get('detail', 'Unknown error')}</p>
            </div>
            """

        data = response.json()
        return self._format_sentence_results_with_enhanced_categories(data)

    except requests.exceptions.Timeout:
        return """
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Request Timeout</h3>
            <p>The search is taking too long. Try reducing the limit or increasing the score threshold.</p>
        </div>
        """
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error: {e}")
        return f"""
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Request Error</h3>
            <p>{str(e)}</p>
        </div>
        """
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return f"""
        <div style='text-align: center; padding: 20px; background: #fab1a0; border-radius: 8px; margin: 20px 0;'>
            <h3>Unexpected Error</h3>
            <p>{str(e)}</p>
        </div>
        """
373
+
374
def _format_sentence_results_with_enhanced_categories(self, data: Dict) -> str:
    """Format sentence-based results with enhanced category and chapter information.

    Expects ``data['sentence_results']`` as produced by the /api/search
    endpoint (each item with 'sentence_text', 'total_results' and a 'results'
    list of {code, title, score, description} dicts).  Returns a
    self-contained HTML fragment for the Gradio HTML component.
    """
    sentence_results = data.get('sentence_results', [])

    if not sentence_results:
        return "<div style='text-align: center; color: #666; padding: 20px;'>No sentence-based results available.</div>"

    # Page header.
    html = """
    <div style='margin-bottom: 20px;'>
        <h3 style='color: #2c3e50; margin-bottom: 15px;'>Results by Sentence with Enhanced Category Information</h3>
        <p style='color: #666; margin-bottom: 20px;'>
            Results are organized by sentence and grouped by ICD-10 categories with chapter context. High-scoring codes are highlighted.
        </p>
    </div>
    """

    for i, sent_result in enumerate(sentence_results, 1):
        # Group results by category
        categories = self.group_codes_by_category(sent_result['results'])

        # One card per sentence, header + body.
        html += f"""
        <div style='margin-bottom: 30px; border: 2px solid #3498db; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 6px rgba(0,0,0,0.1);'>
            <div style='background: linear-gradient(135deg, #3498db, #2980b9); color: white; padding: 15px;'>
                <h4 style='margin: 0; font-size: 1.2em;'>
                    Sentence {i}: "{sent_result['sentence_text']}"
                </h4>
                <div style='margin-top: 8px; font-size: 0.9em; opacity: 0.9;'>
                    <span style='background-color: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 12px; margin-right: 10px;'>
                        {sent_result['total_results']} total results
                    </span>
                    <span style='background-color: rgba(255,255,255,0.2); padding: 3px 8px; border-radius: 12px;'>
                        Top 3 of {len(categories)} categories
                    </span>
                </div>
            </div>
            <div style='padding: 20px;'>
        """

        # Sort categories by highest score and limit to top 3
        sorted_categories = sorted(
            categories.items(),
            key=lambda x: max(code['score'] for code in x[1]),
            reverse=True
        )[:3]

        for category_code, codes_in_category in sorted_categories:
            # Get category information (representative entry for the header).
            category_info = self.get_category_info(category_code, codes_in_category)
            highest_score = max(code['score'] for code in codes_in_category)
            category_color = self._get_category_color(highest_score)

            # Get chapter information for this category from its first code.
            sample_code = codes_in_category[0].get('code', category_code)
            chapter_info = self.get_chapter_info_for_code(sample_code)

            # Build enhanced category header
            category_title = category_info.get('title', 'Unknown Category')
            chapter_display = ""
            chapter_tooltip = ""

            if chapter_info:
                # chapter_id looks like "chapter_9_IX": [1] = number, [2] = roman numeral.
                chapter_display = f" • Chapter {chapter_info['chapter_id'].split('_')[1]} ({chapter_info['chapter_id'].split('_')[2]})"
                chapter_tooltip = f"title='{chapter_info['description']}'"

            html += f"""
            <div style='margin-bottom: 20px; border: 1px solid {category_color}; border-radius: 8px; overflow: hidden;'>
                <div style='background-color: {category_color}; color: white; padding: 12px 15px;'>
                    <div style='display: flex; justify-content: space-between; align-items: flex-start;'>
                        <div style='flex-grow: 1;'>
                            <h5 style='margin: 0; font-size: 1em; line-height: 1.3;'>
                                <span style='display: block;'>
                                    Category {category_code}: {category_title}
                                </span>
                                {f'<span style="font-size: 0.85em; opacity: 0.9; display: block; margin-top: 4px;" {chapter_tooltip}>{chapter_display}</span>' if chapter_info else ''}
                            </h5>
                            {f'<div style="font-size: 0.8em; opacity: 0.8; margin-top: 6px; line-height: 1.2;">{chapter_info["description"]}</div>' if chapter_info else ''}
                        </div>
                        <div style='text-align: right; margin-left: 15px;'>
                            <span style='font-size: 0.8em; background-color: rgba(255,255,255,0.2); padding: 2px 6px; border-radius: 10px; display: block;'>
                                Max: {highest_score:.3f}
                            </span>
                            <span style='font-size: 0.75em; opacity: 0.8; margin-top: 2px; display: block;'>
                                {len(codes_in_category)} codes
                            </span>
                        </div>
                    </div>
                </div>
                <div style='padding: 12px;'>
            """

            # Sort codes within category by score
            sorted_codes = sorted(codes_in_category, key=lambda x: x['score'], reverse=True)

            # Filter out codes that are the same as the category code
            filtered_codes = [code for code in sorted_codes if code.get('code', '') != category_code]

            # If we filtered out all codes or have no codes, show a message
            if not filtered_codes:
                html += f"""
                <div style='margin-bottom: 8px; padding: 12px; background-color: #f8f9fa; border-radius: 6px; border-left: 4px solid #95a5a6;'>
                    <div style='color: #666; text-align: center; font-style: italic;'>
                        Category {category_code} represents the main code group. Specific subcodes available in detailed search.
                    </div>
                </div>
                """
            else:
                for j, result in enumerate(filtered_codes, 1):
                    score_color = self._get_score_color(result['score'])
                    is_high_score = result['score'] >= 0.6

                    # Add highlighting for high-scoring codes
                    highlight_style = ""
                    if is_high_score:
                        highlight_style = "box-shadow: 0 0 0 2px #f39c12; background: linear-gradient(135deg, #fff9e6, #ffffff);"

                    html += f"""
                    <div style='margin-bottom: 8px; padding: 12px; background-color: #f8f9fa; border-radius: 6px; border-left: 4px solid {score_color}; {highlight_style}'>
                        <div style='display: flex; justify-content: space-between; align-items: center;'>
                            <div style='flex-grow: 1;'>
                                <strong style='color: #2c3e50; font-size: 1em;'>
                                    {result['code']} - {result['title']}
                                    {' ⭐' if is_high_score else ''}
                                </strong>
                            </div>
                            <span style='background-color: {score_color}; color: white; padding: 3px 8px; border-radius: 4px; font-size: 0.85em; font-weight: bold;'>
                                {result['score']:.3f}
                            </span>
                        </div>
                        {f"<div style='font-size: 0.9em; color: #666; margin-top: 8px; line-height: 1.4;'>{result['description'][:250]}{'...' if len(result.get('description', '')) > 250 else ''}</div>" if result.get('description') else ""}
                    </div>
                    """

            # Close the per-category padding div and card div.
            html += "</div></div>"

        # Close the per-sentence padding div and card div.
        html += "</div></div>"

    # Enhanced legend with chapter info
    html += """
    <div style='background-color: #f8f9fa; border-radius: 8px; padding: 15px; margin-top: 20px;'>
        <h4 style='color: #2c3e50; margin-bottom: 15px;'>Enhanced Legend</h4>

        <div style='margin-bottom: 15px;'>
            <h5 style='color: #2c3e50; margin-bottom: 8px;'>Score Quality:</h5>
            <div style='display: flex; flex-wrap: wrap; gap: 15px; align-items: center;'>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #27ae60; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Excellent Match (≥0.8)</span>
                </div>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #f39c12; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Good Match (≥0.6)</span>
                </div>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #e67e22; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Fair Match (≥0.4)</span>
                </div>
                <div style='display: flex; align-items: center;'>
                    <div style='width: 20px; height: 20px; background-color: #e74c3c; border-radius: 3px; margin-right: 8px;'></div>
                    <span style='font-size: 0.9em;'>Low Match (<0.4)</span>
                </div>
            </div>
        </div>

        <div>
            <h5 style='color: #2c3e50; margin-bottom: 8px;'>Features:</h5>
            <div style='display: flex; flex-wrap: wrap; gap: 20px; align-items: center; font-size: 0.9em;'>
                <span>High-scoring codes (≥0.6)</span>
                <span>Category grouping by ICD-10 structure</span>
                <span>Chapter context and descriptions</span>
                <span>Score-based category prioritization</span>
                <span>Duplicate category codes filtered</span>
            </div>
        </div>
    </div>
    """

    return html
551
+
552
def _get_score_color(self, score: float) -> str:
    """Map a similarity score onto the legend's traffic-light colour scale."""
    # Thresholds mirror the legend rendered by the results formatter.
    for floor, colour in ((0.8, "#27ae60"), (0.6, "#f39c12"), (0.4, "#e67e22")):
        if score >= floor:
            return colour
    return "#e74c3c"  # below 0.4: low match (red)
562
+
563
def _get_category_color(self, max_score: float) -> str:
    """Colour for a category header, keyed off the category's best score."""
    if max_score >= 0.8:
        return "#2ecc71"  # bright green: excellent
    if max_score >= 0.6:
        return "#3498db"  # blue: good
    if max_score >= 0.4:
        return "#9b59b6"  # purple: fair
    return "#95a5a6"      # gray: weak
573
+
574
def start_fastapi_server():
    """Run the bundled FastAPI app on localhost; failures are logged, not raised.

    Intended to run in a daemon thread so the Gradio front-end can still come
    up and display error panels if the backend dies.
    """
    try:
        logger.info("Starting FastAPI server...")
        # Port is overridable via env var for Spaces configuration.
        bind_port = int(os.environ.get("FASTAPI_PORT", "8000"))

        uvicorn.run(
            fastapi_app,
            host="127.0.0.1",
            port=bind_port,
            log_level="info",
            access_log=False,       # keep Spaces logs readable
            workers=1,              # single worker: one small Spaces container
            timeout_keep_alive=30,
        )
    except Exception as exc:
        # Deliberately swallowed: Gradio keeps running and reports the outage.
        logger.error(f"FastAPI server failed to start: {exc}")
594
+
595
def create_gradio_interface():
    """Create the Gradio interface with server status monitoring.

    Builds the full Blocks layout (header, parameter column, results column,
    example buttons, footer) and wires the search button to
    ``ICD10SearchInterface.search_icd10``.  Returns the Blocks app.
    """
    search_interface = ICD10SearchInterface()

    # Custom CSS: widen the app and animate the status banner.
    css = """
    .gradio-container {
        max-width: 1400px !important;
        margin: auto !important;
    }

    .server-status {
        transition: all 0.3s ease;
    }
    """

    with gr.Blocks(css=css, title="ICD-10 Smart Search", theme=gr.themes.Soft()) as demo:
        # Page header banner.
        gr.HTML("""
        <div style='text-align: center; margin-bottom: 30px; padding: 25px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 15px; box-shadow: 0 4px 8px rgba(0,0,0,0.1);'>
            <h1 style='color: white; margin: 0; font-size: 2.5em;'>ICD-10 Smart Search</h1>
            <p style='color: #f1f2f6; margin: 15px 0 0 0; font-size: 1.2em;'>Advanced diagnostic code search with AI-powered sentence analysis</p>
        </div>
        """)

        # Server status indicator (re-rendered by the periodic demo.load below).
        def get_server_status():
            is_ready, msg = search_interface.get_server_status()
            if is_ready:
                return "<div class='server-status' style='text-align: center; padding: 10px; background: #00b894; color: white; border-radius: 5px; margin-bottom: 20px;'>🟢 Server Ready</div>"
            else:
                return f"<div class='server-status' style='text-align: center; padding: 10px; background: #e17055; color: white; border-radius: 5px; margin-bottom: 20px;'>🔴 {msg}</div>"

        server_status = gr.HTML(value=get_server_status())

        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<h3>Search Parameters</h3>")

                query_input = gr.Textbox(
                    label="Diagnostic Query",
                    placeholder="Enter diagnostic description (e.g., 'chest pain with shortness of breath')",
                    lines=3,
                    value=""
                )

                with gr.Accordion("Advanced Options", open=False):
                    with gr.Row():
                        limit_input = gr.Slider(
                            label="Maximum Results per Sentence",
                            minimum=5,
                            maximum=50,
                            value=15,
                            step=5,
                            info="Higher values show more codes per category"
                        )

                        score_threshold_input = gr.Slider(
                            label="Score Threshold",
                            minimum=0.1,
                            maximum=0.9,
                            value=0.2,
                            step=0.05,
                            info="Lower values include more potential matches"
                        )

                    search_mode_input = gr.Dropdown(
                        label="Search Mode",
                        choices=["smart", "all_chapters", "specific_chapters"],
                        value="smart"
                    )

                    # Only shown when search mode is "specific_chapters" (see below).
                    target_chapters_input = gr.Textbox(
                        label="Target Chapters (comma-separated)",
                        placeholder="e.g., chapter_9_IX, chapter_10_X",
                        visible=False
                    )

                    with gr.Row():
                        detailed_analysis_input = gr.Checkbox(
                            label="Include Detailed Analysis",
                            value=True
                        )

                        chapters_per_sentence_input = gr.Slider(
                            label="Chapters per Sentence",
                            minimum=1,
                            maximum=5,
                            value=3,
                            step=1
                        )

                search_button = gr.Button("Search ICD-10 Codes", variant="primary", size="lg")

                # Toggle the chapter filter textbox with the dropdown choice.
                def update_target_chapters_visibility(search_mode):
                    return gr.update(visible=(search_mode == "specific_chapters"))

                search_mode_input.change(
                    update_target_chapters_visibility,
                    inputs=search_mode_input,
                    outputs=target_chapters_input
                )

            with gr.Column(scale=2):
                gr.HTML("<h3>Enhanced Category-Grouped Results</h3>")
                sentence_results_output = gr.HTML(
                    value="<div style='text-align: center; color: #666; padding: 40px;'>Enter a diagnostic query and click search to see categorized results with chapter context.</div>"
                )

        # Example queries
        gr.HTML("<h3>Example Queries</h3>")

        example_queries = [
            "acute myocardial infarction with chest pain",
            "type 2 diabetes with diabetic nephropathy",
            "major depressive disorder with anxiety",
            "fracture of distal radius from fall",
            "acute appendicitis with peritonitis",
            "gestational diabetes in pregnancy",
            "chronic kidney disease stage 3",
            "essential hypertension with heart disease"
        ]

        # Two columns of example buttons, two buttons per column.
        with gr.Row():
            for i in range(0, len(example_queries), 2):
                with gr.Column():
                    for j in range(2):
                        if i + j < len(example_queries):
                            example_btn = gr.Button(
                                example_queries[i + j],
                                variant="secondary",
                                size="sm"
                            )
                            # Default arg binds the query at definition time,
                            # avoiding Python's late-binding closure pitfall.
                            example_btn.click(
                                lambda x=example_queries[i + j]: x,
                                outputs=query_input
                            )

        # Search functionality
        search_button.click(
            fn=search_interface.search_icd10,
            inputs=[
                query_input,
                limit_input,
                score_threshold_input,
                search_mode_input,
                target_chapters_input,
                detailed_analysis_input,
                chapters_per_sentence_input
            ],
            outputs=sentence_results_output
        )

        # Enhanced footer
        gr.HTML("""
        <div style='text-align: center; margin-top: 30px; padding: 20px; background-color: #f8f9fa; border-radius: 12px; border: 1px solid #e9ecef;'>
            <p style='margin: 0; color: #666; line-height: 1.6;'>
                Powered by advanced semantic search and AI-driven sentence analysis<br>
                <strong>Features:</strong> Chapter context • Category descriptions • Score-based prioritization<br>
                <strong>Note:</strong> This tool is for research purposes only and should not replace professional medical diagnosis
            </p>
        </div>
        """)

        # Auto-refresh server status every 10 seconds
        demo.load(get_server_status, outputs=server_status, every=10)

    return demo
761
+
762
# Handle to the background FastAPI thread; assigned in the __main__ block.
server_thread = None

def graceful_shutdown():
    """Log that we are going down; extend with real cleanup when needed."""
    logger.info("Shutting down application...")

# Route SIGTERM/SIGINT (what Spaces sends on restart/stop) through the hook.
signal.signal(signal.SIGTERM, lambda signum, frame: graceful_shutdown())
signal.signal(signal.SIGINT, lambda signum, frame: graceful_shutdown())
773
+
774
# Main application entry point for Hugging Face Spaces
if __name__ == "__main__":
    logger.info("Starting ICD-10 Search Application for Hugging Face Spaces...")

    try:
        # Start FastAPI server in a background daemon thread so Gradio can
        # own the main thread (required by Spaces).
        logger.info("Initializing FastAPI server thread...")
        server_thread = threading.Thread(target=start_fastapi_server, daemon=True)
        server_thread.start()
        logger.info("FastAPI server thread started")

        # Fixed grace period before the UI starts probing /health.
        logger.info("Waiting for FastAPI server initialization...")
        time.sleep(8)  # Increased wait time for Spaces

        # Create and launch Gradio interface
        logger.info("Creating Gradio interface...")
        demo = create_gradio_interface()

        # Launch for Spaces environment.
        # NOTE(review): show_tips and prevent_thread_lock were removed from
        # launch() in newer Gradio releases — confirm the pinned gradio
        # version in requirements.txt still accepts them.
        logger.info("Launching Gradio interface for Hugging Face Spaces...")
        demo.launch(
            share=False,               # Don't create public link
            show_error=True,           # Show errors for debugging
            show_tips=False,           # Don't show Gradio tips
            quiet=False,               # Show startup info
            server_name="0.0.0.0",     # Listen on all interfaces for Spaces
            server_port=7860,          # Default Gradio port for Spaces
            prevent_thread_lock=False,
            root_path=os.environ.get("GRADIO_ROOT_PATH", "")  # Support for Spaces routing
        )

    except Exception as e:
        logger.error(f"Application failed to start: {e}")
        sys.exit(1)
chapter_retrieval_system_v2.py ADDED
@@ -0,0 +1,865 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import pprint
import re
import time
from collections import Counter, defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Dict, Optional, Tuple, Set

import numpy as np
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import (
    VectorParams,
    Distance,
    PointStruct,
    Filter,
    FieldCondition,
    MatchValue,
)
from sentence_transformers import SentenceTransformer
13
+
14
+ # Load environment variables
15
+ load_dotenv()
16
+
17
+
18
+ class MultiCollectionChapterRetrieval:
19
+ def __init__(self, use_cloud: bool = True):
20
+ """
21
+ Initialize with Qdrant Cloud or local connection
22
+
23
+ Args:
24
+ use_cloud: If True, connects to Qdrant Cloud using environment variables
25
+ """
26
+ if use_cloud:
27
+ self.client = self._create_cloud_client()
28
+ else:
29
+ self.client = QdrantClient("http://localhost:6333")
30
+
31
+ self.encoder = None
32
+
33
+ # ICD-10 Chapter mapping (all 22 chapters)
34
+ self.chapter_info = {
35
+ "chapter_1_I": "Certain infectious and parasitic diseases",
36
+ "chapter_2_II": "Neoplasms",
37
+ "chapter_3_III": "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism",
38
+ "chapter_4_IV": "Endocrine, nutritional and metabolic diseases",
39
+ "chapter_5_V": "Mental and behavioural disorders",
40
+ "chapter_6_VI": "Diseases of the nervous system",
41
+ "chapter_7_VII": "Diseases of the eye and adnexa",
42
+ "chapter_8_VIII": "Diseases of the ear and mastoid process",
43
+ "chapter_9_IX": "Diseases of the circulatory system",
44
+ "chapter_10_X": "Diseases of the respiratory system",
45
+ "chapter_11_XI": "Diseases of the digestive system",
46
+ "chapter_12_XII": "Diseases of the skin and subcutaneous tissue",
47
+ "chapter_13_XIII": "Diseases of the musculoskeletal system and connective tissue",
48
+ "chapter_14_XIV": "Diseases of the genitourinary system",
49
+ "chapter_15_XV": "Pregnancy, childbirth and the puerperium",
50
+ "chapter_16_XVI": "Certain conditions originating in the perinatal period",
51
+ "chapter_17_XVII": "Congenital malformations, deformations and chromosomal abnormalities",
52
+ "chapter_18_XVIII": "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified",
53
+ "chapter_19_XIX": "Injury, poisoning and certain other consequences of external causes",
54
+ "chapter_20_XX": "External causes of morbidity and mortality",
55
+ "chapter_21_XXI": "Factors influencing health status and contact with health services",
56
+ "chapter_22_XXII": "Codes for special purposes"
57
+ }
58
+
59
+ # Cache for collection names
60
+ self._chapter_collections = None
61
+
62
+ def _create_cloud_client(self) -> QdrantClient:
63
+ """Create Qdrant Cloud client with authentication"""
64
+ qdrant_url = os.getenv('QDRANT_URL')
65
+ qdrant_api_key = os.getenv('QDRANT_API_KEY')
66
+
67
+ if not qdrant_url or not qdrant_api_key:
68
+ raise ValueError(
69
+ "Qdrant Cloud credentials not found in environment variables.\n"
70
+ "Please set QDRANT_URL and QDRANT_API_KEY in your .env file:\n"
71
+ "QDRANT_URL=https://your-cluster-id.region.aws.cloud.qdrant.io:6333\n"
72
+ "QDRANT_API_KEY=your-api-key-here"
73
+ )
74
+
75
+ print(f"🔗 Connecting to Qdrant Cloud: {qdrant_url}")
76
+
77
+ try:
78
+ client = QdrantClient(
79
+ url=qdrant_url,
80
+ api_key=qdrant_api_key,
81
+ timeout=60, # Increased timeout for cloud
82
+ # Optional: Add additional cloud-specific settings
83
+ prefer_grpc=True, # Use gRPC for better performance
84
+ )
85
+
86
+ # Test connection
87
+ collections = client.get_collections()
88
+ print(f"✅ Connected successfully! Found {len(collections.collections)} collections")
89
+
90
+
91
+ return client
92
+
93
+ except Exception as e:
94
+ print(f"❌ Failed to connect to Qdrant Cloud: {e}")
95
+ print("Please check your QDRANT_URL and QDRANT_API_KEY in the .env file")
96
+ raise
97
+
98
+ def split_into_sentences(self, text: str) -> List[str]:
99
+ """Split text into sentences using simple rules"""
100
+ import re
101
+
102
+ # Simple sentence splitting - you can enhance this with nltk or spacy if needed
103
+ sentences = re.split(r'[.!?]+', text)
104
+ sentences = [s.strip() for s in sentences if s.strip()]
105
+ return sentences
106
+
107
+ def load_encoder(self, model_name: str = "all-MiniLM-L6-v2"):
108
+ """Load the sentence transformer model"""
109
+ if self.encoder is None:
110
+ print(f"📥 Loading encoder: {model_name}")
111
+ self.encoder = SentenceTransformer(model_name)
112
+ print(f"✅ Encoder loaded successfully")
113
+
114
+ def encode_query(self, query: str) -> List[float]:
115
+ """Encode diagnostic string to vector"""
116
+ if self.encoder is None:
117
+ self.load_encoder()
118
+ return self.encoder.encode([query])[0].tolist()
119
+
120
+ def get_chapter_collections(self) -> Dict[str, str]:
121
+ """
122
+ Get mapping of chapter_id -> collection_name
123
+ Discovers collections automatically based on naming patterns
124
+ """
125
+ if self._chapter_collections is not None:
126
+ return self._chapter_collections
127
+
128
+ try:
129
+ collections = self.client.get_collections()
130
+ chapter_collections = {}
131
+
132
+ print("🔍 Discovering chapter collections...")
133
+
134
+ for collection in collections.collections:
135
+ collection_name = collection.name
136
+
137
+ # Try to match collection names to chapters
138
+ chapter_match = None
139
+
140
+ # Pattern 1: icd10_chapter_X_Y or chapter_X_Y
141
+ pattern1 = re.search(r'chapter[_-]?(\d+)[_-]?([IVX]+)', collection_name, re.IGNORECASE)
142
+ if pattern1:
143
+ chapter_num = pattern1.group(1)
144
+ roman = pattern1.group(2)
145
+ chapter_match = f"chapter_{chapter_num}_{roman}"
146
+
147
+ # Pattern 2: Single collection with all chapters (e.g., icd10_codes_all_chapters)
148
+ elif 'all' in collection_name.lower() and ('chapter' in collection_name.lower() or 'icd' in collection_name.lower()):
149
+ print(f" 📚 Found unified collection: {collection_name}")
150
+ # For unified collections, we'll handle this differently
151
+ chapter_collections['unified_collection'] = collection_name
152
+ continue
153
+
154
+ # Pattern 3: Just the chapter part (chapter1, chapterI, etc.)
155
+ elif 'chapter' in collection_name.lower():
156
+ numbers = re.findall(r'\d+', collection_name)
157
+ romans = re.findall(r'[IVX]+', collection_name)
158
+
159
+ if numbers and romans:
160
+ chapter_match = f"chapter_{numbers[0]}_{romans[0]}"
161
+ elif numbers:
162
+ # Try to convert number to roman numeral
163
+ num = int(numbers[0])
164
+ roman_map = {1: 'I', 2: 'II', 3: 'III', 4: 'IV', 5: 'V', 6: 'VI', 7: 'VII',
165
+ 8: 'VIII', 9: 'IX', 10: 'X', 11: 'XI', 12: 'XII', 13: 'XIII',
166
+ 14: 'XIV', 15: 'XV', 16: 'XVI', 17: 'XVII', 18: 'XVIII', 19: 'XIX',
167
+ 20: 'XX', 21: 'XXI', 22: 'XXII'}
168
+ if num in roman_map:
169
+ chapter_match = f"chapter_{num}_{roman_map[num]}"
170
+
171
+ if chapter_match:
172
+ chapter_collections[chapter_match] = collection_name
173
+ print(f" ✓ {chapter_match} -> {collection_name}")
174
+
175
+ print(f"📊 Found {len(chapter_collections)} chapter collections")
176
+
177
+ # If we only found a unified collection, we'll need to handle searches differently
178
+ if len(chapter_collections) == 1 and 'unified_collection' in chapter_collections:
179
+ print("⚠️ Only unified collection found. Searches will use chapter filtering.")
180
+
181
+ self._chapter_collections = chapter_collections
182
+ return chapter_collections
183
+
184
+ except Exception as e:
185
+ print(f"❌ Error discovering collections: {e}")
186
+ return {}
187
+
188
+ def search_single_collection(
189
+ self,
190
+ collection_name: str,
191
+ query_vector: List[float],
192
+ limit: int = 20,
193
+ score_threshold: float = 0.3,
194
+ chapter_filter: Optional[str] = None
195
+ ) -> List[Dict]:
196
+ """Search a single collection and return formatted results"""
197
+ try:
198
+ # Build search parameters
199
+ search_params = {
200
+ "collection_name": collection_name,
201
+ "query_vector": query_vector,
202
+ "limit": limit,
203
+ "score_threshold": score_threshold
204
+ }
205
+
206
+ results = self.client.search(**search_params)
207
+
208
+ formatted_results = []
209
+ for result in results:
210
+ formatted_results.append({
211
+ 'collection': collection_name,
212
+ 'score': result.score,
213
+ 'id': result.id,
214
+ 'payload': result.payload
215
+ })
216
+
217
+ return formatted_results
218
+
219
+ except Exception as e:
220
+ print(f"❌ Error searching {collection_name}: {e}")
221
+ if "timeout" in str(e).lower():
222
+ print(" This might be due to network issues. Retrying with lower limit...")
223
+ try:
224
+ # Retry with reduced parameters
225
+ search_params["limit"] = min(limit, 10)
226
+ search_params["score_threshold"] = max(score_threshold, 0.5)
227
+ results = self.client.search(**search_params)
228
+
229
+ formatted_results = []
230
+ for result in results:
231
+ formatted_results.append({
232
+ 'collection': collection_name,
233
+ 'score': result.score,
234
+ 'id': result.id,
235
+ 'payload': result.payload
236
+ })
237
+ return formatted_results
238
+ except:
239
+ pass
240
+ return []
241
+
242
+ def analyze_chapters_parallel(
243
+ self,
244
+ diagnostic_string: str,
245
+ sample_size_per_chapter: int = 15,
246
+ score_threshold: float = 0.3,
247
+ max_workers: int = 4 # Reduced for cloud stability
248
+ ) -> Dict[str, Dict]:
249
+ """
250
+ Analyze all chapter collections in parallel to determine relevance
251
+ Optimized for cloud performance
252
+ """
253
+ query_vector = self.encode_query(diagnostic_string)
254
+ chapter_collections = self.get_chapter_collections()
255
+
256
+ if not chapter_collections:
257
+ print("❌ No chapter collections found!")
258
+ return {}
259
+
260
+ print(f"\n🔍 Analyzing diagnostic: '{diagnostic_string}'")
261
+
262
+ # Handle unified collection differently
263
+ # if 'unified_collection' in chapter_collections:
264
+ # return self._analyze_unified_collection(
265
+ # diagnostic_string, query_vector,
266
+ # chapter_collections['unified_collection'],
267
+ # sample_size_per_chapter, score_threshold
268
+ # )
269
+
270
+ print(f"🔄 Searching {len(chapter_collections)} collections in parallel...")
271
+
272
+ chapter_analysis = {}
273
+
274
+ def search_chapter(chapter_id: str, collection_name: str) -> Tuple[str, List[Dict]]:
275
+ """Search function for parallel execution with retry logic"""
276
+ max_retries = 2
277
+ for attempt in range(max_retries):
278
+ try:
279
+ results = self.search_single_collection(
280
+ collection_name, query_vector, sample_size_per_chapter, score_threshold
281
+ )
282
+ return chapter_id, results
283
+ except Exception as e:
284
+ if attempt < max_retries - 1:
285
+ print(f" ⚠️ Retry {attempt + 1} for {chapter_id}: {e}")
286
+ time.sleep(1) # Brief delay before retry
287
+ else:
288
+ print(f" ❌ Failed {chapter_id} after {max_retries} attempts: {e}")
289
+ return chapter_id, []
290
+
291
+ # Execute searches in parallel
292
+ start_time = time.time()
293
+
294
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
295
+ # Submit all search tasks
296
+ future_to_chapter = {
297
+ executor.submit(search_chapter, chapter_id, collection_name): chapter_id
298
+ for chapter_id, collection_name in chapter_collections.items()
299
+ if chapter_id != 'unified_collection'
300
+ }
301
+
302
+ # Collect results as they complete
303
+ for future in as_completed(future_to_chapter):
304
+ chapter_id = future_to_chapter[future]
305
+ try:
306
+ chapter_id, results = future.result(timeout=30) # 30 second timeout per search
307
+
308
+ if results:
309
+ scores = [r['score'] for r in results]
310
+
311
+ # Calculate chapter statistics
312
+ chapter_analysis[chapter_id] = {
313
+ 'collection_name': chapter_collections[chapter_id],
314
+ 'match_count': len(results),
315
+ 'max_score': max(scores),
316
+ 'avg_score': np.mean(scores),
317
+ 'median_score': np.median(scores),
318
+ 'min_score': min(scores),
319
+ 'score_std': np.std(scores),
320
+ 'top_matches': sorted(results, key=lambda x: x['score'], reverse=True)[:5],
321
+ 'all_results': results
322
+ }
323
+
324
+ # Calculate relevance score (weighted combination of metrics)
325
+ relevance = (
326
+ chapter_analysis[chapter_id]['avg_score'] * 0.4 +
327
+ chapter_analysis[chapter_id]['max_score'] * 0.3 +
328
+ min(len(results) / sample_size_per_chapter, 1.0) * 0.2 +
329
+ (1.0 / (1.0 + chapter_analysis[chapter_id]['score_std'])) * 0.1
330
+ )
331
+
332
+ chapter_analysis[chapter_id]['relevance_score'] = relevance
333
+
334
+ # print(f" ✅ {chapter_id}: {len(results)} matches, relevance: {relevance:.4f}")
335
+ # else:
336
+ # print(f" ➖ {chapter_id}: No matches above threshold")
337
+
338
+ except Exception as e:
339
+ print(f" ❌ {chapter_id}: Error - {e}")
340
+
341
+ elapsed = time.time() - start_time
342
+ print(f"⏱️ Parallel analysis completed in {elapsed:.2f} seconds")
343
+
344
+ # Sort by relevance score
345
+ sorted_analysis = dict(sorted(
346
+ chapter_analysis.items(),
347
+ key=lambda x: x[1]['relevance_score'],
348
+ reverse=True
349
+ ))
350
+
351
+ return sorted_analysis
352
+
353
+ def _analyze_unified_collection(
354
+ self,
355
+ diagnostic_string: str,
356
+ query_vector: List[float],
357
+ collection_name: str,
358
+ sample_size_per_chapter: int,
359
+ score_threshold: float
360
+ ) -> Dict[str, Dict]:
361
+ """Analyze unified collection by searching with chapter filters"""
362
+ print(f"🔄 Analyzing unified collection: {collection_name}")
363
+
364
+ chapter_analysis = {}
365
+
366
+ # Search each chapter in the unified collection
367
+ for chapter_id in self.chapter_info.keys():
368
+ try:
369
+ results = self.search_single_collection(
370
+ collection_name, query_vector, sample_size_per_chapter,
371
+ score_threshold, chapter_filter=chapter_id
372
+ )
373
+
374
+ if results:
375
+ scores = [r['score'] for r in results]
376
+
377
+ chapter_analysis[chapter_id] = {
378
+ 'collection_name': collection_name,
379
+ 'match_count': len(results),
380
+ 'max_score': max(scores),
381
+ 'avg_score': np.mean(scores),
382
+ 'median_score': np.median(scores),
383
+ 'min_score': min(scores),
384
+ 'score_std': np.std(scores),
385
+ 'top_matches': sorted(results, key=lambda x: x['score'], reverse=True)[:5],
386
+ 'all_results': results
387
+ }
388
+
389
+ # Calculate relevance score
390
+ relevance = (
391
+ chapter_analysis[chapter_id]['avg_score'] * 0.4 +
392
+ chapter_analysis[chapter_id]['max_score'] * 0.3 +
393
+ min(len(results) / sample_size_per_chapter, 1.0) * 0.2 +
394
+ (1.0 / (1.0 + chapter_analysis[chapter_id]['score_std'])) * 0.1
395
+ )
396
+
397
+ chapter_analysis[chapter_id]['relevance_score'] = relevance
398
+ print(f" ✅ {chapter_id}: {len(results)} matches, relevance: {relevance:.4f}")
399
+ else:
400
+ print(f" ➖ {chapter_id}: No matches above threshold")
401
+
402
+ # Small delay to avoid overwhelming the cloud service
403
+ time.sleep(0.1)
404
+
405
+ except Exception as e:
406
+ print(f" ❌ {chapter_id}: Error - {e}")
407
+
408
+ # Sort by relevance score
409
+ return dict(sorted(
410
+ chapter_analysis.items(),
411
+ key=lambda x: x[1]['relevance_score'],
412
+ reverse=True
413
+ ))
414
+
415
+ def get_top_chapters(
416
+ self,
417
+ diagnostic_string: str,
418
+ top_n: int = 5,
419
+ min_relevance: float = 0.1
420
+ ) -> List[Tuple[str, float, str]]:
421
+ """
422
+ Get top N most relevant chapters for a diagnostic string
423
+ Returns: [(chapter_id, relevance_score, description)]
424
+ """
425
+ analysis = self.analyze_chapters_parallel(diagnostic_string)
426
+
427
+ top_chapters = []
428
+ for chapter_id, stats in analysis.items():
429
+ relevance = stats['relevance_score']
430
+
431
+ if relevance >= min_relevance and len(top_chapters) < top_n:
432
+ description = self.chapter_info.get(chapter_id, "Unknown chapter")
433
+ top_chapters.append((chapter_id, relevance, description))
434
+
435
+ return top_chapters
436
+
437
    def search_targeted_chapters(
        self,
        diagnostic_string: str,
        target_chapters: Optional[List[str]] = None,
        results_per_chapter: int = 10,  # Keep for backward compatibility
        results_per_sentence: int = 3,
        chapters_per_sentence: int = 2  # New parameter: how many top chapters to search per sentence
    ) -> Dict[str, Dict[str, List[Dict]]]:
        """
        Search only specific chapters or auto-identify top chapters for each sentence individually.

        The diagnostic string is split into sentences; each sentence is then
        searched either in its own most-relevant chapters (when
        ``target_chapters`` is None) or in the caller-supplied chapters.

        Args:
            diagnostic_string: Free-form diagnostic text (may contain several sentences).
            target_chapters: Explicit chapter_ids to search; None enables
                per-sentence auto-identification via get_top_chapters.
            results_per_chapter: Unused; retained for backward compatibility.
            results_per_sentence: Hits to return per sentence per chapter.
            chapters_per_sentence: In auto mode, how many top chapters to search per sentence.

        Returns:
            {chapter_id: {sentence_key: {'text', 'chapter_relevance', 'results'}}}
            where 'chapter_relevance' is None in pre-specified mode. The
            verbose prints throughout are intentional debug tracing.
        """
        print(f"\n=== STARTING search_targeted_chapters ===")
        print(f"Input parameters:")
        print(f" diagnostic_string: '{diagnostic_string[:100]}{'...' if len(diagnostic_string) > 100 else ''}'")
        print(f" target_chapters: {target_chapters}")
        print(f" results_per_sentence: {results_per_sentence}")
        print(f" chapters_per_sentence: {chapters_per_sentence}")

        # Split input into sentences first
        print(f"\n--- SENTENCE SPLITTING ---")
        sentences = self.split_into_sentences(diagnostic_string)
        print(f"Split into {len(sentences)} sentences:")
        for i, sentence in enumerate(sentences):
            print(f" [{i+1}]: '{sentence}'")

        print(f"\n--- GETTING CHAPTER COLLECTIONS ---")
        chapter_collections = self.get_chapter_collections()
        print(f"Available chapter collections: {len(chapter_collections)} total")
        print(f"Chapter IDs: {list(chapter_collections.keys())}")

        results = {}

        if target_chapters is None:
            # Auto mode: each sentence gets its own shortlist of chapters.
            print(f"\n=== AUTO-IDENTIFICATION MODE ===")
            print("Auto-identifying most relevant chapters for each sentence individually...")

            for i, sentence in enumerate(sentences):
                if sentence.strip():  # Skip empty sentences
                    sentence_key = f"sentence_{i+1}"
                    print(f"\n--- Processing sentence {i+1} ---")
                    print(f"Sentence: '{sentence}'")
                    print(f"Sentence key: {sentence_key}")

                    # Get top chapters specifically for THIS sentence
                    print(f"Getting top {chapters_per_sentence} chapters for this sentence...")
                    try:
                        sentence_top_chapters = self.get_top_chapters(
                            sentence,
                            top_n=chapters_per_sentence,
                            min_relevance=0.05
                        )
                        print(f"Found {len(sentence_top_chapters)} relevant chapters:")
                        for j, (ch_id, rel, desc) in enumerate(sentence_top_chapters):
                            print(f" [{j+1}] {ch_id}: {rel:.4f} - {desc}")
                    except Exception as e:
                        print(f"ERROR in get_top_chapters: {e}")
                        sentence_top_chapters = []

                    # Search only the relevant chapters for this specific sentence
                    print(f"Searching in {len(sentence_top_chapters)} selected chapters...")
                    for chapter_id, relevance, description in sentence_top_chapters:
                        print(f"\n >> Searching chapter: {chapter_id} (relevance: {relevance:.4f})")

                        if chapter_id in chapter_collections:
                            collection_name = chapter_collections[chapter_id]
                            print(f" Collection name: {collection_name}")

                            # Initialize chapter in results if not exists
                            if chapter_id not in results:
                                results[chapter_id] = {}
                                print(f" Initialized results dict for chapter {chapter_id}")

                            # Search this sentence in this specific chapter
                            try:
                                print(f" Encoding query for sentence...")
                                query_vector = self.encode_query(sentence)
                                print(f" Query vector shape: {getattr(query_vector, 'shape', 'N/A')}")

                                print(f" Searching collection '{collection_name}' for top {results_per_sentence} results...")
                                sentence_results = self.search_single_collection(
                                    collection_name, query_vector, results_per_sentence
                                )
                                print(f" Raw search returned {len(sentence_results) if sentence_results else 0} results")

                            except Exception as e:
                                print(f" ERROR during search: {e}")
                                sentence_results = []

                            if sentence_results:
                                results[chapter_id][sentence_key] = {
                                    'text': sentence,
                                    'chapter_relevance': relevance,
                                    'results': sentence_results
                                }
                                print(f" ✓ Stored {len(sentence_results)} results for {chapter_id}[{sentence_key}]")

                                # Debug: show top result scores
                                if sentence_results:
                                    top_scores = [r.get('score', 'N/A') for r in sentence_results[:3]]
                                    print(f" Top 3 scores: {top_scores}")
                            else:
                                print(f" ✗ No results above threshold for {chapter_id}")
                        else:
                            print(f" ERROR: Chapter {chapter_id} collection not found in available collections")
                else:
                    print(f"\n--- Skipping empty sentence {i+1} ---")

        else:
            # Explicit mode: every sentence is searched in every valid target chapter.
            print(f"\n=== PRE-SPECIFIED CHAPTERS MODE ===")
            print(f"Using pre-specified chapters: {target_chapters}")

            # Validate chapters exist
            valid_chapters = []
            invalid_chapters = []
            for chapter_id in target_chapters:
                if chapter_id in chapter_collections:
                    valid_chapters.append(chapter_id)
                else:
                    invalid_chapters.append(chapter_id)

            print(f"Valid chapters: {valid_chapters}")
            if invalid_chapters:
                print(f"WARNING: Invalid chapters (will be skipped): {invalid_chapters}")

            for chapter_id in valid_chapters:
                collection_name = chapter_collections[chapter_id]
                print(f"\n--- Searching chapter: {chapter_id} ---")
                print(f"Collection name: {collection_name}")

                chapter_results = {}

                # Search each sentence in this chapter
                for i, sentence in enumerate(sentences):
                    if sentence.strip():  # Skip empty sentences
                        sentence_key = f"sentence_{i+1}"
                        print(f"\n >> Processing sentence {i+1} in {chapter_id}")
                        print(f" Sentence: '{sentence}'")

                        try:
                            print(f" Encoding query...")
                            query_vector = self.encode_query(sentence)
                            print(f" Query vector shape: {getattr(query_vector, 'shape', 'N/A')}")

                            print(f" Searching for top {results_per_sentence} results...")
                            sentence_results = self.search_single_collection(
                                collection_name, query_vector, results_per_sentence
                            )
                            print(f" Found {len(sentence_results) if sentence_results else 0} results")

                        except Exception as e:
                            print(f" ERROR during search: {e}")
                            sentence_results = []

                        if sentence_results:
                            chapter_results[sentence_key] = {
                                'text': sentence,
                                'chapter_relevance': None,  # Not calculated for pre-specified chapters
                                'results': sentence_results
                            }
                            print(f" ✓ Stored results for sentence {i+1}")

                            # Debug: show top result scores
                            top_scores = [r.get('score', 'N/A') for r in sentence_results[:3]]
                            print(f" Top 3 scores: {top_scores}")
                        else:
                            print(f" ✗ No results found for sentence {i+1}")
                    else:
                        print(f" >> Skipping empty sentence {i+1}")

                if chapter_results:
                    results[chapter_id] = chapter_results
                    print(f"\n ✓ Chapter {chapter_id}: Stored results for {len(chapter_results)} sentences")
                else:
                    print(f"\n ✗ Chapter {chapter_id}: No results found")

        # Final summary
        print(f"\n=== SEARCH COMPLETE ===")
        print(f"Results summary:")
        total_results = 0
        for chapter_id, chapter_data in results.items():
            sentence_count = len(chapter_data)
            result_count = sum(len(sent_data.get('results', [])) for sent_data in chapter_data.values())
            total_results += result_count
            print(f" {chapter_id}: {sentence_count} sentences, {result_count} total results")

        print(f"Grand total: {len(results)} chapters, {total_results} results")
        print(f"=== END search_targeted_chapters ===\n")

        return results
627
+
628
+ def format_chapter_analysis(self, diagnostic_string: str, detailed: bool = True) -> str:
629
+ """Format comprehensive chapter analysis"""
630
+ analysis = self.analyze_chapters_parallel(diagnostic_string)
631
+
632
+ if not analysis:
633
+ return "❌ No relevant chapters found."
634
+
635
+ output = []
636
+ output.append(f"\n{'='*90}")
637
+ output.append(f"📊 CHAPTER RELEVANCE ANALYSIS")
638
+ output.append(f"🔍 Diagnostic: '{diagnostic_string}'")
639
+ output.append(f"{'='*90}")
640
+
641
+ for i, (chapter_id, stats) in enumerate(analysis.items(), 1):
642
+ if stats['relevance_score'] < 0.05: # Skip very low relevance
643
+ continue
644
+
645
+ description = self.chapter_info.get(chapter_id, "Unknown chapter")
646
+
647
+ output.append(f"\n{i}. 📚 {chapter_id.upper()}")
648
+ output.append(f" 🏷️ Collection: {stats['collection_name']}")
649
+ output.append(f" 📖 Description: {description}")
650
+ output.append(f" ⭐ Relevance Score: {stats['relevance_score']:.4f}")
651
+ output.append(f" 📊 Statistics:")
652
+ output.append(f" • Matches: {stats['match_count']}")
653
+ output.append(f" • Max Score: {stats['max_score']:.4f}")
654
+ output.append(f" • Avg Score: {stats['avg_score']:.4f}")
655
+ output.append(f" • Score Range: {stats['min_score']:.4f} - {stats['max_score']:.4f}")
656
+
657
+ if detailed:
658
+ output.append(f"\n 🎯 Top Matches:")
659
+ for j, match in enumerate(stats['top_matches'][:3], 1):
660
+ code = match['payload'].get('code', 'N/A')
661
+ title = match['payload'].get('title', 'N/A')
662
+ score = match['score']
663
+ output.append(f" {j}. {code} - {title}")
664
+ output.append(f" 💯 Similarity: {score:.4f}")
665
+
666
+ output.append("-" * 90)
667
+
668
+ return "\n".join(output)
669
+
670
+
671
+ # Convenience functions for multi-collection setup
672
def analyze_diagnostic_chapters(diagnostic_string: str, detailed: bool = True, use_cloud: bool = True) -> str:
    """
    Convenience entry point: build a retriever and return the formatted
    chapter-relevance report for a diagnostic string.
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)
    return retriever.format_chapter_analysis(diagnostic_string, detailed)
678
+
679
def get_relevant_chapters(diagnostic_string: str, top_n: int = 5, use_cloud: bool = True) -> List[str]:
    """
    Return the IDs of the most relevant chapters for a diagnostic string,
    e.g. ['chapter_9_IX', 'chapter_10_X', ...].
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)
    # get_top_chapters yields (chapter_id, relevance, description) tuples.
    return [
        chapter_id
        for chapter_id, _relevance, _description in retriever.get_top_chapters(diagnostic_string, top_n)
    ]
687
+
688
def smart_diagnostic_search(
    diagnostic_string: str,
    auto_select_chapters: bool = True,
    target_chapters: List[str] = None,
    results_per_sentence: int = 3,
    use_cloud: bool = True
) -> Dict[str, Dict[str, List[Dict]]]:
    """
    Intelligent diagnostic search that processes each sentence separately.
    Optimized for Qdrant Cloud.

    Args:
        diagnostic_string: Free-form diagnostic text.
        auto_select_chapters: Retained for backward compatibility only — both
            branches of the old conditional issued the identical call, so the
            flag never affected behavior. Chapter auto-selection is actually
            controlled by ``target_chapters is None`` inside
            search_targeted_chapters.
        target_chapters: Explicit chapter_ids, or None for auto-identification.
        results_per_sentence: Hits to return per sentence per chapter.
        use_cloud: Connect to Qdrant Cloud (True) or a local instance.

    Returns:
        {chapter_id: {sentence_key: {'text', 'chapter_relevance', 'results'}}}
    """
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)

    # The previous if/else on auto_select_chapters was dead code (identical
    # branches); collapsed to a single call with unchanged behavior.
    return retriever.search_targeted_chapters(
        diagnostic_string, target_chapters, results_per_sentence=results_per_sentence
    )
709
+
710
def format_smart_search_results(
    diagnostic_string: str,
    search_results: Dict[str, Dict[str, List[Dict]]],
    use_cloud: bool = True
) -> str:
    """Format the results produced by sentence-based smart_diagnostic_search."""
    if not search_results:
        return "❌ No results found."

    # A retriever instance is needed for its chapter_info lookup table.
    retriever = MultiCollectionChapterRetrieval(use_cloud=use_cloud)

    report = [
        f"\n{'='*90}",
        f"🔍 SENTENCE-BASED DIAGNOSTIC SEARCH RESULTS",
        f"🎯 Query: '{diagnostic_string}'",
        f"{'='*90}",
    ]

    # Tally totals up front for the summary line.
    total_sentences = sum(len(chapter_results) for chapter_results in search_results.values())
    total_results = sum(
        len(sentence_data['results'])
        for chapter_results in search_results.values()
        for sentence_data in chapter_results.values()
    )

    report.append(f"📊 Total results: {total_results} across {len(search_results)} chapters and {total_sentences} sentences")

    for chapter_id, chapter_data in search_results.items():
        description = retriever.chapter_info.get(chapter_id, "Unknown chapter")

        report.append(f"\n📚 {chapter_id.upper()}")
        report.append(f" 📖 {description}")
        report.append(f" 📝 {len(chapter_data)} sentences processed")
        report.append("-" * 60)

        for sentence_key, sentence_data in chapter_data.items():
            sentence_text = sentence_data['text']
            hits = sentence_data['results']

            report.append(f"\n 🔍 {sentence_key.replace('_', ' ').title()}: \"{sentence_text}\"")
            report.append(f" 🎯 Top {len(hits)} matches:")
            report.append("")

            for rank, hit in enumerate(hits, 1):
                payload = hit['payload']
                code = payload.get('code', 'N/A')
                title = payload.get('title', 'N/A')
                score = hit['score']

                report.append(f" {rank}. {code} - {title}")
                report.append(f" 💯 Score: {score:.4f}")

                # Show a (truncated) description when one is available.
                desc = payload.get('description', '')
                if desc:
                    desc_preview = desc[:100] + "..." if len(desc) > 100 else desc
                    report.append(f" 📄 {desc_preview}")

                report.append("")

        report.append("=" * 90)

    return "\n".join(report)
774
+
775
+ # Example usage
776
def example_multi_collection_analysis(use_cloud: bool = True):
    """Demonstrate the multi-collection chapter analysis on sample diagnostics."""
    test_cases = [
        "severe chest pain with shortness of breath",
        "type 2 diabetes with kidney complications",
        "depression and anxiety disorder",
        "broken wrist from falling",
        "acute appendicitis with fever",
        "skin cancer melanoma",
        "pregnancy complications in third trimester"
    ]

    for diagnostic in test_cases:
        print(f"\n{'='*100}")
        print(f"🔍 ANALYZING: {diagnostic}")
        print(f"{'='*100}")

        try:
            # 1) Full relevance report across chapters.
            analysis = analyze_diagnostic_chapters(diagnostic, detailed=False, use_cloud=use_cloud)
            print(analysis)

            # 2) Shortlist of the best-matching chapters.
            top_chapters = get_relevant_chapters(diagnostic, top_n=3, use_cloud=use_cloud)
            print(f"\n🏆 Top 3 relevant chapters: {top_chapters}")

            # 3) Sentence-level search inside those chapters.
            search_results = smart_diagnostic_search(
                diagnostic,
                results_per_sentence=5,
                use_cloud=use_cloud
            )
            formatted_results = format_smart_search_results(
                diagnostic,
                search_results,
                use_cloud=use_cloud
            )
            print(formatted_results)

        except Exception as e:
            print(f"❌ Error processing '{diagnostic}': {e}")
            continue
819
+
820
def test_cloud_connection():
    """Smoke-test Qdrant Cloud connectivity and basic search.

    Returns:
        True when the connection works and chapter collections are visible;
        False otherwise.
    """
    print("🧪 Testing Qdrant Cloud Connection...")

    try:
        retriever = MultiCollectionChapterRetrieval(use_cloud=True)

        test_query = "heart disease"
        print(f"\n🔬 Testing with query: '{test_query}'")

        collections = retriever.get_chapter_collections()
        print(f"📊 Available collections: {len(collections)}")

        # Guard clause: nothing to search against.
        if not collections:
            print("⚠️ No collections found")
            return False

        top_chapters = retriever.get_top_chapters(test_query, top_n=3)
        print(f"🎯 Top chapters for '{test_query}': {[ch[0] for ch in top_chapters]}")

        print("✅ Cloud connection test successful!")
        return True

    except Exception as e:
        print(f"❌ Cloud connection test failed: {e}")
        return False
849
+
850
if __name__ == "__main__":
    # Verify cloud connectivity before running the (slow) demo analyses.
    if test_cloud_connection():
        print("\n" + "=" * 100)
        print("🚀 Running example analysis with Qdrant Cloud...")
        print("=" * 100)
        example_multi_collection_analysis(use_cloud=True)
    else:
        print("❌ Skipping examples due to connection issues")

    # Or use directly:
    # chapters = get_relevant_chapters("heart attack symptoms", use_cloud=True)
    # results = smart_diagnostic_search("heart attack symptoms", use_cloud=True)
    # print(format_smart_search_results("heart attack symptoms", results, use_cloud=True))
requirements.txt ADDED
File without changes
service_v2.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Query
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import JSONResponse
4
+ from pydantic import BaseModel
5
+ from typing import List, Optional, Dict, Any
6
+ import time
7
+ import logging
8
+ import pprint
9
+
10
+ # Import your existing neural searcher and the new multi-collection system
11
+ # from neural_searcher import NeuralSearcher
12
+ from chapter_retrieval_system_v2 import MultiCollectionChapterRetrieval
13
+
14
+ # Configure logging
15
+ logging.basicConfig(level=logging.INFO)
16
+ logger = logging.getLogger(__name__)
17
+
18
+ app = FastAPI(
19
+ title="ICD-10 Multi-Collection Search API",
20
+ description="Advanced ICD-10 code search with intelligent chapter detection",
21
+ version="2.0.0"
22
+ )
23
+
24
+ # Add CORS middleware for web frontend integration
25
+ app.add_middleware(
26
+ CORSMiddleware,
27
+ allow_origins=["*"], # Configure this properly for production
28
+ allow_credentials=True,
29
+ allow_methods=["*"],
30
+ allow_headers=["*"],
31
+ )
32
+
33
+ # Initialize systems
34
+ try:
35
+ # Initialize the multi-collection chapter retrieval system
36
+ chapter_retriever = MultiCollectionChapterRetrieval()
37
+
38
+ # Keep your original neural searcher for backward compatibility
39
+ # You might not need this if switching fully to multi-collection approach
40
+ # neural_searcher = NeuralSearcher(collection_name="icd10_codes_chapter_3")
41
+
42
+ logger.info("Successfully initialized search systems")
43
+ except Exception as e:
44
+ logger.error(f"Failed to initialize search systems: {e}")
45
+ chapter_retriever = None
46
+ # neural_searcher = None
47
+
48
+ # Pydantic models for request/response validation
49
+ class SearchRequest(BaseModel):
50
+ query: str
51
+ limit: Optional[int] = 10
52
+ score_threshold: Optional[float] = 0.3
53
+ search_mode: Optional[str] = "smart" # "smart", "all_chapters", "specific_chapters"
54
+ target_chapters: Optional[List[str]] = None
55
+ detailed_analysis: Optional[bool] = False
56
+ chapters_per_sentence: Optional[int] = 2 # NEW: How many chapters to search per sentence
57
+
58
+
59
+
60
+ class ChapterInfo(BaseModel):
61
+ chapter_id: str
62
+ collection_name: str
63
+ relevance_score: float
64
+ description: str
65
+ match_count: int
66
+ avg_score: float
67
+ max_score: float
68
+
69
+ class SearchResult(BaseModel):
70
+ code: str
71
+ title: str
72
+ description: Optional[str] = None
73
+ score: float
74
+ chapter_id: Optional[str] = None
75
+ collection: str
76
+ source_sentence: Optional[str] = None # NEW: Track which sentence generated this result
77
+ sentence_key: Optional[str] = None # NEW: Track sentence identifier
78
+
79
+ class SentenceResults(BaseModel):
80
+ sentence_text: str
81
+ sentence_key: str
82
+ results: List[SearchResult]
83
+ total_results: int
84
+
85
+ class SearchResponse(BaseModel):
86
+ query: str
87
+ total_results: int
88
+ search_time: float
89
+ search_mode: str
90
+ relevant_chapters: List[ChapterInfo]
91
+ results: List[SearchResult] # Keep for backward compatibility
92
+ sentence_results: Optional[List[SentenceResults]] = None # NEW: Results grouped by sentence
93
+
94
+
95
+ class ChapterAnalysisResponse(BaseModel):
96
+ query: str
97
+ analysis_time: float
98
+ chapters: List[ChapterInfo]
99
+
100
+ # Health check endpoint
101
@app.get("/health")
def health_check():
    """Liveness probe: 503 while the retrieval system failed to initialize, else healthy."""
    # chapter_retriever is set to None when module-level initialization fails.
    if chapter_retriever is None:
        raise HTTPException(status_code=503, detail="Search system not initialized")
    return {"status": "healthy", "timestamp": time.time()}
107
+
108
+ # Chapter analysis endpoint
109
@app.get("/api/analyze-chapters", response_model=ChapterAnalysisResponse)
def analyze_chapters(
    q: str = Query(..., description="Diagnostic query string"),
    detailed: bool = Query(False, description="Include detailed chapter statistics")
):
    """
    Analyze which ICD-10 chapters are most relevant for a diagnostic query.

    Returns relevance statistics for every chapter whose score clears a
    small cutoff, along with the wall-clock analysis time.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Chapter retrieval system not available")

    if not q or not q.strip():
        raise HTTPException(status_code=400, detail="Query parameter 'q' is required")

    try:
        started = time.time()

        # NOTE(review): `detailed` is accepted but not currently used here.
        analysis = chapter_retriever.analyze_chapters_parallel(
            q.strip(),
            sample_size_per_chapter=15,
            score_threshold=0.2,
        )

        elapsed = time.time() - started

        # Keep only chapters with non-negligible relevance.
        chapters = [
            ChapterInfo(
                chapter_id=chapter_id,
                collection_name=stats['collection_name'],
                relevance_score=stats['relevance_score'],
                description=chapter_retriever.chapter_info.get(chapter_id, "Unknown chapter"),
                match_count=stats['match_count'],
                avg_score=stats['avg_score'],
                max_score=stats['max_score'],
            )
            for chapter_id, stats in analysis.items()
            if stats['relevance_score'] > 0.05
        ]

        return ChapterAnalysisResponse(
            query=q,
            analysis_time=elapsed,
            chapters=chapters,
        )

    except Exception as e:
        logger.error(f"Error in chapter analysis: {e}")
        raise HTTPException(status_code=500, detail=f"Chapter analysis failed: {str(e)}")
159
+
160
+ # Smart search endpoint (main search functionality)
161
@app.post("/api/search", response_model=SearchResponse)
def search_smart(request: SearchRequest):
    """
    Advanced search with intelligent chapter detection and targeted searching.

    Thin POST wrapper; the shared implementation lives in _perform_search.
    """
    return _perform_search(request)
167
+
168
@app.get("/api/search", response_model=SearchResponse)
def search_smart_get(
    q: str = Query(..., description="Diagnostic query string"),
    limit: int = Query(10, ge=1, le=100, description="Maximum number of results"),
    score_threshold: float = Query(0.3, ge=0.0, le=1.0, description="Minimum similarity score"),
    search_mode: str = Query("smart", description="Search mode: smart, all_chapters, specific_chapters"),
    target_chapters: Optional[str] = Query(None, description="Comma-separated list of target chapters (for specific_chapters mode)"),
    detailed_analysis: bool = Query(False, description="Include detailed chapter analysis"),
    chapters_per_sentence: int = Query(2, ge=1, le=5, description="Number of chapters to search per sentence")
):
    """
    Advanced search with intelligent chapter detection (GET version).

    Mirrors the POST endpoint; `target_chapters` arrives as a comma-separated
    string and is split into a list before dispatching to _perform_search.
    """
    # "a, b ,c" -> ["a", "b", "c"]; absent/blank -> None (no chapter restriction).
    parsed_chapters = (
        [chunk.strip() for chunk in target_chapters.split(",") if chunk.strip()]
        if target_chapters
        else None
    )

    return _perform_search(
        SearchRequest(
            query=q,
            limit=limit,
            score_threshold=score_threshold,
            search_mode=search_mode,
            target_chapters=parsed_chapters,
            detailed_analysis=detailed_analysis,
            chapters_per_sentence=chapters_per_sentence,
        )
    )
197
+
198
def _perform_search(request: SearchRequest) -> SearchResponse:
    """
    Internal search logic shared by the GET and POST /api/search endpoints.

    In "smart" mode the retriever splits the query into sentences, searches
    each sentence against its most relevant chapters, and the results are
    returned both grouped per sentence (``sentence_results``) and flattened
    (``results``, kept for backward compatibility).

    Raises:
        HTTPException: 503 when the retriever is unavailable, 400 for a
            missing query or unknown search mode, 500 for unexpected errors.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Search system not available")

    if not request.query or not request.query.strip():
        raise HTTPException(status_code=400, detail="Query is required")

    try:
        start_time = time.time()
        query = request.query.strip()

        # Initialize response data.
        relevant_chapters = []
        results = []
        sentence_results = []  # Results grouped by sentence
        # FIX: initialize up front so the flattening loop below doesn't raise
        # NameError when search_mode is not "smart".
        all_results = []

        if request.search_mode == "smart":
            # Smart search: auto-identify chapters then search them sentence by sentence
            logger.info(f"Performing sentence-based smart search for: '{query}'")

            # First, analyze chapters if detailed analysis is requested
            if request.detailed_analysis:
                analysis = chapter_retriever.analyze_chapters_parallel(query)
                for chapter_id, stats in analysis.items():
                    if stats['relevance_score'] > 0.1:
                        chapter_info = ChapterInfo(
                            chapter_id=chapter_id,
                            collection_name=stats['collection_name'],
                            relevance_score=stats['relevance_score'],
                            description=chapter_retriever.chapter_info.get(chapter_id, "Unknown"),
                            match_count=stats['match_count'],
                            avg_score=stats['avg_score'],
                            max_score=stats['max_score']
                        )
                        relevant_chapters.append(chapter_info)

            # Perform sentence-based targeted search
            search_results = chapter_retriever.search_targeted_chapters(
                query,
                target_chapters=request.target_chapters,
                results_per_sentence=request.limit,  # Use full limit per sentence
                chapters_per_sentence=request.chapters_per_sentence
            )

            # Group raw hits by the sentence that produced them, while also
            # keeping a flattened list for backward compatibility.
            sentence_result_map = {}

            for chapter_id, chapter_data in search_results.items():
                for sentence_key, sentence_data in chapter_data.items():
                    sentence_text = sentence_data['text']

                    # Initialize sentence entry if not exists
                    if sentence_key not in sentence_result_map:
                        sentence_result_map[sentence_key] = {
                            'text': sentence_text,
                            'results': []
                        }

                    # Enrich each raw hit with provenance metadata.
                    for result in sentence_data['results']:
                        enriched_result = {
                            **result,
                            'chapter_id': chapter_id,
                            'source_sentence': sentence_text,
                            'sentence_key': sentence_key
                        }

                        sentence_result_map[sentence_key]['results'].append(enriched_result)
                        all_results.append(enriched_result)

            # Build per-sentence result objects.
            for sentence_key, sentence_data in sentence_result_map.items():
                # Sort sentence results by score
                sentence_data['results'].sort(key=lambda x: x['score'], reverse=True)

                # Apply score threshold and limit per sentence
                filtered_sentence_results = [
                    r for r in sentence_data['results']
                    if r['score'] >= request.score_threshold
                ][:request.limit]

                # Convert to SearchResult objects
                sentence_search_results = []
                for result in filtered_sentence_results:
                    payload = result['payload']
                    search_result = SearchResult(
                        code=payload.get('code', 'N/A'),
                        title=payload.get('title', 'N/A'),
                        description=payload.get('description'),
                        score=result['score'],
                        chapter_id=result.get('chapter_id'),
                        collection=result['collection'],
                        source_sentence=result.get('source_sentence'),
                        sentence_key=result.get('sentence_key')
                    )
                    sentence_search_results.append(search_result)

                # Only include sentences that produced at least one result.
                if sentence_search_results:
                    sentence_result_obj = SentenceResults(
                        sentence_text=sentence_data['text'],
                        sentence_key=sentence_key,
                        results=sentence_search_results,
                        total_results=len(sentence_search_results)
                    )
                    sentence_results.append(sentence_result_obj)

            # Order sentences by the mean score of their results.
            sentence_results.sort(
                key=lambda x: sum(r.score for r in x.results) / len(x.results) if x.results else 0,
                reverse=True
            )

            # Process flattened results for backward compatibility
            all_results.sort(key=lambda x: x['score'], reverse=True)
            all_results = all_results[:request.limit]

        elif request.search_mode == "all_chapters":
            # Not yet implemented for the sentence-based pipeline; returns empty results.
            logger.info("All chapters search mode - using original logic")

        elif request.search_mode == "specific_chapters":
            # Not yet implemented for the sentence-based pipeline; returns empty results.
            logger.info("Specific chapters search mode - using original logic")

        else:
            raise HTTPException(status_code=400, detail=f"Unknown search mode: {request.search_mode}")

        # Convert flattened results to response format (for backward compatibility)
        for result in all_results:
            if result['score'] >= request.score_threshold:
                payload = result['payload']
                search_result = SearchResult(
                    code=payload.get('code', 'N/A'),
                    title=payload.get('title', 'N/A'),
                    description=payload.get('description'),
                    score=result['score'],
                    chapter_id=result.get('chapter_id'),
                    collection=result['collection'],
                    source_sentence=result.get('source_sentence'),
                    sentence_key=result.get('sentence_key')
                )
                results.append(search_result)

        search_time = time.time() - start_time

        logger.info(f"Sentence-based search completed: {len(results)} total results, {len(sentence_results)} sentences in {search_time:.3f}s")

        # Debug output
        logger.info(f"Sentence results breakdown:")
        for sent_result in sentence_results:
            logger.info(f"  '{sent_result.sentence_text}': {sent_result.total_results} results")

        return SearchResponse(
            query=query,
            total_results=len(results),
            search_time=search_time,
            search_mode=request.search_mode,
            relevant_chapters=relevant_chapters,
            results=results,  # Flattened results for backward compatibility
            sentence_results=sentence_results  # Results organized by sentence
        )

    except HTTPException:
        # FIX: let deliberate HTTP errors (e.g. the 400 for an unknown mode)
        # propagate instead of being converted into a generic 500 below.
        raise
    except Exception as e:
        logger.error(f"Search error: {e}")
        raise HTTPException(status_code=500, detail=f"Search failed: {str(e)}")
374
+
375
+
376
+
377
+
378
+ # Backward compatibility endpoint (your original endpoint)
379
+ # @app.get("/api/search/legacy")
380
+ # def search_legacy(q: str):
381
+ # """
382
+ # Legacy search endpoint for backward compatibility
383
+ # Uses your original neural searcher
384
+ # """
385
+ # # if not neural_searcher:
386
+ # # raise HTTPException(status_code=503, detail="Legacy search system not available")
387
+
388
+ # if not q or not q.strip():
389
+ # raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
390
+
391
+ # try:
392
+ # result = neural_searcher.search(text=q.strip())
393
+ # return {"result": result}
394
+ # except Exception as e:
395
+ # logger.error(f"Legacy search error: {e}")
396
+ # raise HTTPException(status_code=500, detail=f"Legacy search failed: {str(e)}")
397
+
398
+ # Get available chapters
399
@app.get("/api/chapters")
def get_available_chapters():
    """
    Get list of available ICD-10 chapters and their descriptions.
    """
    if not chapter_retriever:
        raise HTTPException(status_code=503, detail="Chapter system not available")

    try:
        chapter_collections = chapter_retriever.get_chapter_collections()

        # Pair each chapter with its backing collection and description.
        chapters = [
            {
                "chapter_id": chapter_id,
                "collection_name": collection_name,
                "description": chapter_retriever.chapter_info.get(chapter_id, "Unknown chapter"),
            }
            for chapter_id, collection_name in chapter_collections.items()
        ]

        return {
            "total_chapters": len(chapters),
            "chapters": chapters,
        }
    except Exception as e:
        logger.error(f"Error getting chapters: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to get chapters: {str(e)}")
426
+
427
+ # Get search suggestions/autocomplete (optional enhancement)
428
@app.get("/api/suggest")
def get_search_suggestions(
    q: str = Query(..., min_length=2, description="Partial query for suggestions"),
    limit: int = Query(5, ge=1, le=20, description="Maximum number of suggestions")
):
    """
    Get search suggestions based on partial query.

    Simple substring match over a hard-coded vocabulary; swap in a real
    autocomplete index for production-quality suggestions.
    """
    common_terms = [
        "chest pain", "shortness of breath", "diabetes", "hypertension",
        "pneumonia", "fracture", "depression", "anxiety", "fever",
        "headache", "abdominal pain", "nausea", "vomiting", "infection",
        "cancer", "tumor", "heart attack", "stroke", "asthma"
    ]

    needle = q.lower().strip()
    matches = [term for term in common_terms if needle in term.lower()]

    return {"suggestions": matches[:limit]}
451
+
452
if __name__ == "__main__":
    import uvicorn

    # Development entry point: serve on all interfaces with access logging.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
        log_level="info",
        access_log=True
    )