moazx commited on
Commit
ddc9c77
·
1 Parent(s): 7a527fe

Enhance API security and functionality by adding authentication middleware and session management. Updated app.py to include the new auth router and integrated authentication checks for protected endpoints. Modified requirements.txt to include necessary libraries for session handling. Updated .env.example to include authentication credentials. Improved retrieval functions with query expansion for better medical term matching and enriched context in responses.

Browse files
.env.example CHANGED
@@ -6,4 +6,8 @@ LANGSMITH_API_KEY=
6
  LANGSMITH_PROJECT=
7
  LANGCHAIN_PROJECT=
8
 
9
- LANGSMITH_URL=
 
 
 
 
 
6
  LANGSMITH_PROJECT=
7
  LANGCHAIN_PROJECT=
8
 
9
+ LANGSMITH_URL=
10
+
11
+ # Authentication credentials — development defaults only. Set strong, unique values per deployment and never commit real secrets.
12
+ AUTH_USERNAME=volaris
13
+ AUTH_PASSWORD=volaris
.gitignore CHANGED
@@ -207,4 +207,4 @@ marimo/_lsp/
207
  __marimo__/
208
 
209
  # Frontend
210
- frontend/
 
207
  __marimo__/
208
 
209
  # Frontend
210
+ # frontend/
0.0.18 ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Defaulting to user installation because normal site-packages is not writeable
2
+ Requirement already satisfied: python-multipart in c:\users\moaze\appdata\roaming\python\python313\site-packages (0.0.9)
api/__pycache__/app.cpython-313.pyc CHANGED
Binary files a/api/__pycache__/app.cpython-313.pyc and b/api/__pycache__/app.cpython-313.pyc differ
 
api/__pycache__/middleware.cpython-313.pyc CHANGED
Binary files a/api/__pycache__/middleware.cpython-313.pyc and b/api/__pycache__/middleware.cpython-313.pyc differ
 
api/app.py CHANGED
@@ -6,11 +6,12 @@ from fastapi.exceptions import RequestValidationError
6
  from starlette.exceptions import HTTPException as StarletteHTTPException
7
 
8
  # Import routers
9
- from api.routers import medical, health, export
10
  from api.middleware import (
11
  ProcessTimeMiddleware,
12
  LoggingMiddleware,
13
  RateLimitMiddleware,
 
14
  get_cors_middleware_config
15
  )
16
  from fastapi.middleware.cors import CORSMiddleware
@@ -63,6 +64,7 @@ app.add_middleware(CORSMiddleware, **get_cors_middleware_config())
63
  app.add_middleware(ProcessTimeMiddleware)
64
  app.add_middleware(LoggingMiddleware)
65
  app.add_middleware(RateLimitMiddleware, calls_per_minute=100) # Adjust as needed
 
66
 
67
  # Add exception handlers
68
  app.add_exception_handler(HTTPException, http_exception_handler)
@@ -71,6 +73,7 @@ app.add_exception_handler(StarletteHTTPException, starlette_exception_handler)
71
  app.add_exception_handler(Exception, general_exception_handler)
72
 
73
  # Include routers
 
74
  app.include_router(health.router)
75
  app.include_router(medical.router)
76
  app.include_router(export.router)
 
6
  from starlette.exceptions import HTTPException as StarletteHTTPException
7
 
8
  # Import routers
9
+ from api.routers import medical, health, export, auth
10
  from api.middleware import (
11
  ProcessTimeMiddleware,
12
  LoggingMiddleware,
13
  RateLimitMiddleware,
14
+ AuthenticationMiddleware,
15
  get_cors_middleware_config
16
  )
17
  from fastapi.middleware.cors import CORSMiddleware
 
64
  app.add_middleware(ProcessTimeMiddleware)
65
  app.add_middleware(LoggingMiddleware)
66
  app.add_middleware(RateLimitMiddleware, calls_per_minute=100) # Adjust as needed
67
+ app.add_middleware(AuthenticationMiddleware) # Protect API endpoints
68
 
69
  # Add exception handlers
70
  app.add_exception_handler(HTTPException, http_exception_handler)
 
73
  app.add_exception_handler(Exception, general_exception_handler)
74
 
75
  # Include routers
76
+ app.include_router(auth.router)
77
  app.include_router(health.router)
78
  app.include_router(medical.router)
79
  app.include_router(export.router)
api/middleware.py CHANGED
@@ -3,8 +3,8 @@ Middleware for Medical RAG AI Advisor API
3
  """
4
  import time
5
  import logging
6
- from typing import Callable, Awaitable
7
- from fastapi import Request, Response, HTTPException
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from starlette.middleware.base import BaseHTTPMiddleware
10
 
@@ -90,6 +90,53 @@ class RateLimitMiddleware(BaseHTTPMiddleware):
90
  return await call_next(request)
91
 
92
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  def get_cors_middleware_config():
94
  """Get CORS middleware configuration"""
95
  return {
 
3
  """
4
  import time
5
  import logging
6
+ from typing import Callable, Awaitable, Optional
7
+ from fastapi import Request, Response, HTTPException, Cookie
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from starlette.middleware.base import BaseHTTPMiddleware
10
 
 
90
  return await call_next(request)
91
 
92
 
93
class AuthenticationMiddleware(BaseHTTPMiddleware):
    """Middleware that protects API endpoints with cookie-based session auth.

    Requests to PUBLIC_PATHS pass through untouched; every other request must
    carry a valid ``session_token`` cookie or it receives a 401 JSON response.
    """

    # Paths that don't require authentication.
    # NOTE: "/" must be matched EXACTLY. The previous prefix match against "/"
    # made every path public and disabled authentication entirely.
    PUBLIC_PATHS = [
        "/",
        "/docs",
        "/redoc",
        "/openapi.json",
        "/health",
        "/auth/login",
        "/auth/status",
    ]

    @classmethod
    def _is_public(cls, path: str) -> bool:
        """Return True when *path* may be served without authentication.

        Non-root entries match exactly or as a path-segment prefix, so
        "/docs" also covers "/docs/oauth2-redirect" but "/health" does not
        accidentally expose "/healthcheck-admin".
        """
        for public_path in cls.PUBLIC_PATHS:
            if path == public_path:
                return True
            if public_path != "/" and path.startswith(public_path.rstrip("/") + "/"):
                return True
        return False

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        # Public endpoints bypass authentication.
        if self._is_public(request.url.path):
            return await call_next(request)

        # Exceptions raised inside BaseHTTPMiddleware.dispatch are NOT routed
        # through FastAPI's exception handlers (this middleware sits outside
        # ExceptionMiddleware), so build the 401 responses directly.
        from fastapi.responses import JSONResponse

        # A session cookie is mandatory for everything else.
        session_token = request.cookies.get("session_token")
        if not session_token:
            return JSONResponse(
                status_code=401,
                content={"detail": "Authentication required"},
            )

        # Imported lazily to avoid a circular import at module load time.
        from api.routers.auth import verify_session
        session_data = verify_session(session_token)
        if not session_data:
            return JSONResponse(
                status_code=401,
                content={"detail": "Invalid or expired session"},
            )

        # Expose the authenticated user to downstream route handlers.
        request.state.user = session_data.get("username")
        return await call_next(request)
138
+
139
+
140
  def get_cors_middleware_config():
141
  """Get CORS middleware configuration"""
142
  return {
api/routers/__pycache__/medical.cpython-313.pyc CHANGED
Binary files a/api/routers/__pycache__/medical.cpython-313.pyc and b/api/routers/__pycache__/medical.cpython-313.pyc differ
 
api/routers/auth.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Authentication router for simple login system
3
+ """
4
+ import os
5
+ import secrets
6
+ from datetime import datetime, timedelta
7
+ from typing import Dict, Optional
8
+ from fastapi import APIRouter, HTTPException, Response, Cookie, Form
9
+ from fastapi.responses import JSONResponse
10
+ from pydantic import BaseModel
11
+ from itsdangerous import URLSafeTimedSerializer, BadSignature, SignatureExpired
12
+ import logging
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ router = APIRouter(prefix="/auth", tags=["Authentication"])
17
+
18
+ # Session management
19
+ SESSION_SECRET_KEY = os.getenv("SESSION_SECRET_KEY", secrets.token_hex(32))
20
+ SESSION_MAX_AGE = 86400 # 24 hours in seconds
21
+ serializer = URLSafeTimedSerializer(SESSION_SECRET_KEY)
22
+
23
+ # In-memory session store (for simple use case)
24
+ # For production, consider using Redis or database
25
+ active_sessions: Dict[str, dict] = {}
26
+
27
+
28
+ class LoginRequest(BaseModel):
29
+ username: str
30
+ password: str
31
+
32
+
33
+ class LoginResponse(BaseModel):
34
+ success: bool
35
+ message: str
36
+
37
+
38
def create_session(username: str) -> str:
    """Register a fresh server-side session for *username* and return its signed token."""
    now = datetime.utcnow()
    session_id = secrets.token_urlsafe(32)

    # Record the session with its lifetime bounds; the store is keyed by the
    # raw session id, which the client never sees directly.
    active_sessions[session_id] = {
        "username": username,
        "created_at": now.isoformat(),
        "expires_at": (now + timedelta(seconds=SESSION_MAX_AGE)).isoformat(),
    }

    # Only the signed form of the session id travels to the client.
    return serializer.dumps(session_id)
53
+
54
+
55
def verify_session(token: Optional[str]) -> Optional[dict]:
    """Return the session data for *token*, or None when the token is missing,
    tampered with, unknown, or expired."""
    if not token:
        return None

    try:
        # Unsigning raises if the token was tampered with or is older than
        # the maximum age baked into the signature.
        session_id = serializer.loads(token, max_age=SESSION_MAX_AGE)

        session_data = active_sessions.get(session_id)
        if session_data is None:
            return None

        # Enforce the server-side expiry as well, evicting stale entries lazily.
        expiry = datetime.fromisoformat(session_data["expires_at"])
        if datetime.utcnow() <= expiry:
            return session_data
        active_sessions.pop(session_id, None)
        return None
    except (BadSignature, SignatureExpired):
        return None
    except Exception as e:
        logger.error(f"Session verification error: {e}")
        return None
82
+
83
+
84
def verify_credentials(username: str, password: str) -> bool:
    """Check *username*/*password* against the AUTH_USERNAME / AUTH_PASSWORD
    environment variables (defaulting to the development credentials).

    Uses hmac.compare_digest for constant-time comparison so an attacker
    cannot recover the credentials character-by-character via response timing;
    both checks are evaluated unconditionally to avoid a short-circuit leak.
    """
    import hmac

    expected_username = os.getenv("AUTH_USERNAME", "volaris")
    expected_password = os.getenv("AUTH_PASSWORD", "volaris")

    user_ok = hmac.compare_digest(username.encode("utf-8"), expected_username.encode("utf-8"))
    pass_ok = hmac.compare_digest(password.encode("utf-8"), expected_password.encode("utf-8"))
    return user_ok and pass_ok
90
+
91
+
92
+ @router.post("/login", response_model=LoginResponse)
93
+ async def login(
94
+ response: Response,
95
+ username: str = Form(...),
96
+ password: str = Form(...)
97
+ ):
98
+ """
99
+ Login endpoint - validates credentials and creates session
100
+ """
101
+ # Verify credentials
102
+ if not verify_credentials(username, password):
103
+ logger.warning(f"Failed login attempt for username: {username}")
104
+ raise HTTPException(status_code=401, detail="Invalid username or password")
105
+
106
+ # Create session
107
+ token = create_session(username)
108
+
109
+ # Set secure cookie
110
+ response.set_cookie(
111
+ key="session_token",
112
+ value=token,
113
+ httponly=True,
114
+ max_age=SESSION_MAX_AGE,
115
+ samesite="lax",
116
+ secure=False # Set to True in production with HTTPS
117
+ )
118
+
119
+ logger.info(f"Successful login for user: {username}")
120
+
121
+ return LoginResponse(
122
+ success=True,
123
+ message="Login successful"
124
+ )
125
+
126
+
127
+ @router.post("/logout")
128
+ async def logout(
129
+ response: Response,
130
+ session_token: Optional[str] = Cookie(None)
131
+ ):
132
+ """
133
+ Logout endpoint - invalidates session
134
+ """
135
+ if session_token:
136
+ try:
137
+ session_id = serializer.loads(session_token, max_age=SESSION_MAX_AGE)
138
+ active_sessions.pop(session_id, None)
139
+ except Exception:
140
+ pass
141
+
142
+ # Clear cookie
143
+ response.delete_cookie(key="session_token")
144
+
145
+ return {"success": True, "message": "Logged out successfully"}
146
+
147
+
148
+ @router.get("/verify")
149
+ async def verify(session_token: Optional[str] = Cookie(None)):
150
+ """
151
+ Verify if current session is valid
152
+ """
153
+ session_data = verify_session(session_token)
154
+
155
+ if not session_data:
156
+ raise HTTPException(status_code=401, detail="Not authenticated")
157
+
158
+ return {
159
+ "authenticated": True,
160
+ "username": session_data.get("username")
161
+ }
162
+
163
+
164
+ @router.get("/status")
165
+ async def status(session_token: Optional[str] = Cookie(None)):
166
+ """
167
+ Check authentication status without raising exception
168
+ """
169
+ session_data = verify_session(session_token)
170
+
171
+ return {
172
+ "authenticated": session_data is not None,
173
+ "username": session_data.get("username") if session_data else None
174
+ }
core/__pycache__/agent.cpython-313.pyc CHANGED
Binary files a/core/__pycache__/agent.cpython-313.pyc and b/core/__pycache__/agent.cpython-313.pyc differ
 
core/__pycache__/background_init.cpython-313.pyc CHANGED
Binary files a/core/__pycache__/background_init.cpython-313.pyc and b/core/__pycache__/background_init.cpython-313.pyc differ
 
core/__pycache__/github_storage.cpython-313.pyc CHANGED
Binary files a/core/__pycache__/github_storage.cpython-313.pyc and b/core/__pycache__/github_storage.cpython-313.pyc differ
 
core/__pycache__/retrievers.cpython-313.pyc CHANGED
Binary files a/core/__pycache__/retrievers.cpython-313.pyc and b/core/__pycache__/retrievers.cpython-313.pyc differ
 
core/__pycache__/tools.cpython-313.pyc CHANGED
Binary files a/core/__pycache__/tools.cpython-313.pyc and b/core/__pycache__/tools.cpython-313.pyc differ
 
core/__pycache__/validation.cpython-313.pyc CHANGED
Binary files a/core/__pycache__/validation.cpython-313.pyc and b/core/__pycache__/validation.cpython-313.pyc differ
 
core/agent.py CHANGED
@@ -89,52 +89,89 @@ AVAILABLE_TOOLS = [
89
 
90
  # System message template for the agent
91
  SYSTEM_MESSAGE = """
92
- You are an advanced Medical Advisor Chatbot for healthcare professionals.
93
- Your primary purpose is to answer clinical and medical questions strictly based on authoritative medical guidelines using the tool "medical_guidelines_knowledge_tool".
94
-
95
- Your answers must be concise, medically informative, evidence-based responses in an authoritative, precise, and clinical tone.
96
- You will be responding to practicing medical professionals so adjust your answer and language accordingly.
97
-
98
- **INSTRUCTIONS:**
99
- - Always answer using only the information retrieved from medical guidelines via "medical_guidelines_knowledge_tool".
100
- - **SIDE EFFECT REPORTING**: When a healthcare professional reports an adverse drug reaction, side effect, or medication-related complication, ALWAYS use the "side_effect_recording_tool" first to document the information. Return the tool's response directly to the user without modification. DO NOT use validation or generate additional reports for side effect reporting queries.
101
- - Use the side effect recording tool when the input contains phrases like: "patient experienced", "side effect", "adverse reaction", "drug reaction", "medication caused", "developed after taking", etc.
102
- - When the side effect recording tool requests additional information, present the request exactly as provided by the tool.
103
- - **PROVIDER COMPARISON**: When the user asks to compare guidance between two providers (e.g., "compare NCCN vs ESMO on ..."), use the "compare_providers_tool" with appropriate `provider_a` and `provider_b` values to retrieve side-by-side, cited results.
104
- - **TIME/DATE QUERIES**: For any questions about the current date/time or references like "today" or "now", use the "get_current_datetime_tool". Treat this tool as the only reliable source of current time information.
105
- - For every answer, you MUST provide detailed citations including:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  * Source file name
107
- * Page number
108
- * Provider name
109
- * Specific location (e.g., Table 1, Figure 2, Box 3, Section Header, etc.)
110
- * Type of content (e.g., table, flowchart, bullet point, paragraph, etc.)
111
- - Use this format for citations:
112
- (Source: [file name], Page: [page number], Provider: [provider name], Location: [specific location], Type: [content type])
 
113
  - If multiple sources are used, cite each one with its corresponding metadata.
114
  - If a specific provider (NCCN, ASCO, ESMO, etc.) is mentioned in the question, prioritize information from that provider.
115
  - When citing tables or flowcharts:
116
- * Specify the table/figure number if available
117
- * Describe which part of the table/figure contains the information
118
- * Reference any relevant footnotes or legends
 
119
  - When citing text:
120
- * Specify the section or subsection heading
121
- * Indicate if it's from a bullet point, paragraph, or other format
122
-
123
- - If the answer is not found in the retrieved guidelines, provide a helpful response that:
124
- * Acknowledges the limitation: "Based on the available medical guidelines in my knowledge base, I could not find specific information about [topic]."
 
 
 
125
  * Suggests alternatives: "You may want to:
126
  - Rephrase your question with more specific clinical details
127
  - Specify a particular guideline provider (NCCN, ASCO, ESMO, NICE)
128
  - Consult the latest published guidelines directly for emerging topics"
129
  * Maintains professionalism: Never simply say "I don't know" - always provide context and next steps
130
- - Never speculate or provide information not present in the guidelines.
 
 
131
  - Always respond in English.
132
 
133
- **FORMATTING:**
134
- - Use markdown formatting for clarity:
135
- * Use bullet points for lists
136
- * Use bold for emphasis on key points
137
- * Use tables when summarizing multiple points
 
 
 
 
138
 
139
  **SAFETY DISCLAIMER:**
140
  Important: For emergencies call emergency services immediately. This is educational information for healthcare professionals, not a substitute for clinical judgment.
 
89
 
90
  # System message template for the agent
91
  SYSTEM_MESSAGE = """
92
+ You are an advanced Clinical Decision Support System for expert healthcare professionals, oncologists, and medical specialists.
93
+ Your primary purpose is to provide comprehensive, evidence-based clinical guidance strictly from authoritative medical guidelines using the tool "medical_guidelines_knowledge_tool".
94
+
95
+ **AUDIENCE**: Your responses are for practicing physicians, oncologists, and medical experts. Use appropriate medical terminology, clinical precision, and expert-level detail.
96
+
97
+ **RESPONSE STYLE**:
98
+ - Provide DETAILED, COMPREHENSIVE answers with clinical depth appropriate for specialists
99
+ - Use precise medical terminology without oversimplification
100
+ - Include specific clinical parameters, dosing regimens, biomarker thresholds, and staging details when available
101
+ - Reference specific tables, figures, algorithms, and flowcharts from guidelines
102
+ - Discuss nuances, clinical considerations, and evidence levels
103
+ - Compare different approaches when multiple options exist
104
+ - Highlight contraindications, special populations, and important clinical caveats
105
+
106
+ **CRITICAL INSTRUCTIONS - TOOL USAGE IS MANDATORY:**
107
+
108
+ **YOU MUST ALWAYS USE THE "medical_guidelines_knowledge_tool" FIRST FOR EVERY MEDICAL QUESTION.**
109
+ - Do NOT answer from your general knowledge or training data
110
+ - Do NOT provide information without first retrieving it from the guidelines
111
+ - ALWAYS call "medical_guidelines_knowledge_tool" before formulating your response
112
+ - Even for basic medical concepts (e.g., "what is a driver mutation"), you MUST retrieve information from the guidelines first
113
+ - Only after retrieving guideline information should you formulate your answer based on what was retrieved
114
+
115
+ **TOOL USAGE REQUIREMENTS:**
116
+ 1. **MEDICAL QUESTIONS** (definitions, treatments, guidelines, etc.):
117
+ - MANDATORY: Use "medical_guidelines_knowledge_tool" FIRST
118
+ - Then answer based ONLY on retrieved information
119
+
120
+ 2. **SIDE EFFECT REPORTING**: When a healthcare professional reports an adverse drug reaction, side effect, or medication-related complication:
121
+ - MANDATORY: Use "side_effect_recording_tool" first to document the information
122
+ - Return the tool's response directly to the user without modification
123
+ - DO NOT use validation or generate additional reports for side effect reporting queries
124
+ - Trigger phrases: "patient experienced", "side effect", "adverse reaction", "drug reaction", "medication caused", "developed after taking"
125
+
126
+ 3. **PROVIDER COMPARISON**: When comparing guidance between providers (e.g., "compare NCCN vs ESMO on ..."):
127
+ - MANDATORY: Use "compare_providers_tool" with appropriate `provider_a` and `provider_b` values
128
+
129
+ 4. **TIME/DATE QUERIES**: For current date/time or references like "today" or "now":
130
+ - MANDATORY: Use "get_current_datetime_tool"
131
+ - For every answer, you MUST provide COMPREHENSIVE citations including:
132
  * Source file name
133
+ * Page number(s) - including context pages if enriched content is provided
134
+ * Provider name (NCCN, ASCO, ESMO, NICE, etc.)
135
+ * Specific location (e.g., Table 1, Figure 2, Algorithm 3, Box 4, Section Header, etc.)
136
+ * Type of content (e.g., treatment algorithm, dosing table, biomarker criteria, staging flowchart, etc.)
137
+ * Evidence level or recommendation grade when available
138
+ - Use this format for detailed citations:
139
+ (Source: [file name], Pages: [page numbers], Provider: [provider name], Location: [specific location], Type: [content type], Evidence Level: [if available])
140
  - If multiple sources are used, cite each one with its corresponding metadata.
141
  - If a specific provider (NCCN, ASCO, ESMO, etc.) is mentioned in the question, prioritize information from that provider.
142
  - When citing tables or flowcharts:
143
+ * Specify the table/figure number and title
144
+ * Describe which specific rows, columns, or sections contain the relevant information
145
+ * Reference any relevant footnotes, legends, or annotations
146
+ * Include specific values, thresholds, or criteria mentioned
147
  - When citing text:
148
+ * Specify the section or subsection heading with full hierarchy
149
+ * Indicate if it's from a bullet point, paragraph, recommendation box, or other format
150
+ * Quote key phrases or specific recommendations when appropriate
151
+ - **ENRICHED CONTEXT**: When the retrieved content includes context pages (marked as "CONTEXT - Page X"), use this surrounding information to provide more complete clinical context and understanding
152
+
153
+ **IMPORTANT - NO GENERAL KNOWLEDGE RESPONSES:**
154
+ - If the answer is not found in the retrieved guidelines after using the tool, provide a helpful response that:
155
+ * Acknowledges the limitation: "I searched the available medical guidelines but could not find specific information about [topic]."
156
  * Suggests alternatives: "You may want to:
157
  - Rephrase your question with more specific clinical details
158
  - Specify a particular guideline provider (NCCN, ASCO, ESMO, NICE)
159
  - Consult the latest published guidelines directly for emerging topics"
160
  * Maintains professionalism: Never simply say "I don't know" - always provide context and next steps
161
+ - **NEVER answer from general knowledge or training data - ALWAYS use the tool first**
162
+ - Never speculate or provide information not present in the guidelines
163
+ - If the retrieved information is insufficient, acknowledge this and ask for clarification rather than supplementing with general knowledge
164
  - Always respond in English.
165
 
166
+ **FORMATTING FOR EXPERT AUDIENCE:**
167
+ - Use advanced markdown formatting for clinical clarity:
168
+ * Use **bold** for critical clinical points, drug names, and key recommendations
169
+ * Use bullet points and numbered lists for treatment sequences and decision algorithms
170
+ * Use tables to compare regimens, dosing schedules, or guideline differences
171
+ * Use headers (###) to organize complex responses by topic
172
+ * Use blockquotes (>) for direct guideline quotes or key recommendations
173
+ * Include specific numeric values, percentages, and statistical data when available
174
+ * Structure responses logically: Indication → Regimen → Dosing → Monitoring → Special Considerations
175
 
176
  **SAFETY DISCLAIMER:**
177
  Important: For emergencies call emergency services immediately. This is educational information for healthcare professionals, not a substitute for clinical judgment.
core/background_init.py CHANGED
@@ -51,8 +51,24 @@ class BackgroundInitializer:
51
  _ensure_initialized()
52
  self._update_progress("Retrievers initialized successfully", 90)
53
 
54
- # Step 3: Warm up LLM (optional, lightweight)
55
- self._update_progress("Warming up LLM...", 95)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  from .config import get_llm
57
  llm = get_llm()
58
  self._update_progress("All components initialized successfully", 100)
 
51
  _ensure_initialized()
52
  self._update_progress("Retrievers initialized successfully", 90)
53
 
54
+ # Step 3: Learn medical terminology from corpus
55
+ self._update_progress("Learning medical terminology from corpus...", 92)
56
+ try:
57
+ from .medical_terminology import learn_from_corpus
58
+ from . import utils
59
+
60
+ # Load chunks to learn from
61
+ chunks = utils.load_chunks()
62
+ if chunks:
63
+ # Convert to format expected by learner
64
+ documents = [{'content': chunk.page_content} for chunk in chunks[:1000]] # Limit for performance
65
+ learn_from_corpus(documents)
66
+ logger.info(f"Learned medical terminology from {len(documents)} documents")
67
+ except Exception as e:
68
+ logger.warning(f"Could not learn terminology from corpus: {e}")
69
+
70
+ # Step 4: Warm up LLM (optional, lightweight)
71
+ self._update_progress("Warming up LLM...", 97)
72
  from .config import get_llm
73
  llm = get_llm()
74
  self._update_progress("All components initialized successfully", 100)
core/context_enrichment.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Context Enrichment Module for Medical RAG
3
+
4
+ This module enriches retrieved documents with surrounding context (adjacent pages)
5
+ to provide comprehensive information for expert medical professionals.
6
+ """
7
+
8
+ from typing import List, Dict, Set, Optional
9
+ from langchain.schema import Document
10
+ from pathlib import Path
11
+ from .config import logger
12
+
13
+
14
+ class ContextEnricher:
15
+ """
16
+ Enriches retrieved documents with surrounding pages for richer context.
17
+ """
18
+
19
+ def __init__(self, cache_size: int = 100):
20
+ """
21
+ Initialize context enricher with document cache.
22
+
23
+ Args:
24
+ cache_size: Maximum number of source documents to cache
25
+ """
26
+ self._document_cache: Dict[str, List[Document]] = {}
27
+ self._cache_size = cache_size
28
+
29
+ def enrich_documents(
30
+ self,
31
+ retrieved_docs: List[Document],
32
+ pages_before: int = 1,
33
+ pages_after: int = 1,
34
+ max_enriched_docs: int = 5
35
+ ) -> List[Document]:
36
+ """
37
+ Enrich retrieved documents by adding separate context pages.
38
+
39
+ Args:
40
+ retrieved_docs: List of retrieved documents
41
+ pages_before: Number of pages to include before each document
42
+ pages_after: Number of pages to include after each document
43
+ max_enriched_docs: Maximum number of documents to enrich (top results)
44
+
45
+ Returns:
46
+ List with original documents + separate context page documents
47
+ """
48
+ if not retrieved_docs:
49
+ return []
50
+
51
+ result_docs = []
52
+ processed_sources = set()
53
+ enriched_count = 0
54
+
55
+ # Only enrich top documents to avoid overwhelming context
56
+ docs_to_enrich = retrieved_docs[:max_enriched_docs]
57
+
58
+ for doc in docs_to_enrich:
59
+ try:
60
+ # Get source information
61
+ source = doc.metadata.get('source', 'unknown')
62
+ page_num = doc.metadata.get('page_number', 1)
63
+
64
+ # Skip if already processed this source-page combination
65
+ source_page_key = f"{source}_{page_num}"
66
+ if source_page_key in processed_sources:
67
+ continue
68
+
69
+ processed_sources.add(source_page_key)
70
+
71
+ # Get surrounding pages
72
+ surrounding_docs = self._get_surrounding_pages(
73
+ doc,
74
+ pages_before,
75
+ pages_after
76
+ )
77
+
78
+ if surrounding_docs:
79
+ # Add separate documents for each page
80
+ page_docs = self._create_separate_page_documents(
81
+ doc,
82
+ surrounding_docs,
83
+ pages_before,
84
+ pages_after
85
+ )
86
+ result_docs.extend(page_docs)
87
+ enriched_count += 1
88
+
89
+ # Log enrichment details
90
+ page_numbers = [int(d.metadata.get('page_number', 0)) for d in page_docs]
91
+ logger.debug(f"Enriched {source} page {page_num} with pages: {page_numbers}")
92
+ else:
93
+ # No surrounding pages found, add original with empty enrichment metadata
94
+ original_with_metadata = self._add_empty_enrichment_metadata(doc)
95
+ result_docs.append(original_with_metadata)
96
+
97
+ except Exception as e:
98
+ logger.warning(f"Could not enrich document from {doc.metadata.get('source')}: {e}")
99
+ original_with_metadata = self._add_empty_enrichment_metadata(doc)
100
+ result_docs.append(original_with_metadata)
101
+
102
+ # Add remaining documents without enrichment
103
+ for doc in retrieved_docs[max_enriched_docs:]:
104
+ original_with_metadata = self._add_empty_enrichment_metadata(doc)
105
+ result_docs.append(original_with_metadata)
106
+
107
+ logger.info(f"Enriched {enriched_count} documents with surrounding context pages")
108
+ return result_docs
109
+
110
+ def _get_surrounding_pages(
111
+ self,
112
+ doc: Document,
113
+ pages_before: int,
114
+ pages_after: int
115
+ ) -> List[Document]:
116
+ """
117
+ Get surrounding pages for a document.
118
+
119
+ Args:
120
+ doc: Original document
121
+ pages_before: Number of pages before
122
+ pages_after: Number of pages after
123
+
124
+ Returns:
125
+ List of surrounding documents (including original), deduplicated by page number
126
+ """
127
+ source = doc.metadata.get('source', 'unknown')
128
+ page_num = doc.metadata.get('page_number', 1)
129
+ provider = doc.metadata.get('provider', 'unknown')
130
+ disease = doc.metadata.get('disease', 'unknown')
131
+
132
+ # Try to get full document from cache or load it
133
+ full_doc_pages = self._get_full_document(source, provider, disease)
134
+
135
+ if not full_doc_pages:
136
+ return []
137
+
138
+ # Find the target page and surrounding pages
139
+ target_page = int(page_num) if isinstance(page_num, (int, str)) else 1
140
+
141
+ # Use a dict to deduplicate by page number (keep first occurrence)
142
+ pages_dict = {}
143
+
144
+ for page_doc in full_doc_pages:
145
+ doc_page_num = page_doc.metadata.get('page_number', 0)
146
+ if isinstance(doc_page_num, str):
147
+ try:
148
+ doc_page_num = int(doc_page_num)
149
+ except:
150
+ continue
151
+
152
+ # Include pages within range
153
+ if target_page - pages_before <= doc_page_num <= target_page + pages_after:
154
+ # Only add if not already present (deduplication)
155
+ if doc_page_num not in pages_dict:
156
+ pages_dict[doc_page_num] = page_doc
157
+
158
+ # Return sorted by page number
159
+ surrounding = [pages_dict[pn] for pn in sorted(pages_dict.keys())]
160
+
161
+ return surrounding
162
+
163
+ def _get_full_document(
164
+ self,
165
+ source: str,
166
+ provider: str,
167
+ disease: str
168
+ ) -> Optional[List[Document]]:
169
+ """
170
+ Get full document pages from chunks cache.
171
+
172
+ Args:
173
+ source: Source filename
174
+ provider: Provider name
175
+ disease: Disease name
176
+
177
+ Returns:
178
+ List of all pages in the document, or None if not found
179
+ """
180
+ cache_key = f"{provider}_{disease}_{source}"
181
+
182
+ # Check cache
183
+ if cache_key in self._document_cache:
184
+ return self._document_cache[cache_key]
185
+
186
+ # Load from chunks cache instead of trying to reload PDFs
187
+ try:
188
+ from . import utils
189
+
190
+ # Load all chunks
191
+ all_chunks = utils.load_chunks()
192
+ if not all_chunks:
193
+ logger.debug(f"No chunks available for enrichment")
194
+ return None
195
+
196
+ # Filter chunks for this specific document
197
+ doc_pages = []
198
+ for chunk in all_chunks:
199
+ chunk_source = chunk.metadata.get('source', '')
200
+ chunk_provider = chunk.metadata.get('provider', '')
201
+ chunk_disease = chunk.metadata.get('disease', '')
202
+
203
+ # Match by source, provider, and disease
204
+ if (chunk_source == source and
205
+ chunk_provider == provider and
206
+ chunk_disease == disease):
207
+ doc_pages.append(chunk)
208
+
209
+ if not doc_pages:
210
+ logger.debug(f"Could not find chunks for document: {source} (Provider: {provider}, Disease: {disease})")
211
+ return None
212
+
213
+ # Sort by page number
214
+ doc_pages.sort(key=lambda d: int(d.metadata.get('page_number', 0)))
215
+
216
+ # Cache it (with size limit)
217
+ if len(self._document_cache) >= self._cache_size:
218
+ # Remove oldest entry
219
+ self._document_cache.pop(next(iter(self._document_cache)))
220
+
221
+ self._document_cache[cache_key] = doc_pages
222
+ logger.debug(f"Loaded {len(doc_pages)} pages for {source} from chunks cache")
223
+ return doc_pages
224
+
225
+ except Exception as e:
226
+ logger.warning(f"Error loading document from chunks cache {source}: {e}")
227
+ return None
228
+
229
+ def _create_separate_page_documents(
230
+ self,
231
+ original_doc: Document,
232
+ surrounding_docs: List[Document],
233
+ pages_before: int,
234
+ pages_after: int
235
+ ) -> List[Document]:
236
+ """
237
+ Create separate document objects for original page and context pages.
238
+
239
+ Args:
240
+ original_doc: Original retrieved document
241
+ surrounding_docs: List of surrounding documents
242
+ pages_before: Number of pages before
243
+ pages_after: Number of pages after
244
+
245
+ Returns:
246
+ List of separate documents (context pages + original page + context pages)
247
+ """
248
+ # Sort by page number
249
+ sorted_docs = sorted(
250
+ surrounding_docs,
251
+ key=lambda d: int(d.metadata.get('page_number', 0))
252
+ )
253
+
254
+ original_page = int(original_doc.metadata.get('page_number', 1))
255
+ result_docs = []
256
+
257
+ for doc in sorted_docs:
258
+ page_num = int(doc.metadata.get('page_number', 0))
259
+
260
+ # Determine if this is a context page or the original page
261
+ is_context_page = (page_num != original_page)
262
+
263
+ # Create document with appropriate metadata
264
+ page_doc = Document(
265
+ page_content=doc.page_content,
266
+ metadata={
267
+ **doc.metadata,
268
+ 'context_enrichment': is_context_page,
269
+ 'enriched': False,
270
+ 'pages_included': [],
271
+ 'primary_page': None,
272
+ 'context_pages_before': None,
273
+ 'context_pages_after': None,
274
+ }
275
+ )
276
+
277
+ result_docs.append(page_doc)
278
+
279
+ return result_docs
280
+
281
+ def _add_empty_enrichment_metadata(self, doc: Document) -> Document:
282
+ """
283
+ Add empty enrichment metadata fields to a document.
284
+
285
+ Args:
286
+ doc: Original document
287
+
288
+ Returns:
289
+ Document with enrichment metadata fields set to default values
290
+ """
291
+ return Document(
292
+ page_content=doc.page_content,
293
+ metadata={
294
+ **doc.metadata,
295
+ 'enriched': False,
296
+ 'pages_included': [],
297
+ 'primary_page': None,
298
+ 'context_pages_before': None,
299
+ 'context_pages_after': None,
300
+ }
301
+ )
302
+
303
+
304
# Module-level singleton used by the convenience functions below.
_context_enricher = ContextEnricher(cache_size=100)
306
+
307
+
308
def enrich_retrieved_documents(
    documents: List[Document],
    pages_before: int = 1,
    pages_after: int = 1,
    max_enriched: int = 5
) -> List[Document]:
    """
    Enrich retrieved documents with surrounding-page context.

    Thin wrapper that delegates to the module-level ContextEnricher.

    Args:
        documents: Retrieved documents.
        pages_before: Pages of context to include before each document.
        pages_after: Pages of context to include after each document.
        max_enriched: Maximum number of documents to enrich.

    Returns:
        Documents enriched with surrounding context.
    """
    enricher = _context_enricher
    return enricher.enrich_documents(
        documents,
        pages_before=pages_before,
        pages_after=pages_after,
        max_enriched_docs=max_enriched,
    )
332
+
333
+
334
def get_context_enricher() -> ContextEnricher:
    """Return the module-level ContextEnricher singleton."""
    enricher = _context_enricher
    return enricher
core/github_storage.py CHANGED
@@ -201,10 +201,10 @@ class GitHubStorage:
201
 
202
  def save_validation_results(self, evaluation_data: Dict[str, Any]) -> bool:
203
  """
204
- Save validation results to GitHub repository as JSON with unique ID generation
205
 
206
  Args:
207
- evaluation_data: Dictionary containing evaluation data
208
 
209
  Returns:
210
  True if successful, False otherwise
@@ -222,31 +222,16 @@ class GitHubStorage:
222
  if not isinstance(evaluations, list):
223
  evaluations = []
224
  except json.JSONDecodeError:
 
225
  evaluations = []
226
  else:
227
  evaluations = []
228
 
229
- # Generate unique interaction ID
230
- existing_ids = set()
231
- for eval_item in evaluations:
232
- existing_id = eval_item.get("interaction_id")
233
- if existing_id:
234
- try:
235
- existing_ids.add(int(existing_id))
236
- except (ValueError, TypeError):
237
- # If ID is not numeric, add as string
238
- existing_ids.add(existing_id)
239
-
240
- # Find next available numeric ID
241
- next_id = 1
242
- while next_id in existing_ids:
243
- next_id += 1
244
-
245
- # Update the evaluation data with unique ID
246
- evaluation_data["interaction_id"] = str(next_id)
247
- logger.info(f"Assigned unique interaction ID: {next_id}")
248
-
249
- # Add new evaluation
250
  evaluations.append(evaluation_data)
251
 
252
  # Convert to JSON string
@@ -258,7 +243,12 @@ class GitHubStorage:
258
  # Upload file
259
  commit_message = f"Add validation results for interaction {evaluation_data.get('interaction_id', 'unknown')} - {evaluation_data.get('timestamp', 'unknown time')}"
260
 
261
- return self._upload_file(file_path, json_content, commit_message, sha)
 
 
 
 
 
262
 
263
  except Exception as e:
264
  logger.error(f"Error saving validation results to GitHub: {e}")
 
201
 
202
  def save_validation_results(self, evaluation_data: Dict[str, Any]) -> bool:
203
  """
204
+ Save validation results to GitHub repository as JSON
205
 
206
  Args:
207
+ evaluation_data: Dictionary containing evaluation data with interaction_id already set
208
 
209
  Returns:
210
  True if successful, False otherwise
 
222
  if not isinstance(evaluations, list):
223
  evaluations = []
224
  except json.JSONDecodeError:
225
+ logger.warning("Failed to parse existing evaluation_results.json, starting fresh")
226
  evaluations = []
227
  else:
228
  evaluations = []
229
 
230
+ # Log the current state
231
+ logger.info(f"Loading existing evaluations: {len(evaluations)} found")
232
+ logger.info(f"Adding new evaluation with ID: {evaluation_data.get('interaction_id', 'unknown')}")
233
+
234
+ # Add new evaluation to the list
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  evaluations.append(evaluation_data)
236
 
237
  # Convert to JSON string
 
243
  # Upload file
244
  commit_message = f"Add validation results for interaction {evaluation_data.get('interaction_id', 'unknown')} - {evaluation_data.get('timestamp', 'unknown time')}"
245
 
246
+ success = self._upload_file(file_path, json_content, commit_message, sha)
247
+
248
+ if success:
249
+ logger.info(f"Successfully saved evaluation. Total evaluations now: {len(evaluations)}")
250
+
251
+ return success
252
 
253
  except Exception as e:
254
  logger.error(f"Error saving validation results to GitHub: {e}")
core/medical_terminology.py ADDED
@@ -0,0 +1,506 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Medical Terminology Module with Dynamic Learning
3
+
4
+ This module provides intelligent handling of medical linguistic variability including:
5
+ - Synonyms and alternate terms
6
+ - Abbreviations and acronyms (with context awareness)
7
+ - Regional spelling variations (US/UK/International)
8
+ - Specialty-specific terminology
9
+ - Dynamic learning from corpus
10
+ """
11
+
12
+ import re
13
+ import json
14
+ from typing import List, Dict, Set, Tuple, Optional
15
+ from collections import defaultdict
16
+ from pathlib import Path
17
+ from .config import logger
18
+
19
# ============================================================================
# CORE MEDICAL TERMINOLOGY MAPPINGS
# ============================================================================
# All keys are lowercase; lookups elsewhere in this module lowercase their
# input before consulting these tables.

# Common medical abbreviations with context-aware expansions.
# Maps abbreviation -> list of full forms (oncology-focused).
MEDICAL_ABBREVIATIONS = {
    # Cancer Types
    "nsclc": ["non-small cell lung cancer", "non small cell lung cancer"],
    "sclc": ["small cell lung cancer"],
    "nscl": ["non-small cell lung"],
    "alk": ["anaplastic lymphoma kinase"],
    "egfr": ["epidermal growth factor receptor"],
    "ros1": ["ros proto-oncogene 1", "c-ros oncogene 1"],
    "braf": ["b-raf proto-oncogene"],
    "kras": ["kirsten rat sarcoma viral oncogene"],
    "met": ["mesenchymal epithelial transition", "met proto-oncogene"],
    "her2": ["human epidermal growth factor receptor 2"],
    "ret": ["ret proto-oncogene", "rearranged during transfection"],
    "ntrk": ["neurotrophic tyrosine receptor kinase", "neurotrophic tropomyosin receptor kinase"],

    # Treatment & Procedures
    "chemo": ["chemotherapy"],
    "rt": ["radiation therapy", "radiotherapy"],
    "sbrt": ["stereotactic body radiation therapy", "stereotactic body radiotherapy"],
    "imrt": ["intensity-modulated radiation therapy"],
    "ct": ["computed tomography", "ct scan"],
    "pet": ["positron emission tomography"],
    "mri": ["magnetic resonance imaging"],
    "io": ["immunotherapy", "immune-oncology"],
    "ici": ["immune checkpoint inhibitor", "immune checkpoint inhibitors"],
    "tki": ["tyrosine kinase inhibitor", "tyrosine kinase inhibitors"],
    "pd-1": ["programmed death-1", "programmed cell death protein 1"],
    "pd-l1": ["programmed death-ligand 1"],
    "ctla-4": ["cytotoxic t-lymphocyte-associated protein 4"],

    # Clinical Terms
    "os": ["overall survival"],
    "pfs": ["progression-free survival"],
    "dfs": ["disease-free survival"],
    "orr": ["overall response rate", "objective response rate"],
    "cr": ["complete response"],
    "pr": ["partial response"],
    "sd": ["stable disease"],
    "pd": ["progressive disease"],
    "ecog": ["eastern cooperative oncology group"],
    "ps": ["performance status"],
    "aes": ["adverse events"],
    "sae": ["serious adverse event", "serious adverse events"],
    "qol": ["quality of life"],

    # Staging
    "tnm": ["tumor node metastasis", "tnm staging"],
    "ajcc": ["american joint committee on cancer"],

    # Drugs (common abbreviations)
    "cddp": ["cisplatin"],
    "cbdca": ["carboplatin"],
    "pem": ["pemetrexed"],
    "gem": ["gemcitabine"],
    "doc": ["docetaxel"],
    "pac": ["paclitaxel"],
    "vin": ["vinorelbine"],
    "eto": ["etoposide"],
}

# Synonym mappings for medical terms.
# Maps canonical term -> list of alternates; get_synonyms() also does the
# reverse lookup, so entries are effectively bidirectional.
MEDICAL_SYNONYMS = {
    # Cancer terminology
    "lung cancer": ["pulmonary cancer", "lung carcinoma", "pulmonary carcinoma", "bronchogenic carcinoma"],
    "non-small cell lung cancer": ["nsclc", "non small cell lung cancer", "non-small-cell lung cancer"],
    "small cell lung cancer": ["sclc", "small-cell lung cancer", "oat cell carcinoma"],
    "adenocarcinoma": ["adeno", "glandular cancer"],
    "squamous cell carcinoma": ["squamous carcinoma", "scc", "epidermoid carcinoma"],
    "metastatic": ["advanced", "stage iv", "stage 4", "metastases", "mets"],
    "locally advanced": ["stage iii", "stage 3", "regional spread"],
    "early stage": ["stage i", "stage ii", "stage 1", "stage 2", "localized"],

    # Treatment terms
    "chemotherapy": ["chemo", "cytotoxic therapy", "systemic therapy"],
    "radiation therapy": ["radiotherapy", "rt", "radiation treatment", "irradiation"],
    "immunotherapy": ["immune therapy", "io", "immune-oncology", "checkpoint inhibitor"],
    "targeted therapy": ["molecular therapy", "precision medicine", "targeted treatment"],
    "surgery": ["surgical resection", "resection", "operative treatment", "surgical intervention"],
    "lobectomy": ["lobe resection", "pulmonary lobectomy"],
    "pneumonectomy": ["lung removal", "complete lung resection"],
    "wedge resection": ["segmentectomy", "limited resection"],

    # Molecular markers
    "mutation": ["alteration", "variant", "genetic change", "molecular alteration"],
    "biomarker": ["molecular marker", "tumor marker", "genetic marker"],
    "driver mutation": ["oncogenic driver", "actionable mutation", "targetable mutation"],

    # Clinical outcomes
    "survival": ["survival rate", "survival outcome"],
    "response": ["treatment response", "tumor response", "clinical response"],
    "progression": ["disease progression", "tumor progression", "cancer progression"],
    "recurrence": ["relapse", "disease recurrence", "tumor recurrence"],
    "remission": ["response", "disease control"],

    # Side effects
    "adverse event": ["side effect", "adverse reaction", "toxicity", "adverse drug reaction"],
    "neutropenia": ["low white blood cell count", "low neutrophil count"],
    "anemia": ["low red blood cell count", "low hemoglobin"],
    "thrombocytopenia": ["low platelet count"],
    "nausea": ["feeling sick", "queasiness"],
    "fatigue": ["tiredness", "exhaustion", "weakness"],

    # Diagnostic terms
    "biopsy": ["tissue sample", "tissue sampling"],
    "imaging": ["radiology", "diagnostic imaging", "medical imaging"],
    "screening": ["early detection", "cancer screening"],
}

# Regional spelling variations (US/UK/International).
# Maps US spelling -> list of variants; reverse lookup handled by
# get_spelling_variations().
SPELLING_VARIATIONS = {
    # US -> UK/International variants
    "tumor": ["tumour"],
    "tumors": ["tumours"],
    "metastasis": ["metastases"],
    "anemia": ["anaemia"],
    "edema": ["oedema"],
    "esophageal": ["oesophageal"],
    "pediatric": ["paediatric"],
    "hematology": ["haematology"],
    "hemoglobin": ["haemoglobin"],
    "leukemia": ["leukaemia"],
    "lymphoma": ["lymphoma"],  # Same in both
    "optimize": ["optimise"],
    "randomized": ["randomised"],
    "analyze": ["analyse"],
    "center": ["centre"],
    "fiber": ["fibre"],
}

# Context-dependent abbreviations (require disambiguation).
# Maps abbreviation -> {sense_key: [expansions]}.
# NOTE(review): not consumed by any of the expansion helpers visible in this
# module — confirm intended usage or wire it into expand_abbreviations().
CONTEXT_DEPENDENT_ABBREVS = {
    "ca": {
        "cancer": ["cancer", "carcinoma"],
        "calcium": ["calcium"],
    },
    "cr": {
        "complete_response": ["complete response", "complete remission"],
        "creatinine": ["creatinine"],
    },
    "pt": {
        "patient": ["patient"],
        "prothrombin_time": ["prothrombin time"],
    },
    "rt": {
        "radiation_therapy": ["radiation therapy", "radiotherapy"],
        "reverse_transcriptase": ["reverse transcriptase"],
    },
}
172
+
173
+ # ============================================================================
174
+ # DYNAMIC LEARNING COMPONENTS
175
+ # ============================================================================
176
+
177
class MedicalTerminologyLearner:
    """
    Dynamically learns medical term variations from the corpus.

    Builds abbreviation and synonym mappings from textual patterns
    (e.g. "Full Term (ABBR)", "X, also known as Y") plus a simple
    term co-occurrence matrix. Learned synonyms and abbreviations are
    persisted to a JSON cache file; the co-occurrence matrix is
    in-memory only and is NOT persisted.
    """

    def __init__(self, cache_path: Optional[str] = None):
        # Where learned synonyms/abbreviations are persisted as JSON.
        self.cache_path = cache_path or "data/medical_terms_cache.json"
        # term -> {co-occurring term -> count}; in-memory only.
        self.term_cooccurrence = defaultdict(lambda: defaultdict(int))
        # term -> set of learned synonyms (bidirectional entries).
        self.learned_synonyms = defaultdict(set)
        # abbreviation -> set of learned full forms.
        self.learned_abbreviations = defaultdict(set)
        # Reserved; not populated by any method in this class.
        self.context_patterns = defaultdict(list)
        self._load_cache()

    def _load_cache(self) -> None:
        """Load previously learned terms from the JSON cache, if present."""
        try:
            cache_file = Path(self.cache_path)
            if cache_file.exists():
                with open(cache_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                    # JSON stores lists; convert back to sets for fast lookup.
                    self.learned_synonyms = defaultdict(set, {k: set(v) for k, v in data.get('synonyms', {}).items()})
                    self.learned_abbreviations = defaultdict(set, {k: set(v) for k, v in data.get('abbreviations', {}).items()})
                    logger.info(f"Loaded {len(self.learned_synonyms)} learned synonyms from cache")
        except Exception as e:
            # Best-effort: a missing/corrupt cache just means learning starts fresh.
            logger.warning(f"Could not load term cache: {e}")

    def _save_cache(self) -> None:
        """Persist learned synonyms and abbreviations to the JSON cache."""
        try:
            cache_file = Path(self.cache_path)
            cache_file.parent.mkdir(parents=True, exist_ok=True)
            data = {
                # Sets are not JSON-serializable; store as lists.
                'synonyms': {k: list(v) for k, v in self.learned_synonyms.items()},
                'abbreviations': {k: list(v) for k, v in self.learned_abbreviations.items()}
            }
            with open(cache_file, 'w', encoding='utf-8') as f:
                json.dump(data, f, indent=2)
            logger.info(f"Saved learned terms to cache")
        except Exception as e:
            logger.warning(f"Could not save term cache: {e}")

    def learn_from_documents(self, documents: List[Dict[str, str]]) -> None:
        """
        Learn term variations from a corpus of documents.

        Each document is a dict with a 'content' key. Identifies patterns like:
        - "X (Y)" -> Y is abbreviation of X
        - "X, also known as Y" -> X and Y are synonyms
        and updates the co-occurrence matrix. Saves the cache at the end.
        """
        for doc in documents:
            content = doc.get('content', '')
            self._extract_abbreviation_patterns(content)
            self._extract_synonym_patterns(content)
            self._build_cooccurrence(content)

        self._save_cache()

    def _extract_abbreviation_patterns(self, text: str) -> None:
        """Extract abbreviations from patterns like 'Full Term (ABBR)'."""
        # Pattern: "Full Term (ABBR)" or "Full Term [ABBR]"
        pattern = r'([A-Z][a-z]+(?:\s+[A-Z]?[a-z]+)*)\s*[\(\[]([A-Z]{2,}|[A-Z][a-z]*(?:-[A-Z][a-z]*)*)[\)\]]'
        matches = re.finditer(pattern, text)

        for match in matches:
            full_term = match.group(1).strip().lower()
            abbrev = match.group(2).strip().lower()

            # Validate: abbreviation must be shorter than the full term and at
            # least 2 chars. NOTE: it is NOT verified that the abbreviation's
            # letters actually match the full term's initials.
            if len(abbrev) < len(full_term) and len(abbrev) >= 2:
                self.learned_abbreviations[abbrev].add(full_term)
                logger.debug(f"Learned: {abbrev} -> {full_term}")

    def _extract_synonym_patterns(self, text: str) -> None:
        """Extract synonyms from patterns like 'X, also known as Y' or 'X (Y)'."""
        # Pattern: "X, also known as Y" or "X, also called Y"; the third
        # pattern also treats lowercase parentheticals as synonyms, which
        # can pick up noise.
        patterns = [
            r'([a-z\s\-]+),?\s+also\s+known\s+as\s+([a-z\s\-]+)',
            r'([a-z\s\-]+),?\s+also\s+called\s+([a-z\s\-]+)',
            r'([a-z\s\-]+)\s+\(([a-z\s\-]+)\)',
        ]

        for pattern in patterns:
            matches = re.finditer(pattern, text.lower())
            for match in matches:
                term1 = match.group(1).strip()
                term2 = match.group(2).strip()

                # Validate: both should be reasonable length
                if 3 <= len(term1) <= 50 and 3 <= len(term2) <= 50:
                    # Record the relation in both directions.
                    self.learned_synonyms[term1].add(term2)
                    self.learned_synonyms[term2].add(term1)

    def _build_cooccurrence(self, text: str) -> None:
        """Build the in-memory term co-occurrence matrix from *text*."""
        # Extract candidate terms: runs of 1-4 lowercase words of >=3 letters.
        terms = re.findall(r'\b[a-z]{3,}(?:\s+[a-z]{3,}){0,3}\b', text.lower())

        # Count pairs within a sliding window around each term.
        # NOTE(review): this is O(len(terms) * window) and unbounded in
        # memory across documents.
        window_size = 10
        for i, term in enumerate(terms):
            for j in range(max(0, i - window_size), min(len(terms), i + window_size + 1)):
                if i != j:
                    self.term_cooccurrence[term][terms[j]] += 1

    def get_related_terms(self, term: str, threshold: int = 3) -> Set[str]:
        """Return terms that co-occur with *term* at least *threshold* times."""
        term_lower = term.lower()
        related = set()

        # Membership test first so the defaultdict is not grown by the lookup.
        if term_lower in self.term_cooccurrence:
            for related_term, count in self.term_cooccurrence[term_lower].items():
                if count >= threshold:
                    related.add(related_term)

        return related
293
+
294
# Module-level singleton; loads its on-disk cache at import time.
_terminology_learner = MedicalTerminologyLearner()
296
+
297
+ # ============================================================================
298
+ # QUERY NORMALIZATION AND EXPANSION FUNCTIONS
299
+ # ============================================================================
300
+
301
def normalize_query(query: str) -> str:
    """
    Return a normalized form of *query*: lowercased, en/em dashes
    standardized to '-', and whitespace collapsed and trimmed.
    """
    lowered = query.lower()

    # Standardize en/em dashes to a plain hyphen
    dash_fixed = re.sub(r'[–—]', '-', lowered)

    # Collapse runs of whitespace and trim the ends
    return re.sub(r'\s+', ' ', dash_fixed).strip()
318
+
319
+
320
def expand_abbreviations(text: str, context: Optional[str] = None) -> List[str]:
    """
    Expand medical abbreviations in *text* to their full forms.

    Both learned and predefined abbreviations are matched on word
    boundaries so short abbreviations (e.g. "os", "ca") cannot match
    inside longer words ("most", "cancer").

    Args:
        text: Input text possibly containing abbreviations.
        context: Optional context for disambiguation (currently unused;
            reserved for future use).

    Returns:
        Expansion variants, starting with the original text,
        de-duplicated while preserving order.
    """
    expansions = [text]
    text_lower = text.lower()

    def _expand_with(mapping) -> None:
        # For each known abbreviation present as a whole word, produce one
        # variant per full form by substituting every whole-word occurrence.
        for abbrev, full_forms in mapping.items():
            pattern = rf'\b{re.escape(abbrev)}\b'
            if re.search(pattern, text_lower):
                for full_form in full_forms:
                    expanded = re.sub(pattern, full_form, text_lower)
                    if expanded != text_lower:
                        expansions.append(expanded)

    # Check learned abbreviations first, then the predefined table.
    # (Previously the learned branch used plain substring replacement,
    # which corrupted words containing the abbreviation letters.)
    _expand_with(_terminology_learner.learned_abbreviations)
    _expand_with(MEDICAL_ABBREVIATIONS)

    # De-duplicate while preserving order
    return list(dict.fromkeys(expansions))
353
+
354
+
355
def get_synonyms(term: str) -> Set[str]:
    """Return every known synonym of *term* (predefined plus learned), excluding the term itself."""
    key = term.lower()
    result: Set[str] = set()

    # Direct entry in the predefined table
    result.update(MEDICAL_SYNONYMS.get(key, []))

    # Reverse lookup: the term may be listed as someone else's synonym
    for canonical, alternates in MEDICAL_SYNONYMS.items():
        if key in alternates:
            result.add(canonical)
            result.update(alternates)

    # Synonyms learned dynamically from the corpus
    if key in _terminology_learner.learned_synonyms:
        result.update(_terminology_learner.learned_synonyms[key])

    # Never report the term as its own synonym
    result.discard(key)

    return result
378
+
379
+
380
def get_spelling_variations(term: str) -> Set[str]:
    """Return regional (US/UK) spelling variants of *term*, excluding the term itself."""
    key = term.lower()
    found: Set[str] = set()

    # Forward lookup: US spelling -> variants
    found.update(SPELLING_VARIATIONS.get(key, []))

    # Reverse lookup: term given as a variant spelling
    for us_form, variants in SPELLING_VARIATIONS.items():
        if key in variants:
            found.add(us_form)
            found.update(variants)

    found.discard(key)
    return found
397
+
398
+
399
def extract_medical_entities(text: str) -> List[Tuple[str, str]]:
    """
    Extract known medical entities from *text*.

    Returns:
        List of (entity, type) tuples, where type is 'abbreviation' or
        'medical_term'. Abbreviations are matched on word boundaries;
        medical terms by substring.
    """
    lowered = text.lower()
    found: List[Tuple[str, str]] = []

    # Whole-word matches against the abbreviation table
    for abbrev in MEDICAL_ABBREVIATIONS:
        if re.search(rf'\b{re.escape(abbrev)}\b', lowered):
            found.append((abbrev, 'abbreviation'))

    # Substring matches against known medical terms
    for term in MEDICAL_SYNONYMS:
        if term in lowered:
            found.append((term, 'medical_term'))

    return found
418
+
419
+
420
def is_medical_abbreviation(text: str) -> bool:
    """Return True if *text* is a known (predefined or learned) medical abbreviation."""
    candidate = text.lower().strip()
    if candidate in MEDICAL_ABBREVIATIONS:
        return True
    return candidate in _terminology_learner.learned_abbreviations
424
+
425
+
426
def get_abbreviation_expansion(abbrev: str) -> List[str]:
    """Return every known full form of *abbrev* — predefined entries first, then learned ones."""
    key = abbrev.lower().strip()
    results: List[str] = []

    # Predefined table
    results.extend(MEDICAL_ABBREVIATIONS.get(key, []))

    # Dynamically learned forms
    if key in _terminology_learner.learned_abbreviations:
        results.extend(_terminology_learner.learned_abbreviations[key])

    return results
440
+
441
+
442
def expand_query_with_variations(query: str, max_variations: int = 5) -> List[str]:
    """
    Generate query variations by expanding abbreviations, adding synonyms,
    and including spelling variations.

    Variations are generated in a fixed priority order (original query,
    abbreviation expansions, per-word synonyms, spelling variants,
    multi-word phrase synonyms) and truncated to *max_variations* after
    order-preserving de-duplication — so earlier categories win slots.

    Args:
        query: Original query string
        max_variations: Maximum number of variations to generate

    Returns:
        List of query variations including the original
    """
    variations = [query]
    query_lower = normalize_query(query)

    # 1. Expand abbreviations (includes query_lower itself as first element)
    abbrev_expansions = expand_abbreviations(query_lower)
    variations.extend(abbrev_expansions)

    # 2. Add synonym variations (one word substituted at a time)
    words = query_lower.split()
    for i, word in enumerate(words):
        synonyms = get_synonyms(word)
        for syn in list(synonyms)[:2]:  # Limit to 2 synonyms per word
            new_query = ' '.join(words[:i] + [syn] + words[i+1:])
            variations.append(new_query)

    # 3. Add spelling variations
    # NOTE(review): str.replace matches substrings, so a short word could be
    # replaced inside a longer word — consider a word-boundary regex.
    for word in words:
        spelling_vars = get_spelling_variations(word)
        for var in spelling_vars:
            new_query = query_lower.replace(word, var)
            variations.append(new_query)

    # 4. Add multi-word phrase variations
    for term, synonyms in MEDICAL_SYNONYMS.items():
        if term in query_lower:
            for syn in list(synonyms)[:2]:
                new_query = query_lower.replace(term, syn)
                variations.append(new_query)

    # Remove duplicates (order-preserving) and cap at max_variations
    seen = set()
    unique_variations = []
    for var in variations:
        if var not in seen:
            seen.add(var)
            unique_variations.append(var)
            if len(unique_variations) >= max_variations:
                break

    return unique_variations
494
+
495
+
496
def learn_from_corpus(documents: List[Dict[str, str]]):
    """
    Learn medical term variations from a corpus of documents.

    Intended to run once during system initialization; delegates to the
    module-level terminology learner, which also persists its cache.
    """
    learner = _terminology_learner
    learner.learn_from_documents(documents)
502
+
503
+
504
def get_terminology_learner() -> MedicalTerminologyLearner:
    """Return the module-level MedicalTerminologyLearner singleton."""
    learner = _terminology_learner
    return learner
core/query_expansion.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Query Expansion Module for Medical Linguistic Variability
3
+
4
+ This module provides intelligent query expansion to handle:
5
+ - Medical term variations and synonyms
6
+ - Abbreviation expansion
7
+ - Spelling variations (US/UK/International)
8
+ - Specialty-specific terminology
9
+ - Multi-query retrieval strategies
10
+ """
11
+
12
+ import re
13
+ from typing import List, Dict, Set, Tuple, Optional
14
+ from langchain.schema import Document
15
+ from .medical_terminology import (
16
+ normalize_query,
17
+ expand_query_with_variations,
18
+ get_synonyms,
19
+ expand_abbreviations,
20
+ extract_medical_entities,
21
+ is_medical_abbreviation,
22
+ get_abbreviation_expansion,
23
+ )
24
+ from .config import logger
25
+
26
+
27
+ class QueryExpansionStrategy:
28
+ """
29
+ Intelligent query expansion strategy that adapts based on query characteristics.
30
+ """
31
+
32
    def __init__(self):
        # Memo of expansion results, keyed by f"{query}_{strategy}".
        # NOTE(review): unbounded — grows with distinct queries; consider an LRU cap.
        self.expansion_cache = {}
34
+
35
+ def expand(self, query: str, strategy: str = "adaptive") -> List[str]:
36
+ """
37
+ Expand query using specified strategy.
38
+
39
+ Args:
40
+ query: Original query string
41
+ strategy: Expansion strategy - "adaptive", "aggressive", "conservative", "abbreviation_focused"
42
+
43
+ Returns:
44
+ List of expanded query variations
45
+ """
46
+ # Check cache
47
+ cache_key = f"{query}_{strategy}"
48
+ if cache_key in self.expansion_cache:
49
+ return self.expansion_cache[cache_key]
50
+
51
+ if strategy == "adaptive":
52
+ expansions = self._adaptive_expansion(query)
53
+ elif strategy == "aggressive":
54
+ expansions = self._aggressive_expansion(query)
55
+ elif strategy == "conservative":
56
+ expansions = self._conservative_expansion(query)
57
+ elif strategy == "abbreviation_focused":
58
+ expansions = self._abbreviation_focused_expansion(query)
59
+ else:
60
+ expansions = [query]
61
+
62
+ # Cache result
63
+ self.expansion_cache[cache_key] = expansions
64
+ return expansions
65
+
66
+ def _adaptive_expansion(self, query: str) -> List[str]:
67
+ """
68
+ Adaptive expansion that adjusts based on query characteristics.
69
+ - Short queries (< 5 words): More aggressive expansion
70
+ - Long queries: More conservative
71
+ - Queries with abbreviations: Focus on abbreviation expansion
72
+ """
73
+ words = query.split()
74
+ word_count = len(words)
75
+
76
+ # Detect if query contains abbreviations
77
+ has_abbrev = any(is_medical_abbreviation(word) for word in words)
78
+
79
+ if has_abbrev:
80
+ # Focus on abbreviation expansion
81
+ return self._abbreviation_focused_expansion(query)
82
+ elif word_count <= 3:
83
+ # Short query - aggressive expansion
84
+ return self._aggressive_expansion(query)
85
+ elif word_count <= 7:
86
+ # Medium query - balanced expansion
87
+ return expand_query_with_variations(query, max_variations=5)
88
+ else:
89
+ # Long query - conservative expansion
90
+ return self._conservative_expansion(query)
91
+
92
    def _aggressive_expansion(self, query: str) -> List[str]:
        """
        Aggressive expansion with more variations.

        Useful for short queries that need more context. Generation order
        matters: the final order-preserving de-duplication keeps at most
        10 results, so earlier categories win slots.
        """
        expansions = []
        normalized = normalize_query(query)
        expansions.append(normalized)

        # 1. Abbreviation expansion
        abbrev_expansions = expand_abbreviations(normalized)
        expansions.extend(abbrev_expansions)

        # 2. Synonym expansion for each word (one word substituted at a time)
        words = normalized.split()
        for i, word in enumerate(words):
            synonyms = get_synonyms(word)
            for syn in list(synonyms)[:3]:  # Top 3 synonyms
                new_query = ' '.join(words[:i] + [syn] + words[i+1:])
                expansions.append(new_query)

        # 3. Multi-word phrase synonyms
        # (imported locally to avoid widening the module-level import list)
        from .medical_terminology import MEDICAL_SYNONYMS
        for term, syn_list in MEDICAL_SYNONYMS.items():
            if term in normalized:
                for syn in syn_list[:3]:
                    expansions.append(normalized.replace(term, syn))

        # 4. Spelling variations
        # NOTE(review): str.replace matches substrings, not whole words.
        from .medical_terminology import SPELLING_VARIATIONS
        for us_spelling, uk_variants in SPELLING_VARIATIONS.items():
            if us_spelling in normalized:
                for uk_spelling in uk_variants:
                    expansions.append(normalized.replace(us_spelling, uk_spelling))

        # Remove duplicates (order-preserving) and cap at 10
        return list(dict.fromkeys(expansions))[:10]
129
+
130
+ def _conservative_expansion(self, query: str) -> List[str]:
131
+ """
132
+ Conservative expansion with fewer variations.
133
+ Useful for specific, well-formed queries.
134
+ """
135
+ expansions = []
136
+ normalized = normalize_query(query)
137
+ expansions.append(normalized)
138
+
139
+ # Only expand obvious abbreviations
140
+ words = normalized.split()
141
+ for word in words:
142
+ if is_medical_abbreviation(word):
143
+ abbrev_expansions = expand_abbreviations(word)
144
+ for exp in abbrev_expansions[:2]: # Limit to 2
145
+ new_query = normalized.replace(word, exp)
146
+ expansions.append(new_query)
147
+
148
+ # Remove duplicates
149
+ return list(dict.fromkeys(expansions))[:5]
150
+
151
+ def _abbreviation_focused_expansion(self, query: str) -> List[str]:
152
+ """
153
+ Expansion focused on abbreviation handling.
154
+ Expands all abbreviations to their full forms.
155
+ """
156
+ expansions = []
157
+ normalized = normalize_query(query)
158
+ expansions.append(normalized)
159
+
160
+ # Identify and expand all abbreviations
161
+ words = normalized.split()
162
+ current_query = normalized
163
+
164
+ for word in words:
165
+ if is_medical_abbreviation(word):
166
+ full_forms = get_abbreviation_expansion(word)
167
+ for full_form in full_forms:
168
+ expanded = current_query.replace(word, full_form)
169
+ expansions.append(expanded)
170
+ # Also try with the expanded form as base for further expansion
171
+ current_query = expanded
172
+
173
+ # Remove duplicates
174
+ return list(dict.fromkeys(expansions))[:8]
175
+
176
+
177
class MultiQueryRetriever:
    """Run a base retriever over several query variations and merge hits.

    The base retriever is any callable of the form ``fn(query, **kwargs)``
    returning a list of Documents; this class fans a query out across
    expanded variations and combines the results.
    """

    def __init__(self, base_retriever_func):
        """
        Args:
            base_retriever_func: Callable taking (query, **kwargs) and
                returning List[Document].
        """
        self.base_retriever = base_retriever_func
        self.query_expander = QueryExpansionStrategy()

    def retrieve(
        self,
        query: str,
        expansion_strategy: str = "adaptive",
        merge_strategy: str = "weighted",
        **retriever_kwargs
    ) -> List[Document]:
        """Fan the query out over expanded variations and merge the hits.

        Args:
            query: Original query string.
            expansion_strategy: Strategy passed to the query expander.
            merge_strategy: "weighted", "union", or "intersection".
            **retriever_kwargs: Forwarded to the base retriever.

        Returns:
            Merged, deduplicated list of documents.
        """
        variations = self.query_expander.expand(query, strategy=expansion_strategy)

        logger.info(f"Expanded query into {len(variations)} variations")
        logger.debug(f"Query variations: {variations}")

        # Collect (variation, docs) pairs; a failing variation is logged
        # and skipped rather than aborting the whole retrieval.
        per_query_hits = []
        for rank, variant in enumerate(variations):
            try:
                docs = self.base_retriever(variant, **retriever_kwargs)
                # Record which variation produced each hit.
                for doc in docs:
                    if not hasattr(doc, 'metadata'):
                        doc.metadata = {}
                    doc.metadata['query_variation_rank'] = rank
                    doc.metadata['query_variation'] = variant
                per_query_hits.append((variant, docs))
            except Exception as e:
                logger.warning(f"Retrieval failed for variation '{variant}': {e}")

        # Unknown strategies fall back to weighted merging.
        merge_fn = {
            "weighted": self._weighted_merge,
            "union": self._union_merge,
            "intersection": self._intersection_merge,
        }.get(merge_strategy, self._weighted_merge)
        merged = merge_fn(per_query_hits)

        logger.info(f"Retrieved {len(merged)} unique documents after merging")
        return merged

    def _weighted_merge(self, results: List[Tuple[str, List[Document]]]) -> List[Document]:
        """Score-and-merge: earlier variations and earlier ranks weigh more.

        A document appearing under several variations accumulates score
        across them, boosting it in the final ordering.
        """
        scored = {}  # doc_id -> (doc, accumulated score)

        for q_rank, (variant, docs) in enumerate(results):
            # Weight decays with the variation's position in the list.
            variation_weight = 1.0 / (q_rank + 1)

            for d_rank, doc in enumerate(docs):
                key = self._get_doc_id(doc)
                # Position score: earlier hits in a result list count more.
                contribution = variation_weight * (1.0 / (d_rank + 1))

                if key in scored:
                    kept_doc, total = scored[key]
                    scored[key] = (kept_doc, total + contribution)
                else:
                    scored[key] = (doc, contribution)

        ranked = sorted(scored.values(), key=lambda pair: pair[1], reverse=True)
        return [doc for doc, _ in ranked]

    def _union_merge(self, results: List[Tuple[str, List[Document]]]) -> List[Document]:
        """Union of all hits, deduplicated, in first-appearance order."""
        seen = set()
        out = []

        for _variant, docs in results:
            for doc in docs:
                key = self._get_doc_id(doc)
                if key not in seen:
                    seen.add(key)
                    out.append(doc)

        return out

    def _intersection_merge(self, results: List[Tuple[str, List[Document]]]) -> List[Document]:
        """Keep only documents present under every variation (high precision)."""
        if not results:
            return []

        # Map IDs from the first variation; intersect with the rest.
        by_id = {self._get_doc_id(doc): doc for doc in results[0][1]}
        shared = set(by_id)

        for _variant, docs in results[1:]:
            shared &= {self._get_doc_id(doc) for doc in docs}

        return [by_id[key] for key in shared if key in by_id]

    def _get_doc_id(self, doc: Document) -> str:
        """Build a dedup key from source, page number, and a content-prefix hash."""
        src = doc.metadata.get('source', 'unknown')
        page = doc.metadata.get('page_number', 'unknown')
        # Hashing only the first 200 chars keeps the key cheap to compute.
        digest = hash(doc.page_content[:200])
        return f"{src}_{page}_{digest}"
322
+
323
+
324
class SemanticQueryExpander:
    """Query expansion driven by learned context/co-occurrence patterns."""

    def __init__(self):
        from .medical_terminology import get_terminology_learner
        self.learner = get_terminology_learner()

    def expand_with_context(self, query: str, context: Optional[str] = None) -> List[str]:
        """Expand using learned related terms plus optional extra context.

        Args:
            query: Original query.
            context: Optional extra text (e.g. prior queries or
                conversation history) to mine for additional terms.

        Returns:
            Deduplicated list of up to 7 contextually expanded queries.
        """
        variants = [query]
        base = normalize_query(query)

        # Replace each detected entity with up to three learned related terms.
        for entity, _entity_type in extract_medical_entities(base):
            for related_term in list(self.learner.get_related_terms(entity))[:3]:
                variants.append(base.replace(entity, related_term))

        # Append up to two entities mined from the surrounding context.
        if context:
            ctx_entities = extract_medical_entities(normalize_query(context))
            for entity, _ in ctx_entities[:2]:
                variants.append(f"{base} {entity}")

        return list(dict.fromkeys(variants))[:7]

    def expand_with_specialization(self, query: str, specialty: Optional[str] = None) -> List[str]:
        """Augment the query with specialty-specific vocabulary.

        Args:
            query: Original query.
            specialty: Medical specialty name (e.g. "oncology", "radiology").

        Returns:
            The query plus up to two specialty-term-augmented variants.
        """
        variants = [query]

        # Vocabulary associated with each supported specialty.
        vocab = {
            "oncology": ["cancer", "tumor", "malignancy", "neoplasm", "carcinoma"],
            "radiology": ["imaging", "scan", "ct", "mri", "pet"],
            "pathology": ["biopsy", "histology", "cytology", "tissue"],
            "surgery": ["resection", "operative", "surgical", "procedure"],
        }

        terms = vocab.get(specialty.lower()) if specialty else None
        if terms:
            # Only append terms not already present in the query.
            for term in terms[:2]:
                if term not in query.lower():
                    variants.append(f"{query} {term}")

        return variants
395
+
396
+
397
+ # ============================================================================
398
+ # CONVENIENCE FUNCTIONS
399
+ # ============================================================================
400
+
401
def expand_medical_query(
    query: str,
    strategy: str = "adaptive",
    max_variations: int = 5
) -> List[str]:
    """Expand a medical query into at most ``max_variations`` variants.

    Args:
        query: Original query string.
        strategy: Expansion strategy understood by QueryExpansionStrategy.
        max_variations: Cap on the number of returned variations.

    Returns:
        Up to ``max_variations`` query variations.
    """
    return QueryExpansionStrategy().expand(query, strategy=strategy)[:max_variations]
420
+
421
+
422
def create_multi_query_retriever(base_retriever_func):
    """Build a MultiQueryRetriever wrapping ``base_retriever_func``.

    Args:
        base_retriever_func: Callable (query, **kwargs) -> List[Document].

    Returns:
        A new MultiQueryRetriever instance.
    """
    return MultiQueryRetriever(base_retriever_func)
core/retrievers.py CHANGED
@@ -1,10 +1,17 @@
1
  from concurrent.futures import ThreadPoolExecutor
 
2
 
3
  from . import utils
4
  from langchain_community.retrievers import BM25Retriever
5
  from langchain.retrievers import EnsembleRetriever
 
6
  from .config import logger
7
  from .tracing import traceable
 
 
 
 
 
8
 
9
  # Global variables for lazy loading
10
  _vector_store = None
@@ -98,6 +105,14 @@ def is_initialized() -> bool:
98
  _retrieval_pool = ThreadPoolExecutor(max_workers=4)
99
 
100
 
 
 
 
 
 
 
 
 
101
  def _match_provider(doc, provider: str) -> bool:
102
  if not provider:
103
  return True
@@ -106,16 +121,47 @@ def _match_provider(doc, provider: str) -> bool:
106
 
107
 
108
  @traceable(name="VectorRetriever")
109
- def vector_search(query: str, provider: str | None = None, k: int = 5):
110
- """Search FAISS vector store with optional provider metadata filter."""
111
  _ensure_initialized()
112
  if not _vector_store:
113
  return []
 
 
 
 
 
114
  try:
115
- if provider:
116
- docs = _vector_store.similarity_search(query, k=k, filter={"provider": provider})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  else:
118
- docs = _vector_store.similarity_search(query, k=k)
 
 
 
 
 
119
  # Ensure provider post-filter in case backend filter is lenient
120
  if provider:
121
  docs = [d for d in docs if _match_provider(d, provider)]
@@ -126,36 +172,75 @@ def vector_search(query: str, provider: str | None = None, k: int = 5):
126
 
127
 
128
  @traceable(name="BM25Retriever")
129
- def bm25_search(query: str, provider: str | None = None, k: int = 5):
130
- """Search BM25 using the global retriever and optionally filter by provider."""
131
  _ensure_initialized()
 
 
 
 
 
132
  try:
133
  if not _bm25_retriever:
134
  return []
135
- _bm25_retriever.k = max(1, k)
136
- docs = _bm25_retriever.get_relevant_documents(query) or []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  if provider:
138
  docs = [d for d in docs if _match_provider(d, provider)]
139
- return docs[:k]
140
  except Exception as e:
141
  logger.error(f"BM25 search failed: {e}")
142
  return []
143
 
144
 
145
- def hybrid_search(query: str, provider: str | None = None, k_vector: int = 5, k_bm25: int = 5):
146
- """Combine vector and BM25 results (provider-filtered if provided)."""
147
  _ensure_initialized() # Ensure retrievers are initialized before parallel execution
148
- f_vector = _retrieval_pool.submit(vector_search, query, provider, k_vector)
149
- f_bm25 = _retrieval_pool.submit(bm25_search, query, provider, k_bm25)
 
 
 
 
 
 
 
150
 
151
  v_docs = f_vector.result()
152
  b_docs = f_bm25.result()
153
- # Merge uniquely by (source, page_number, snippet)
154
  seen = set()
155
  merged = []
156
  for d in v_docs + b_docs:
157
- key = (d.metadata.get("source"), d.metadata.get("page_number"), d.page_content[:100])
158
- if key not in seen:
159
- seen.add(key)
160
  merged.append(d)
 
 
161
  return merged
 
1
  from concurrent.futures import ThreadPoolExecutor
2
+ from typing import List, Optional
3
 
4
  from . import utils
5
  from langchain_community.retrievers import BM25Retriever
6
  from langchain.retrievers import EnsembleRetriever
7
+ from langchain.schema import Document
8
  from .config import logger
9
  from .tracing import traceable
10
+ from .query_expansion import expand_medical_query, MultiQueryRetriever
11
+
12
+ # Global configuration for retrieval parameters
13
+ DEFAULT_K_VECTOR = 3 # Number of documents to retrieve from vector search
14
+ DEFAULT_K_BM25 = 2 # Number of documents to retrieve from BM25 search
15
 
16
  # Global variables for lazy loading
17
  _vector_store = None
 
105
  _retrieval_pool = ThreadPoolExecutor(max_workers=4)
106
 
107
 
108
def _get_doc_id(doc: Document) -> str:
    """Build a dedup identifier from source, page number, and content hash."""
    src = doc.metadata.get('source', 'unknown')
    page = doc.metadata.get('page_number', 'unknown')
    # Hash only the first 200 chars — cheap and sufficient to tell chunks apart.
    digest = hash(doc.page_content[:200])
    return f"{src}_{page}_{digest}"
114
+
115
+
116
  def _match_provider(doc, provider: str) -> bool:
117
  if not provider:
118
  return True
 
121
 
122
 
123
  @traceable(name="VectorRetriever")
124
+ def vector_search(query: str, provider: str | None = None, k: int = None, use_query_expansion: bool = True):
125
+ """Search FAISS vector store with optional provider metadata filter and query expansion."""
126
  _ensure_initialized()
127
  if not _vector_store:
128
  return []
129
+
130
+ # Use global default if k is not specified
131
+ if k is None:
132
+ k = DEFAULT_K_VECTOR
133
+
134
  try:
135
+ # Use query expansion for better medical term matching
136
+ if use_query_expansion:
137
+ query_variations = expand_medical_query(query, strategy="adaptive", max_variations=3)
138
+ logger.debug(f"Expanded query '{query}' into {len(query_variations)} variations")
139
+
140
+ # Retrieve with each variation and merge
141
+ all_docs = []
142
+ seen_ids = set()
143
+
144
+ for var_query in query_variations:
145
+ if provider:
146
+ docs = _vector_store.similarity_search(var_query, k=k, filter={"provider": provider})
147
+ else:
148
+ docs = _vector_store.similarity_search(var_query, k=k)
149
+
150
+ # Deduplicate while preserving order
151
+ for doc in docs:
152
+ doc_id = _get_doc_id(doc)
153
+ if doc_id not in seen_ids:
154
+ seen_ids.add(doc_id)
155
+ all_docs.append(doc)
156
+
157
+ docs = all_docs[:k * 2] # Return more results due to expansion
158
  else:
159
+ # Standard search without expansion
160
+ if provider:
161
+ docs = _vector_store.similarity_search(query, k=k, filter={"provider": provider})
162
+ else:
163
+ docs = _vector_store.similarity_search(query, k=k)
164
+
165
  # Ensure provider post-filter in case backend filter is lenient
166
  if provider:
167
  docs = [d for d in docs if _match_provider(d, provider)]
 
172
 
173
 
174
@traceable(name="BM25Retriever")
def bm25_search(query: str, provider: str | None = None, k: int = None, use_query_expansion: bool = True):
    """Search BM25 using the global retriever with query expansion and optional provider filter."""
    _ensure_initialized()

    # Fall back to the module-level default result count when unspecified.
    effective_k = DEFAULT_K_BM25 if k is None else k

    try:
        if not _bm25_retriever:
            return []

        if use_query_expansion:
            # Expand the query so abbreviations/synonyms also match.
            variations = expand_medical_query(query, strategy="adaptive", max_variations=3)
            logger.debug(f"BM25: Expanded query '{query}' into {len(variations)} variations")

            seen = set()
            collected = []
            for variant in variations:
                # Over-fetch per variation; dedup below trims the overlap.
                _bm25_retriever.k = max(1, effective_k * 2)
                hits = _bm25_retriever.get_relevant_documents(variant) or []

                # Deduplicate while preserving first-appearance order.
                for hit in hits:
                    key = _get_doc_id(hit)
                    if key not in seen:
                        seen.add(key)
                        collected.append(hit)

            docs = collected[:effective_k * 2]
        else:
            # Standard single-query search without expansion.
            _bm25_retriever.k = max(1, effective_k)
            docs = _bm25_retriever.get_relevant_documents(query) or []

        if provider:
            docs = [d for d in docs if _match_provider(d, provider)]
        # Expanded searches return a larger slice to offset dedup losses.
        return docs[:effective_k * 2 if use_query_expansion else effective_k]
    except Exception as e:
        logger.error(f"BM25 search failed: {e}")
        return []
219
 
220
 
221
def hybrid_search(query: str, provider: str | None = None, k_vector: int = None, k_bm25: int = None, use_query_expansion: bool = True):
    """Combine vector and BM25 results with query expansion (provider-filtered if provided)."""
    _ensure_initialized()  # Ensure retrievers are initialized before parallel execution

    # Resolve unspecified limits to the module-level defaults.
    kv = DEFAULT_K_VECTOR if k_vector is None else k_vector
    kb = DEFAULT_K_BM25 if k_bm25 is None else k_bm25

    # Run both retrievers concurrently on the shared worker pool.
    vector_future = _retrieval_pool.submit(vector_search, query, provider, kv, use_query_expansion)
    bm25_future = _retrieval_pool.submit(bm25_search, query, provider, kb, use_query_expansion)

    # Concatenate (vector results first) and dedup by document identity.
    seen = set()
    merged = []
    for doc in vector_future.result() + bm25_future.result():
        key = _get_doc_id(doc)
        if key not in seen:
            seen.add(key)
            merged.append(doc)

    logger.info(f"Hybrid search returned {len(merged)} unique documents (query expansion: {use_query_expansion})")
    return merged
core/tools.py CHANGED
@@ -11,6 +11,8 @@ from langchain.tools import tool
11
  from .retrievers import hybrid_search, vector_search, bm25_search
12
  from .validation import validate_medical_answer
13
  from .github_storage import get_github_storage
 
 
14
  from langchain_openai import ChatOpenAI
15
 
16
  CANONICAL_PROVIDERS = {"Manus", "ASCO", "NCCN", "ESMO", "NICE"}
@@ -160,21 +162,31 @@ def _format_docs_with_citations(docs: List[Document]) -> str:
160
  page = meta.get("page_number", "?")
161
  provider = meta.get("provider", "unknown")
162
  disease = meta.get("disease", "unknown")
 
 
163
  snippet = clear_text(d.page_content)
164
- parts.append(
165
- f"Result {i}:\n"
166
- f"Provider: {provider} | Disease: {disease} | Source: {source} | Page: {page}\n"
167
- f"Text:\n{snippet}\n"
168
- )
 
 
 
 
 
 
 
169
  return "\n\n".join(parts) if parts else "No results."
170
 
171
 
172
  @tool
173
  def medical_guidelines_knowledge_tool(query: str, provider: Optional[str] = None) -> str:
174
  """
175
- Retrieve medical guideline knowledge for a query with optional provider filtering.
 
176
  If provider is provided (e.g., "NCCN", "ASCO", "ESMO", "NICE"), results will be filtered by metadata provider.
177
- Always returns text with full metadata (source file, page number, provider, disease) for each result.
178
  """
179
  global _last_question, _last_documents
180
  try:
@@ -183,25 +195,46 @@ def medical_guidelines_knowledge_tool(query: str, provider: Optional[str] = None
183
 
184
  # Normalize provider name from either explicit arg or query text
185
  normalized_provider = _normalize_provider(provider, query)
186
- # Use hybrid search by default for quality
187
- docs = hybrid_search(query=query, provider=normalized_provider, k_vector=5, k_bm25=5)
188
 
189
- # Store documents for validation context
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  _last_documents = []
191
- for doc in docs:
192
  doc_dict = {
193
- "doc_id": getattr(doc, 'id', f"doc_{len(_last_documents)}"),
194
  "source": doc.metadata.get("source", "unknown"),
195
  "provider": doc.metadata.get("provider", "unknown"),
196
  "page_number": doc.metadata.get("page_number", "unknown"),
197
  "disease": doc.metadata.get("disease", "unknown"),
198
- "snippet": doc.page_content[:500] + "..." if len(doc.page_content) > 500 else doc.page_content,
 
 
 
 
 
199
  "content": doc.page_content
200
  }
201
  _last_documents.append(doc_dict)
202
 
203
- return _format_docs_with_citations(docs)
204
  except Exception as e:
 
205
  return f"Retrieval error: {str(e)}"
206
 
207
 
 
11
  from .retrievers import hybrid_search, vector_search, bm25_search
12
  from .validation import validate_medical_answer
13
  from .github_storage import get_github_storage
14
+ from .context_enrichment import enrich_retrieved_documents
15
+ from .config import logger
16
  from langchain_openai import ChatOpenAI
17
 
18
  CANONICAL_PROVIDERS = {"Manus", "ASCO", "NCCN", "ESMO", "NICE"}
 
162
  page = meta.get("page_number", "?")
163
  provider = meta.get("provider", "unknown")
164
  disease = meta.get("disease", "unknown")
165
+ is_context = meta.get("context_enrichment", False)
166
+
167
  snippet = clear_text(d.page_content)
168
+
169
+ # Build citation header
170
+ citation = f"Result {i}:\n"
171
+ citation += f"Provider: {provider} | Disease: {disease} | Source: {source} | Page: {page}"
172
+
173
+ # Add context enrichment marker if this is a context page
174
+ if is_context:
175
+ citation += " [CONTEXT PAGE]"
176
+
177
+ citation += f"\nText:\n{snippet}\n"
178
+ parts.append(citation)
179
+
180
  return "\n\n".join(parts) if parts else "No results."
181
 
182
 
183
  @tool
184
  def medical_guidelines_knowledge_tool(query: str, provider: Optional[str] = None) -> str:
185
  """
186
+ Retrieve comprehensive medical guideline knowledge with enriched context.
187
+ Includes surrounding pages (before/after) for top results to provide complete clinical context.
188
  If provider is provided (e.g., "NCCN", "ASCO", "ESMO", "NICE"), results will be filtered by metadata provider.
189
+ Returns detailed text with full metadata and contextual information for expert analysis.
190
  """
191
  global _last_question, _last_documents
192
  try:
 
195
 
196
  # Normalize provider name from either explicit arg or query text
197
  normalized_provider = _normalize_provider(provider, query)
 
 
198
 
199
+ # Use hybrid search with query expansion for comprehensive retrieval
200
+ # Uses global defaults: DEFAULT_K_VECTOR=7, DEFAULT_K_BM25=3 (configurable in core/retrievers.py)
201
+ docs = hybrid_search(query=query, provider=normalized_provider)
202
+
203
+ # Enrich top documents with surrounding pages for richer context
204
+ # This provides complete clinical context including adjacent information
205
+ enriched_docs = enrich_retrieved_documents(
206
+ documents=docs,
207
+ pages_before=1, # Include 1 page before
208
+ pages_after=1, # Include 1 page after
209
+ max_enriched=5 # Enrich top 5 most relevant documents
210
+ )
211
+
212
+ # Count context pages added
213
+ context_pages_count = sum(1 for doc in enriched_docs if doc.metadata.get("context_enrichment", False))
214
+ logger.info(f"Retrieved {len(docs)} documents, added {context_pages_count} context pages")
215
+
216
+ # Store documents for validation context with enrichment metadata
217
  _last_documents = []
218
+ for doc in enriched_docs:
219
  doc_dict = {
220
+ "doc_id": getattr(doc, 'id', None),
221
  "source": doc.metadata.get("source", "unknown"),
222
  "provider": doc.metadata.get("provider", "unknown"),
223
  "page_number": doc.metadata.get("page_number", "unknown"),
224
  "disease": doc.metadata.get("disease", "unknown"),
225
+ "context_enrichment": doc.metadata.get("context_enrichment", False),
226
+ "enriched": doc.metadata.get("enriched", False),
227
+ "pages_included": doc.metadata.get("pages_included", []),
228
+ "primary_page": doc.metadata.get("primary_page"),
229
+ "context_pages_before": doc.metadata.get("context_pages_before"),
230
+ "context_pages_after": doc.metadata.get("context_pages_after"),
231
  "content": doc.page_content
232
  }
233
  _last_documents.append(doc_dict)
234
 
235
+ return _format_docs_with_citations(enriched_docs)
236
  except Exception as e:
237
+ logger.error(f"Retrieval error: {str(e)}")
238
  return f"Retrieval error: {str(e)}"
239
 
240
 
core/validation.py CHANGED
@@ -25,7 +25,7 @@ class MedicalAnswerValidator:
25
  logger.info("Medical answer validator initialized successfully")
26
 
27
  def _get_next_interaction_id(self) -> str:
28
- """Get a temporary interaction ID (will be replaced by GitHub storage with unique ID)."""
29
  try:
30
  # Try to get from GitHub first
31
  github_storage = get_github_storage()
@@ -35,6 +35,7 @@ class MedicalAnswerValidator:
35
  try:
36
  evaluations = json.loads(existing_content)
37
  if evaluations and isinstance(evaluations, list):
 
38
  # Find the highest existing ID
39
  max_id = 0
40
  for eval_item in evaluations:
@@ -43,16 +44,21 @@ class MedicalAnswerValidator:
43
  max_id = max(max_id, current_id)
44
  except (ValueError, TypeError):
45
  continue
46
- return str(max_id + 1)
47
- except json.JSONDecodeError:
 
 
 
48
  pass
49
 
50
  # Fallback to local file check
51
  if os.path.exists(self.evaluation_file):
 
52
  with open(self.evaluation_file, "r", encoding="utf-8") as f:
53
  evaluations = json.load(f)
54
 
55
  if evaluations:
 
56
  # Find the highest existing ID
57
  max_id = 0
58
  for eval_item in evaluations:
@@ -61,10 +67,14 @@ class MedicalAnswerValidator:
61
  max_id = max(max_id, current_id)
62
  except (ValueError, TypeError):
63
  continue
64
- return str(max_id + 1)
 
 
65
  else:
 
66
  return "1"
67
  else:
 
68
  return "1"
69
  except Exception as e:
70
  logger.error(f"Error getting next interaction ID: {e}")
@@ -74,12 +84,16 @@ class MedicalAnswerValidator:
74
  """Clean documents by removing snippets and keeping only essential fields."""
75
  cleaned_docs = []
76
  for doc in documents:
 
 
77
  cleaned_doc = {
78
  "doc_id": doc.get("doc_id"),
79
  "source": doc.get("source", "unknown"),
80
  "provider": doc.get("provider", "unknown"),
81
  "page_number": doc.get("page_number", "unknown"),
82
  "disease": doc.get("disease", "unknown"),
 
 
83
  "content": doc.get("content", "")
84
  }
85
  cleaned_docs.append(cleaned_doc)
 
25
  logger.info("Medical answer validator initialized successfully")
26
 
27
  def _get_next_interaction_id(self) -> str:
28
+ """Get the next interaction ID by finding the highest existing ID and adding 1."""
29
  try:
30
  # Try to get from GitHub first
31
  github_storage = get_github_storage()
 
35
  try:
36
  evaluations = json.loads(existing_content)
37
  if evaluations and isinstance(evaluations, list):
38
+ logger.info(f"Found {len(evaluations)} existing evaluations in GitHub")
39
  # Find the highest existing ID
40
  max_id = 0
41
  for eval_item in evaluations:
 
44
  max_id = max(max_id, current_id)
45
  except (ValueError, TypeError):
46
  continue
47
+ next_id = str(max_id + 1)
48
+ logger.info(f"Next interaction ID will be: {next_id}")
49
+ return next_id
50
+ except json.JSONDecodeError as e:
51
+ logger.warning(f"Failed to parse GitHub evaluation file: {e}")
52
  pass
53
 
54
  # Fallback to local file check
55
  if os.path.exists(self.evaluation_file):
56
+ logger.info("GitHub file not found, checking local file")
57
  with open(self.evaluation_file, "r", encoding="utf-8") as f:
58
  evaluations = json.load(f)
59
 
60
  if evaluations:
61
+ logger.info(f"Found {len(evaluations)} existing evaluations in local file")
62
  # Find the highest existing ID
63
  max_id = 0
64
  for eval_item in evaluations:
 
67
  max_id = max(max_id, current_id)
68
  except (ValueError, TypeError):
69
  continue
70
+ next_id = str(max_id + 1)
71
+ logger.info(f"Next interaction ID from local file: {next_id}")
72
+ return next_id
73
  else:
74
+ logger.info("Local file is empty, starting with ID 1")
75
  return "1"
76
  else:
77
+ logger.info("No existing evaluation file found, starting with ID 1")
78
  return "1"
79
  except Exception as e:
80
  logger.error(f"Error getting next interaction ID: {e}")
 
84
  """Clean documents by removing snippets and keeping only essential fields."""
85
  cleaned_docs = []
86
  for doc in documents:
87
+ is_context_page = doc.get("context_enrichment", False)
88
+
89
  cleaned_doc = {
90
  "doc_id": doc.get("doc_id"),
91
  "source": doc.get("source", "unknown"),
92
  "provider": doc.get("provider", "unknown"),
93
  "page_number": doc.get("page_number", "unknown"),
94
  "disease": doc.get("disease", "unknown"),
95
+ "page_type": "CONTEXT PAGE" if is_context_page else "ORIGINAL PAGE",
96
+ "context_enrichment": is_context_page,
97
  "content": doc.get("content", "")
98
  }
99
  cleaned_docs.append(cleaned_doc)
data/medical_terms_cache.json ADDED
@@ -0,0 +1,3420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "synonyms": {
3
+ "european society of medical oncology": [
4
+ "esmo"
5
+ ],
6
+ "esmo": [
7
+ "european society of medical oncology",
8
+ "european society for medical oncology",
9
+ "european\nsociety of medical oncology",
10
+ "european society for\nmedical oncology",
11
+ "the european society for medical oncology",
12
+ "the european society for medical\noncology"
13
+ ],
14
+ "american society of clinical\n\noncology": [
15
+ "asco"
16
+ ],
17
+ "asco": [
18
+ "md american society of clinical oncology",
19
+ "american\nsociety of clinical oncology",
20
+ "american society of clinical\n\noncology",
21
+ "american society of clinical oncology",
22
+ "inc"
23
+ ],
24
+ "italian association of medical oncology": [
25
+ "aiom"
26
+ ],
27
+ "aiom": [
28
+ "italian association\nof medical oncology",
29
+ "italian association of medical oncology"
30
+ ],
31
+ "national comprehensive cancer network": [
32
+ "nccn"
33
+ ],
34
+ "nccn": [
35
+ "leading american cancer centers",
36
+ "national comprehensive cancer network",
37
+ "vs insurance-based"
38
+ ],
39
+ "non-small cell lung cancer": [
40
+ "nsclc"
41
+ ],
42
+ "nsclc": [
43
+ "mutant advanced non-small cell lung cancer",
44
+ "small cell\nlung cancer",
45
+ "non-small-cell lung cancer",
46
+ "non-small cell lung cancer",
47
+ "lung cancer",
48
+ "stage iii non small cell lung cancer",
49
+ "robotic lobectomy for non-small cell lung cancer",
50
+ "cancer",
51
+ "advanced non-small-cell lung cancer",
52
+ "small-cell lung cancer",
53
+ "iii non-small-cell lung cancer",
54
+ "advanced non-small cell lung cancer",
55
+ "small-cell\nlung cancer"
56
+ ],
57
+ "american\nsociety of clinical oncology": [
58
+ "asco"
59
+ ],
60
+ "italian association\nof medical oncology": [
61
+ "aiom"
62
+ ],
63
+ "european\nsociety of medical oncology": [
64
+ "esmo"
65
+ ],
66
+ "leading american cancer centers": [
67
+ "nccn"
68
+ ],
69
+ "using the guidelines into decision\nsupport": [
70
+ "glides"
71
+ ],
72
+ "glides": [
73
+ "using the guidelines into decision\nsupport",
74
+ "guidelines into decision support"
75
+ ],
76
+ "for all the newly\neuropean medicines agency": [
77
+ "ema"
78
+ ],
79
+ "ema": [
80
+ "european medicines agency",
81
+ "and the european medicines\nagency",
82
+ "for all the newly\neuropean medicines agency",
83
+ "not european medicines agency"
84
+ ],
85
+ "evaluates treatments with curative intent": [
86
+ "such\nas adjuvant chemotherapy"
87
+ ],
88
+ "such\nas adjuvant chemotherapy": [
89
+ "evaluates treatments with curative intent"
90
+ ],
91
+ "panel members": [
92
+ "from\ndifferent institutions"
93
+ ],
94
+ "from\ndifferent institutions": [
95
+ "panel members"
96
+ ],
97
+ "and the\nearly study interruption": [
98
+ "even if based on pre-specified\ninterim analysis"
99
+ ],
100
+ "even if based on pre-specified\ninterim analysis": [
101
+ "and the\nearly study interruption"
102
+ ],
103
+ "-positive nonsmall-cell lung cancer": [
104
+ "magrit"
105
+ ],
106
+ "magrit": [
107
+ "-positive nonsmall-cell lung cancer"
108
+ ],
109
+ "guidelines into decision support": [
110
+ "glides"
111
+ ],
112
+ "early-stage and\n\nlocally advanced": [
113
+ "non-metastatic"
114
+ ],
115
+ "non-metastatic": [
116
+ "early-stage and\n\nlocally advanced"
117
+ ],
118
+ "or best supportive\ncare": [
119
+ "bsc"
120
+ ],
121
+ "bsc": [
122
+ "leads to\nincreased dfs versus best supportive care",
123
+ "or best supportive\ncare"
124
+ ],
125
+ "author affiliations\nand support\n\ninformation": [
126
+ "if\n\napplicable"
127
+ ],
128
+ "if\n\napplicable": [
129
+ "author affiliations\nand support\n\ninformation"
130
+ ],
131
+ "small-cell lung cancers": [
132
+ "nsclcs"
133
+ ],
134
+ "nsclcs": [
135
+ "small-cell lung cancers"
136
+ ],
137
+ "two randomized control trials": [
138
+ "rcts"
139
+ ],
140
+ "rcts": [
141
+ "controlled trials",
142
+ "two randomized control trials",
143
+ "clinical trials"
144
+ ],
145
+ "the primary end point was disease-free\nsurvival": [
146
+ "dfs"
147
+ ],
148
+ "dfs": [
149
+ "the primary end point was disease-free\nsurvival"
150
+ ],
151
+ "joint cancer care ontario": [
152
+ "cco"
153
+ ],
154
+ "cco": [
155
+ "joint cancer care ontario"
156
+ ],
157
+ "small-cell\nlung cancer": [
158
+ "nsclc"
159
+ ],
160
+ "puma biotechnology": [
161
+ "inst"
162
+ ],
163
+ "inst": [
164
+ "calithera biosciences",
165
+ "novartis",
166
+ "cullinan oncology",
167
+ "regeneron",
168
+ "glaxosmithkline canada",
169
+ "oncomed",
170
+ "genentech",
171
+ "verastem",
172
+ "bayer",
173
+ "bristol myers squibb foundation",
174
+ "puma biotechnology",
175
+ "boehringer ingelheim",
176
+ "amgen",
177
+ "arcus biosciences",
178
+ "turning point therapeutics",
179
+ "crispr\ntherapeutics",
180
+ "msd",
181
+ "takeda",
182
+ "revolution medicines",
183
+ "merck serono",
184
+ "macrogenics",
185
+ "oric pharmaceuticals",
186
+ "astrazeneca",
187
+ "merck",
188
+ "summit therapeutics",
189
+ "palobiofarma",
190
+ "astex pharmaceuticals",
191
+ "black diamond\ntherapeutics",
192
+ "janssen oncology",
193
+ "mirati therapeutics",
194
+ "bristol myers squibb",
195
+ "abbvie",
196
+ "dohme",
197
+ "anheart therapeutics",
198
+ "neogenomics",
199
+ "sutro biopharma",
200
+ "polaris",
201
+ "pfizer",
202
+ "forward",
203
+ "elevation oncology",
204
+ "astra zeneca",
205
+ "nuvation bio",
206
+ "bms",
207
+ "gsk",
208
+ "inhibrx",
209
+ "bristol myers\nsquibb",
210
+ "roche",
211
+ "bristol-myers squibb",
212
+ "dizal\npharma",
213
+ "harpoon therapeutics",
214
+ "vivace therapeutics",
215
+ "janssen",
216
+ "jazz pharmaceuticals",
217
+ "lilly",
218
+ "advaxis",
219
+ "astrazeneca canada",
220
+ "constellation pharmaceuticals",
221
+ "guardant health",
222
+ "trizell",
223
+ "pharmamar",
224
+ "medimmune",
225
+ "inc",
226
+ "blueprint medicines",
227
+ "glaxosmithkline",
228
+ "therapeutics",
229
+ "exelixis"
230
+ ],
231
+ "pfizer": [
232
+ "inst"
233
+ ],
234
+ "genentech": [
235
+ "inst"
236
+ ],
237
+ "bristol-myers squibb": [
238
+ "inst"
239
+ ],
240
+ "medimmune": [
241
+ "inst"
242
+ ],
243
+ "of patients with\nstage i to iii sclc": [
244
+ "limited stage"
245
+ ],
246
+ "limited stage": [
247
+ "of patients with\nstage i to iii sclc"
248
+ ],
249
+ "small-cell lung cancer": [
250
+ "pacific",
251
+ "nsclc"
252
+ ],
253
+ "or small-cell lung cancer": [
254
+ "sclc"
255
+ ],
256
+ "sclc": [
257
+ "small cell lung cancer",
258
+ "trial in small cell lung cancer",
259
+ "and small-cell lung cancer",
260
+ "or small-cell lung cancer"
261
+ ],
262
+ "cancer": [
263
+ "relay",
264
+ "nsclc"
265
+ ],
266
+ "and small-cell lung cancer": [
267
+ "sclc"
268
+ ],
269
+ "with contrast": [
270
+ "preferred"
271
+ ],
272
+ "preferred": [
273
+ "with contrast"
274
+ ],
275
+ "prophylactic cranial irradiation": [
276
+ "pci"
277
+ ],
278
+ "pci": [
279
+ "dose prophylactic cranial irradiation",
280
+ "prophylactic cranial irradiation"
281
+ ],
282
+ "the bottom line": [
283
+ "continued"
284
+ ],
285
+ "continued": [
286
+ "the bottom line",
287
+ "all recommendations"
288
+ ],
289
+ "salvage stereotactic body radiation therapy": [
290
+ "sbrt"
291
+ ],
292
+ "sbrt": [
293
+ "salvage stereotactic body radiation therapy",
294
+ "sabr or stereotactic body radiotherapy",
295
+ "fdg-pet and stereotactic body radiotherapy",
296
+ "stereotactic body radiotherapy"
297
+ ],
298
+ "oncomed": [
299
+ "inst"
300
+ ],
301
+ "macrogenics": [
302
+ "inst"
303
+ ],
304
+ "astrazeneca": [
305
+ "inst"
306
+ ],
307
+ "thecollege of americanpathologists": [
308
+ "cap"
309
+ ],
310
+ "cap": [
311
+ "thecollege of americanpathologists"
312
+ ],
313
+ "association\nfor molecular pathology": [
314
+ "amp"
315
+ ],
316
+ "amp": [
317
+ "association\nfor molecular pathology"
318
+ ],
319
+ "and anaplastic lymphoma kinase": [
320
+ "alk"
321
+ ],
322
+ "alk": [
323
+ "positive anaplastic lymphoma kinase",
324
+ "crizotinib-pretreated anaplastic lymphoma kinase",
325
+ "and anaplastic lymphoma kinase"
326
+ ],
327
+ "immunohistochemistry": [
328
+ "ihc"
329
+ ],
330
+ "ihc": [
331
+ "egfr fish or immunohistochemistry",
332
+ "immunohistochemistry"
333
+ ],
334
+ "merck": [
335
+ "german",
336
+ "inst"
337
+ ],
338
+ "glaxosmithkline": [
339
+ "inst",
340
+ "gsk"
341
+ ],
342
+ "astex pharmaceuticals": [
343
+ "inst"
344
+ ],
345
+ "takeda": [
346
+ "inst"
347
+ ],
348
+ "bristol myers\nsquibb": [
349
+ "inst",
350
+ "bms"
351
+ ],
352
+ "polaris": [
353
+ "inst"
354
+ ],
355
+ "inhibrx": [
356
+ "inst"
357
+ ],
358
+ "vivace therapeutics": [
359
+ "inst"
360
+ ],
361
+ "constellation pharmaceuticals": [
362
+ "inst"
363
+ ],
364
+ "harpoon therapeutics": [
365
+ "inst"
366
+ ],
367
+ "bayer": [
368
+ "inst"
369
+ ],
370
+ "novartis": [
371
+ "inst"
372
+ ],
373
+ "crispr\ntherapeutics": [
374
+ "inst"
375
+ ],
376
+ "calithera biosciences": [
377
+ "inst"
378
+ ],
379
+ "therapeutics": [
380
+ "inst"
381
+ ],
382
+ "stage iii non small cell lung cancer": [
383
+ "nsclc"
384
+ ],
385
+ "immune checkpoint inhibitors": [
386
+ "icis"
387
+ ],
388
+ "icis": [
389
+ "neoadjuvant immune checkpoint inhibitors",
390
+ "immune checkpoint inhibitors"
391
+ ],
392
+ "american society of clinical oncology": [
393
+ "asco"
394
+ ],
395
+ "boehringer ingelheim": [
396
+ "inst"
397
+ ],
398
+ "pharmamar": [
399
+ "inst"
400
+ ],
401
+ "roche": [
402
+ "inst"
403
+ ],
404
+ "janssen": [
405
+ "inst"
406
+ ],
407
+ "merck serono": [
408
+ "inst"
409
+ ],
410
+ "bms": [
411
+ "bristol\nmyers squibb",
412
+ "bristol-myers\nsquibb",
413
+ "bristol myers squibb",
414
+ "bristol myers\nsquibb",
415
+ "inst",
416
+ "celgene"
417
+ ],
418
+ "trizell": [
419
+ "inst"
420
+ ],
421
+ "amgen": [
422
+ "inst"
423
+ ],
424
+ "clinical lung cancer": [
425
+ "elsevier"
426
+ ],
427
+ "elsevier": [
428
+ "clinical lung cancer"
429
+ ],
430
+ "one randomized controlled trial": [
431
+ "rct"
432
+ ],
433
+ "rct": [
434
+ "phase iii randomised clinical trial",
435
+ "phase iib\nrandomised controlled trial",
436
+ "one randomized controlled trial",
437
+ "a phase iii randomised clinical trial"
438
+ ],
439
+ "the primary end point of progression-free survival": [
440
+ "pfs"
441
+ ],
442
+ "pfs": [
443
+ "quality of life and progression-free survival",
444
+ "the primary end point of progression-free survival",
445
+ "the median\nprogression-free survival",
446
+ "the median progression-free\nsurvival",
447
+ "no\nimprovement in progression-free survival",
448
+ "and\nprogression-free survival",
449
+ "reported improved\nprogression-free survival"
450
+ ],
451
+ "adverse events": [
452
+ "aes"
453
+ ],
454
+ "aes": [
455
+ "mainly altered lipid levels",
456
+ "adverse\nevents",
457
+ "adverse events"
458
+ ],
459
+ "and consolidation": [
460
+ "for unresectable stage iii nsclc"
461
+ ],
462
+ "for unresectable stage iii nsclc": [
463
+ "and consolidation"
464
+ ],
465
+ "treatment is now an\negfr-targeted drug": [
466
+ "osimertinib"
467
+ ],
468
+ "osimertinib": [
469
+ "treatment is now an\negfr-targeted drug"
470
+ ],
471
+ "inc": [
472
+ "asco",
473
+ "inst"
474
+ ],
475
+ "small cell\nlung cancer": [
476
+ "nsclc"
477
+ ],
478
+ "the median\nprogression-free survival": [
479
+ "pfs"
480
+ ],
481
+ "adverse\nevents": [
482
+ "aes"
483
+ ],
484
+ "and vascular\nendothelial growth factor": [
485
+ "vegf"
486
+ ],
487
+ "vegf": [
488
+ "and vascular\nendothelial growth factor"
489
+ ],
490
+ "though rates of\nimmune-related aes": [
491
+ "iraes"
492
+ ],
493
+ "iraes": [
494
+ "though rates of\nimmune-related aes"
495
+ ],
496
+ "bristol myers squibb": [
497
+ "inst",
498
+ "bms"
499
+ ],
500
+ "palobiofarma": [
501
+ "inst"
502
+ ],
503
+ "dohme": [
504
+ "msd",
505
+ "inst"
506
+ ],
507
+ "mirati therapeutics": [
508
+ "inst"
509
+ ],
510
+ "abbvie": [
511
+ "inst"
512
+ ],
513
+ "blueprint medicines": [
514
+ "inst"
515
+ ],
516
+ "advaxis": [
517
+ "inst"
518
+ ],
519
+ "janssen oncology": [
520
+ "inst"
521
+ ],
522
+ "elevation oncology": [
523
+ "inst"
524
+ ],
525
+ "black diamond\ntherapeutics": [
526
+ "inst"
527
+ ],
528
+ "forward": [
529
+ "inst"
530
+ ],
531
+ "gsk": [
532
+ "inst",
533
+ "glaxosmithkline"
534
+ ],
535
+ "regeneron": [
536
+ "inst"
537
+ ],
538
+ "jazz pharmaceuticals": [
539
+ "inst"
540
+ ],
541
+ "oric pharmaceuticals": [
542
+ "inst"
543
+ ],
544
+ "summit therapeutics": [
545
+ "inst"
546
+ ],
547
+ "turning point therapeutics": [
548
+ "inst"
549
+ ],
550
+ "anheart therapeutics": [
551
+ "inst"
552
+ ],
553
+ "nuvation bio": [
554
+ "inst"
555
+ ],
556
+ "and terminology": [
557
+ "data supplement"
558
+ ],
559
+ "data supplement": [
560
+ "and terminology"
561
+ ],
562
+ "nab-paclitaxel with or without bevacizumab": [
563
+ "in the absence of\ncontraindications to bevacizumab"
564
+ ],
565
+ "in the absence of\ncontraindications to bevacizumab": [
566
+ "nab-paclitaxel with or without bevacizumab"
567
+ ],
568
+ "paclitaxel": [
569
+ "or nab-paclitaxel"
570
+ ],
571
+ "or nab-paclitaxel": [
572
+ "paclitaxel"
573
+ ],
574
+ "all recommendations": [
575
+ "continued"
576
+ ],
577
+ "interdisciplinary palliative care teams": [
578
+ "consultation"
579
+ ],
580
+ "consultation": [
581
+ "interdisciplinary palliative care teams"
582
+ ],
583
+ "md american society of clinical oncology": [
584
+ "asco"
585
+ ],
586
+ "va asco practice guideline staff": [
587
+ "health research methods"
588
+ ],
589
+ "health research methods": [
590
+ "va asco practice guidelines staff",
591
+ "va asco practice guideline staff"
592
+ ],
593
+ "in tyrosine kinase inhibitor": [
594
+ "tki"
595
+ ],
596
+ "tki": [
597
+ "in tyrosine kinase inhibitor",
598
+ "tyrosine kinase inhibitor"
599
+ ],
600
+ "reuss et al\n\n\n\nrate": [
601
+ "orr"
602
+ ],
603
+ "orr": [
604
+ "reuss et al\n\n\n\nrate",
605
+ "which had an overall response rate"
606
+ ],
607
+ "disease control rate": [
608
+ "dcr"
609
+ ],
610
+ "dcr": [
611
+ "disease control rate"
612
+ ],
613
+ "limited generalizability": [
614
+ "united states only"
615
+ ],
616
+ "united states only": [
617
+ "limited generalizability"
618
+ ],
619
+ "with a median duration of\nresponse": [
620
+ "dor"
621
+ ],
622
+ "dor": [
623
+ "with a median duration of\nresponse",
624
+ "the median\nduration of response"
625
+ ],
626
+ "the most common treatment-emergent adverse events": [
627
+ "teaes"
628
+ ],
629
+ "teaes": [
630
+ "the most common treatment-emergent adverse events"
631
+ ],
632
+ "verastem": [
633
+ "inst"
634
+ ],
635
+ "exelixis": [
636
+ "inst"
637
+ ],
638
+ "arcus biosciences": [
639
+ "inst"
640
+ ],
641
+ "revolution medicines": [
642
+ "inst"
643
+ ],
644
+ "sutro biopharma": [
645
+ "inst"
646
+ ],
647
+ "dizal\npharma": [
648
+ "inst"
649
+ ],
650
+ "msd": [
651
+ "dohme",
652
+ "inst"
653
+ ],
654
+ "lilly": [
655
+ "inst"
656
+ ],
657
+ "astrazeneca canada": [
658
+ "inst"
659
+ ],
660
+ "neogenomics": [
661
+ "inst"
662
+ ],
663
+ "guardant health": [
664
+ "inst"
665
+ ],
666
+ "glaxosmithkline canada": [
667
+ "inst"
668
+ ],
669
+ "va asco practice guidelines staff": [
670
+ "health research methods"
671
+ ],
672
+ "was published by asco and ontario health": [
673
+ "cancer care ontario"
674
+ ],
675
+ "cancer care ontario": [
676
+ "was published by asco and ontario health",
677
+ "asco-ontario health"
678
+ ],
679
+ "the median progression-free\nsurvival": [
680
+ "pfs"
681
+ ],
682
+ "pneumonitis": [
683
+ "of any cause"
684
+ ],
685
+ "of any cause": [
686
+ "pneumonitis"
687
+ ],
688
+ "consolidation immunotherapy": [
689
+ "durvalumab"
690
+ ],
691
+ "durvalumab": [
692
+ "if ps improves\n\nv\n\nconsolidation immunotherapy",
693
+ "consolidation immunotherapy"
694
+ ],
695
+ "if ps improves\n\nv\n\nconsolidation immunotherapy": [
696
+ "durvalumab"
697
+ ],
698
+ "and sclc-i": [
699
+ "inflamed phenotype"
700
+ ],
701
+ "inflamed phenotype": [
702
+ "and sclc-i"
703
+ ],
704
+ "a bispecific t-cell\nengager": [
705
+ "bite"
706
+ ],
707
+ "bite": [
708
+ "a bispecific t-cell\nengager"
709
+ ],
710
+ "the most\ncommon ae was cytokine release syndrome": [
711
+ "crs"
712
+ ],
713
+ "crs": [
714
+ "the most\ncommon ae was cytokine release syndrome",
715
+ "cytokine release syndrome"
716
+ ],
717
+ "asco-ontario health": [
718
+ "cancer care ontario"
719
+ ],
720
+ "trial in small cell lung cancer": [
721
+ "sclc"
722
+ ],
723
+ "cullinan oncology": [
724
+ "inst"
725
+ ],
726
+ "astra zeneca": [
727
+ "inst"
728
+ ],
729
+ "bristol myers squibb foundation": [
730
+ "inst"
731
+ ],
732
+ "and\nprogression-free survival": [
733
+ "pfs"
734
+ ],
735
+ "the us food and drug administration": [
736
+ "fda"
737
+ ],
738
+ "fda": [
739
+ "and the united states food and drug administration",
740
+ "the food and drug administration",
741
+ "the us food and drug administration",
742
+ "food and drug administration",
743
+ "or food and drug administration",
744
+ "and the food and drug administration",
745
+ "entrectinib received food and\ndrug administration"
746
+ ],
747
+ "cytokine release syndrome": [
748
+ "crs"
749
+ ],
750
+ "associated neurotoxicity\nsyndrome": [
751
+ "icans"
752
+ ],
753
+ "icans": [
754
+ "associated neurotoxicity\nsyndrome"
755
+ ],
756
+ "department of surgical sciences": [
757
+ "ikv"
758
+ ],
759
+ "ikv": [
760
+ "department of surgical sciences"
761
+ ],
762
+ "has grouped lung and thymic neuroendocrine\ntumours": [
763
+ "nets"
764
+ ],
765
+ "nets": [
766
+ "tumors",
767
+ "has grouped lung and thymic neuroendocrine\ntumours"
768
+ ],
769
+ "small cell lung cancer": [
770
+ "sclc"
771
+ ],
772
+ "and\nlarge cell neuroendocrine carcinoma": [
773
+ "lcnec"
774
+ ],
775
+ "lcnec": [
776
+ "and\nlarge cell neuroendocrine carcinoma"
777
+ ],
778
+ "and thymic\ncarcinoid": [
779
+ "thc"
780
+ ],
781
+ "thc": [
782
+ "and thymic\ncarcinoid"
783
+ ],
784
+ "org": [
785
+ "esmo guidelines committee"
786
+ ],
787
+ "esmo guidelines committee": [
788
+ "org"
789
+ ],
790
+ "epidemiology and end results": [
791
+ "seer"
792
+ ],
793
+ "seer": [
794
+ "epidemiology and end results",
795
+ "and end results"
796
+ ],
797
+ "s syndrome": [
798
+ "cus"
799
+ ],
800
+ "cus": [
801
+ "s syndrome"
802
+ ],
803
+ "due to adrenocorticotropic hormone": [
804
+ "acth"
805
+ ],
806
+ "acth": [
807
+ "due to adrenocorticotropic hormone"
808
+ ],
809
+ "due to\ngrowth hormone-releasing hormone": [
810
+ "ghrh"
811
+ ],
812
+ "ghrh": [
813
+ "due to\ngrowth hormone-releasing hormone"
814
+ ],
815
+ "contrast": [
816
+ "liver mri"
817
+ ],
818
+ "liver mri": [
819
+ "contrast"
820
+ ],
821
+ "mediastinoscopy": [
822
+ "or ebus"
823
+ ],
824
+ "or ebus": [
825
+ "mediastinoscopy"
826
+ ],
827
+ "contrast-enhanced cross-sectional conventional": [
828
+ "radiological"
829
+ ],
830
+ "radiological": [
831
+ "contrast-enhanced cross-sectional conventional"
832
+ ],
833
+ "-labelled somatostatin analogues": [
834
+ "ssas"
835
+ ],
836
+ "ssas": [
837
+ "-labelled somatostatin analogues",
838
+ "medical options"
839
+ ],
840
+ "urinary-free cortisol": [
841
+ "ufc"
842
+ ],
843
+ "ufc": [
844
+ "urinary-free cortisol"
845
+ ],
846
+ "neuroendocrine tumor test": [
847
+ "netest"
848
+ ],
849
+ "netest": [
850
+ "neuroendocrine tumor test"
851
+ ],
852
+ "the who classification and pathological tnm": [
853
+ "ptnm"
854
+ ],
855
+ "ptnm": [
856
+ "the who classification and pathological tnm"
857
+ ],
858
+ "tumour burden and somatostatin receptor imaging": [
859
+ "sri"
860
+ ],
861
+ "sri": [
862
+ "tumour burden and somatostatin receptor imaging"
863
+ ],
864
+ "surgery\nrepresents the treatment of choice for lcs": [
865
+ "both tcs and\nacs"
866
+ ],
867
+ "both tcs and\nacs": [
868
+ "surgery\nrepresents the treatment of choice for lcs"
869
+ ],
870
+ "medical options": [
871
+ "ssas"
872
+ ],
873
+ "a combined approach": [
874
+ "sternotomy plus anterior thoracotomy"
875
+ ],
876
+ "sternotomy plus anterior thoracotomy": [
877
+ "a combined approach"
878
+ ],
879
+ "mainly cytotoxic chemotherapy": [
880
+ "cht"
881
+ ],
882
+ "cht": [
883
+ "over platinum-based doublet\nchemotherapy",
884
+ "mainly cytotoxic chemotherapy",
885
+ "the beneficial effects of adjuvant chemotherapy",
886
+ "platinum-based chemo\ntherapy",
887
+ "chemotherapy",
888
+ "the addition of the chemotherapy"
889
+ ],
890
+ "or systemic therapies": [
891
+ "with options\ndiscussed in these guidelines"
892
+ ],
893
+ "with options\ndiscussed in these guidelines": [
894
+ "or systemic therapies"
895
+ ],
896
+ "in case of\nclinical": [
897
+ "functioning syndrome"
898
+ ],
899
+ "functioning syndrome": [
900
+ "in case of\nclinical"
901
+ ],
902
+ "and the united states food and drug administration": [
903
+ "fda"
904
+ ],
905
+ "annals of oncology\n\n\n\nparathyroid hormone": [
906
+ "pth"
907
+ ],
908
+ "pth": [
909
+ "annals of oncology\n\n\n\nparathyroid hormone"
910
+ ],
911
+ "palliative surgery\nor radiofrequency ablation": [
912
+ "rfa"
913
+ ],
914
+ "rfa": [
915
+ "for these patients radiofrequency ablation",
916
+ "palliative surgery\nor radiofrequency ablation"
917
+ ],
918
+ "or cryoablation or endobronchial treatment": [
919
+ "ebt"
920
+ ],
921
+ "ebt": [
922
+ "or cryoablation or endobronchial treatment"
923
+ ],
924
+ "peptide\nreceptor radionuclide therapy": [
925
+ "prrt"
926
+ ],
927
+ "prrt": [
928
+ "peptide\nreceptor radionuclide therapy"
929
+ ],
930
+ "and interferon- a": [
931
+ "ifna"
932
+ ],
933
+ "ifna": [
934
+ "and interferon- a"
935
+ ],
936
+ "long-acting release": [
937
+ "lar"
938
+ ],
939
+ "lar": [
940
+ "long-acting release"
941
+ ],
942
+ "progression-free rate": [
943
+ "pfr"
944
+ ],
945
+ "pfr": [
946
+ "progression-free rate"
947
+ ],
948
+ "progression before enrolment": [
949
+ "luna study"
950
+ ],
951
+ "luna study": [
952
+ "progression before enrolment"
953
+ ],
954
+ "placebo-controlled trial": [
955
+ "sanet"
956
+ ],
957
+ "sanet": [
958
+ "placebo-controlled trial"
959
+ ],
960
+ "oxaliplatin combined with gemcitabine": [
961
+ "gemox"
962
+ ],
963
+ "gemox": [
964
+ "oxaliplatin combined with gemcitabine"
965
+ ],
966
+ "or capecitabine": [
967
+ "capox"
968
+ ],
969
+ "capox": [
970
+ "or capecitabine"
971
+ ],
972
+ "-fluorouracil": [
973
+ "folfox"
974
+ ],
975
+ "folfox": [
976
+ "-fluorouracil"
977
+ ],
978
+ "as alternative second-line": [
979
+ "in case of\nuncontrolled cs"
980
+ ],
981
+ "in case of\nuncontrolled cs": [
982
+ "as alternative second-line"
983
+ ],
984
+ "or mainly third-line therapy": [
985
+ "beyond\nssas and or everolimus"
986
+ ],
987
+ "beyond\nssas and or everolimus": [
988
+ "or mainly third-line therapy"
989
+ ],
990
+ "ifn- a as a potential second-line": [
991
+ "in case of uncontrolled\ncs"
992
+ ],
993
+ "in case of uncontrolled\ncs": [
994
+ "ifn- a as a potential second-line"
995
+ ],
996
+ "or mainly third-line alternative": [
997
+ "beyond ssas and or\neverolimus"
998
+ ],
999
+ "beyond ssas and or\neverolimus": [
1000
+ "or mainly third-line alternative"
1001
+ ],
1002
+ "thymic net recurrences may be local": [
1003
+ "if located in the\nanterior mediastinum"
1004
+ ],
1005
+ "if located in the\nanterior mediastinum": [
1006
+ "thymic net recurrences may be local"
1007
+ ],
1008
+ "regional": [
1009
+ "intrathoracic especially\npleural"
1010
+ ],
1011
+ "intrathoracic especially\npleural": [
1012
+ "regional"
1013
+ ],
1014
+ "an esmo\nmagnitude of clinical benefit scale": [
1015
+ "esmo-mcbs"
1016
+ ],
1017
+ "esmo-mcbs": [
1018
+ "esmo-magnitude of clinical\nbenefit",
1019
+ "esmo-magnitude of clinical benefit scale",
1020
+ "esmo-magnitude of\nclinical benefit",
1021
+ "esmomagnitude of clinical benefit scale",
1022
+ "an esmo\nmagnitude of clinical benefit scale",
1023
+ "esmo-magnitude of clinical benefit"
1024
+ ],
1025
+ "advanced carcinoids of the lung and thymus": [
1026
+ "luna"
1027
+ ],
1028
+ "luna": [
1029
+ "advanced carcinoids of the lung and thymus"
1030
+ ],
1031
+ "neuroendocrine cell hyperplasia": [
1032
+ "dipnech"
1033
+ ],
1034
+ "dipnech": [
1035
+ "neuroendocrine cell hyperplasia"
1036
+ ],
1037
+ "neuroendocrine carcinomas": [
1038
+ "carcinoid tumor"
1039
+ ],
1040
+ "carcinoid tumor": [
1041
+ "neuroendocrine carcinomas"
1042
+ ],
1043
+ "and end results": [
1044
+ "seer"
1045
+ ],
1046
+ "synchronous multiple neuroendocrine lung tumours": [
1047
+ "case series"
1048
+ ],
1049
+ "case series": [
1050
+ "synchronous multiple neuroendocrine lung tumours"
1051
+ ],
1052
+ "tumors": [
1053
+ "nets"
1054
+ ],
1055
+ "mo lanreotide autogel": [
1056
+ "lan"
1057
+ ],
1058
+ "lan": [
1059
+ "mo lanreotide autogel"
1060
+ ],
1061
+ "and temozolomide": [
1062
+ "tmz"
1063
+ ],
1064
+ "tmz": [
1065
+ "and temozolomide"
1066
+ ],
1067
+ "neuroendocrine tumours": [
1068
+ "tnets"
1069
+ ],
1070
+ "tnets": [
1071
+ "neuroendocrine tumours"
1072
+ ],
1073
+ "centre hospitalier universitaire vaudois": [
1074
+ "chuv"
1075
+ ],
1076
+ "chuv": [
1077
+ "centre hospitalier universitaire vaudois",
1078
+ "centre hospitalier universitaire\nvaudois"
1079
+ ],
1080
+ "comparing low-dose computed tomography": [
1081
+ "ldct"
1082
+ ],
1083
+ "ldct": [
1084
+ "low-dose ct",
1085
+ "comparing low-dose computed tomography"
1086
+ ],
1087
+ "such as lepidic adenocarcinomas": [
1088
+ "previously named bronchioloalveolar carcinoma"
1089
+ ],
1090
+ "previously named bronchioloalveolar carcinoma": [
1091
+ "such as lepidic adenocarcinomas"
1092
+ ],
1093
+ "how to handle": [
1094
+ "false-"
1095
+ ],
1096
+ "false-": [
1097
+ "how to handle"
1098
+ ],
1099
+ "or endoscopic\nultrasound": [
1100
+ "eus"
1101
+ ],
1102
+ "eus": [
1103
+ "or endoscopic\nultrasound"
1104
+ ],
1105
+ "the recent world health organization": [
1106
+ "who"
1107
+ ],
1108
+ "who": [
1109
+ "global",
1110
+ "global statistics",
1111
+ "world health organization",
1112
+ "vs universal",
1113
+ "the recent world health organization"
1114
+ ],
1115
+ "with its further sub-classification of": [
1116
+ "surgically resected"
1117
+ ],
1118
+ "surgically resected": [
1119
+ "with its further sub-classification of"
1120
+ ],
1121
+ "the beneficial effects of adjuvant chemotherapy": [
1122
+ "cht"
1123
+ ],
1124
+ "the categories adenocarcinoma in situ": [
1125
+ "ais"
1126
+ ],
1127
+ "ais": [
1128
+ "the categories adenocarcinoma in situ",
1129
+ "proposed\nthat ais be classified as tis"
1130
+ ],
1131
+ "minimally invasive adenocarcinoma": [
1132
+ "mia"
1133
+ ],
1134
+ "mia": [
1135
+ "minimally invasive adenocarcinoma"
1136
+ ],
1137
+ "and lepidic predominant": [
1138
+ "lep"
1139
+ ],
1140
+ "lep": [
1141
+ "and lepidic predominant"
1142
+ ],
1143
+ "of fluorodeoxyglucose-positron emission tomography": [
1144
+ "fdg-pet"
1145
+ ],
1146
+ "fdg-pet": [
1147
+ "of fluorodeoxyglucose-positron emission tomography"
1148
+ ],
1149
+ "the rate of nos": [
1150
+ "not otherwise\nspecified"
1151
+ ],
1152
+ "not otherwise\nspecified": [
1153
+ "the rate of nos"
1154
+ ],
1155
+ "the union for\ninternational cancer control": [
1156
+ "uicc"
1157
+ ],
1158
+ "uicc": [
1159
+ "union for international cancer control",
1160
+ "union for international\ncancer control",
1161
+ "the union for\ninternational cancer control"
1162
+ ],
1163
+ "node and metastasis": [
1164
+ "tnm"
1165
+ ],
1166
+ "tnm": [
1167
+ "tumourenodeemetastasis",
1168
+ "node and metastasis"
1169
+ ],
1170
+ "proposed\nthat ais be classified as tis": [
1171
+ "ais"
1172
+ ],
1173
+ "the display is best with wide": [
1174
+ "lung"
1175
+ ],
1176
+ "lung": [
1177
+ "the display is best with wide"
1178
+ ],
1179
+ "a should be restricted to the same histological": [
1180
+ "sub"
1181
+ ],
1182
+ "sub": [
1183
+ "research support as",
1184
+ "a should be restricted to the same histological"
1185
+ ],
1186
+ "videoassisted mediastinoscopy": [
1187
+ "vam"
1188
+ ],
1189
+ "vam": [
1190
+ "videoassisted mediastinoscopy"
1191
+ ],
1192
+ "whereas the american college of chest physicians": [
1193
+ "accp"
1194
+ ],
1195
+ "accp": [
1196
+ "whereas the american college of chest physicians",
1197
+ "the\namerican college of chest physicians"
1198
+ ],
1199
+ "adjuvant chemotherapy": [
1200
+ "radiotherapy"
1201
+ ],
1202
+ "radiotherapy": [
1203
+ "adjuvant chemotherapy"
1204
+ ],
1205
+ "or a video-assisted thoracoscopic surgery": [
1206
+ "vats"
1207
+ ],
1208
+ "vats": [
1209
+ "or a video-assisted thoracoscopic surgery"
1210
+ ],
1211
+ "based on the lung cancer study group": [
1212
+ "lcsg"
1213
+ ],
1214
+ "lcsg": [
1215
+ "based on the lung cancer study group"
1216
+ ],
1217
+ "research based on large databases suggest a": [
1218
+ "limited"
1219
+ ],
1220
+ "limited": [
1221
+ "research based on large databases suggest a"
1222
+ ],
1223
+ "acc guidelines\n\nneed for coronary\nintervention": [
1224
+ "cabg or pci"
1225
+ ],
1226
+ "cabg or pci": [
1227
+ "acc guidelines\n\nneed for coronary\nintervention"
1228
+ ],
1229
+ "high risk surgery": [
1230
+ "including\nlobectomy or pneumonectomy"
1231
+ ],
1232
+ "including\nlobectomy or pneumonectomy": [
1233
+ "high risk surgery"
1234
+ ],
1235
+ "disease": [
1236
+ "stage ii and\niii"
1237
+ ],
1238
+ "stage ii and\niii": [
1239
+ "disease"
1240
+ ],
1241
+ "sabr or stereotactic body radiotherapy": [
1242
+ "sbrt"
1243
+ ],
1244
+ "in those with proven recurrence": [
1245
+ "or a high suspicion"
1246
+ ],
1247
+ "or a high suspicion": [
1248
+ "in those with proven recurrence"
1249
+ ],
1250
+ "for these patients radiofrequency ablation": [
1251
+ "rfa"
1252
+ ],
1253
+ "clinical trials": [
1254
+ "rcts"
1255
+ ],
1256
+ "the induction regimen of chemoradiotherapy": [
1257
+ "crt"
1258
+ ],
1259
+ "crt": [
1260
+ "the induction regimen of chemoradiotherapy"
1261
+ ],
1262
+ "sequential crt": [
1263
+ "induction cht followed by rt"
1264
+ ],
1265
+ "induction cht followed by rt": [
1266
+ "sequential crt"
1267
+ ],
1268
+ "- immunotherapy is being studied in early nsclc as": [
1269
+ "neo"
1270
+ ],
1271
+ "neo": [
1272
+ "- immunotherapy is being studied in early nsclc as",
1273
+ "immunotherapy is being studied in early nsclc as",
1274
+ "the\nimmune strategy in the"
1275
+ ],
1276
+ "cl\n\ntreatment of locally advanced stage": [
1277
+ "stage ill"
1278
+ ],
1279
+ "stage ill": [
1280
+ "cl\n\ntreatment of locally advanced stage"
1281
+ ],
1282
+ "immunotherapy is being studied in early nsclc as": [
1283
+ "neo"
1284
+ ],
1285
+ "controlled trial of good methodological quality": [
1286
+ "low potential for bias"
1287
+ ],
1288
+ "low potential for bias": [
1289
+ "controlled trial of good methodological quality"
1290
+ ],
1291
+ "or imaging": [
1292
+ "preferably ct"
1293
+ ],
1294
+ "preferably ct": [
1295
+ "or imaging"
1296
+ ],
1297
+ "european society of gastrointestinal endoscopy": [
1298
+ "esge"
1299
+ ],
1300
+ "esge": [
1301
+ "european society of gastrointestinal endoscopy"
1302
+ ],
1303
+ "and the european\nsociety of thoracic surgeons": [
1304
+ "ests"
1305
+ ],
1306
+ "ests": [
1307
+ "and the european\nsociety of thoracic surgeons",
1308
+ "and european society of thoracic surgeons"
1309
+ ],
1310
+ "gv scagliotti": [
1311
+ "eds"
1312
+ ],
1313
+ "eds": [
1314
+ "gv scagliotti"
1315
+ ],
1316
+ "the thoracic surgery scoring\nsystem": [
1317
+ "thoracoscore"
1318
+ ],
1319
+ "thoracoscore": [
1320
+ "the thoracic surgery scoring\nsystem",
1321
+ "the thoracic surgery scoring system"
1322
+ ],
1323
+ "stereotactic body radiotherapy": [
1324
+ "sbrt"
1325
+ ],
1326
+ "respiratory oncology unit": [
1327
+ "pulmonology"
1328
+ ],
1329
+ "pulmonology": [
1330
+ "respiratory oncology",
1331
+ "respiratory oncology unit"
1332
+ ],
1333
+ "edegem": [
1334
+ "antwerp"
1335
+ ],
1336
+ "antwerp": [
1337
+ "edegem"
1338
+ ],
1339
+ "centre hospitalier universitaire\nvaudois": [
1340
+ "chuv"
1341
+ ],
1342
+ "early-stage nsclc": [
1343
+ "stages i ii"
1344
+ ],
1345
+ "stages i ii": [
1346
+ "early-stage nsclc"
1347
+ ],
1348
+ "locally advanced nsclc": [
1349
+ "stage iii"
1350
+ ],
1351
+ "stage iii": [
1352
+ "treatment of locally advanced stage",
1353
+ "unresectable nsclc",
1354
+ "and unresectable locally advanced",
1355
+ "locally advanced nsclc"
1356
+ ],
1357
+ "in paral\npractice guidelines": [
1358
+ "cpgs"
1359
+ ],
1360
+ "cpgs": [
1361
+ "in paral\npractice guidelines"
1362
+ ],
1363
+ "and a general consen\nconsensus process": [
1364
+ "see panel members listed in the appendix"
1365
+ ],
1366
+ "see panel members listed in the appendix": [
1367
+ "experts were involved in this\nconsensus process",
1368
+ "and a general consen\nconsensus process"
1369
+ ],
1370
+ "experts were involved in this\nconsensus process": [
1371
+ "see panel members listed in the appendix"
1372
+ ],
1373
+ "controlled trials": [
1374
+ "rcts"
1375
+ ],
1376
+ "the\namerican college of chest physicians": [
1377
+ "accp"
1378
+ ],
1379
+ "as relative value depends on personal": [
1380
+ "usually unexplained or unquantifiable"
1381
+ ],
1382
+ "usually unexplained or unquantifiable": [
1383
+ "as relative value depends on personal"
1384
+ ],
1385
+ "respiratory literature": [
1386
+ "especially on exercise\ntesting"
1387
+ ],
1388
+ "especially on exercise\ntesting": [
1389
+ "respiratory literature"
1390
+ ],
1391
+ "use of the revised cardiac risk index": [
1392
+ "rcri"
1393
+ ],
1394
+ "rcri": [
1395
+ "use of the revised cardiac risk index"
1396
+ ],
1397
+ "and european society of thoracic surgeons": [
1398
+ "ests"
1399
+ ],
1400
+ "for sub-lobar resection": [
1401
+ "wide wedge resection or anatomical segmentectomy"
1402
+ ],
1403
+ "wide wedge resection or anatomical segmentectomy": [
1404
+ "for sub-lobar resection"
1405
+ ],
1406
+ "for which patients is limited": [
1407
+ "sub-lobar"
1408
+ ],
1409
+ "sub-lobar": [
1410
+ "for which patients is limited",
1411
+ "special articles\n\n\n\nfor which patients is limited"
1412
+ ],
1413
+ "especially those with ground-glass\nopacity": [
1414
+ "ggo"
1415
+ ],
1416
+ "ggo": [
1417
+ "especially those with ground-glass\nopacity"
1418
+ ],
1419
+ "special articles\n\n\n\nfor which patients is limited": [
1420
+ "sub-lobar"
1421
+ ],
1422
+ "other approaches such as local ablative": [
1423
+ "sabr"
1424
+ ],
1425
+ "sabr": [
1426
+ "other approaches such as local ablative",
1427
+ "solidative stereotactic ablative radiotherapy"
1428
+ ],
1429
+ "some trials": [
1430
+ "ialt"
1431
+ ],
1432
+ "ialt": [
1433
+ "some trials"
1434
+ ],
1435
+ "anatomical resection": [
1436
+ "lobectomy"
1437
+ ],
1438
+ "lobectomy": [
1439
+ "anatomical resection"
1440
+ ],
1441
+ "european society for\nmedical oncology": [
1442
+ "esmo"
1443
+ ],
1444
+ "boehringer ingelheim and astrazeneca": [
1445
+ "for lectures"
1446
+ ],
1447
+ "for lectures": [
1448
+ "boehringer ingelheim and astrazeneca"
1449
+ ],
1450
+ "results of\nthe initial": [
1451
+ "prevalence"
1452
+ ],
1453
+ "prevalence": [
1454
+ "results of\nthe initial",
1455
+ "results of the\ninitial"
1456
+ ],
1457
+ "results of the\ninitial": [
1458
+ "prevalence"
1459
+ ],
1460
+ "results\nof the initial": [
1461
+ "prevalance"
1462
+ ],
1463
+ "prevalance": [
1464
+ "results\nof the initial"
1465
+ ],
1466
+ "the thoracic surgery scoring system": [
1467
+ "thoracoscore"
1468
+ ],
1469
+ "robotic lobectomy for non-small cell lung cancer": [
1470
+ "nsclc"
1471
+ ],
1472
+ "fdg-pet and stereotactic body radiotherapy": [
1473
+ "sbrt"
1474
+ ],
1475
+ "respiratory oncology": [
1476
+ "pulmonology"
1477
+ ],
1478
+ "vrije\nuniversity medical centre": [
1479
+ "vumc"
1480
+ ],
1481
+ "vumc": [
1482
+ "vrije\nuniversity medical centre",
1483
+ "university medical centre"
1484
+ ],
1485
+ "university medical centre": [
1486
+ "vumc"
1487
+ ],
1488
+ "phase iii randomised clinical trial": [
1489
+ "rct"
1490
+ ],
1491
+ "platinum-based doublet chemotherapy": [
1492
+ "pbc"
1493
+ ],
1494
+ "pbc": [
1495
+ "five cycles of\n\ntremelimumab",
1496
+ "platinum-based doublet chemotherapy"
1497
+ ],
1498
+ "carboplatin": [
1499
+ "arm a",
1500
+ "arm b"
1501
+ ],
1502
+ "arm a": [
1503
+ "carboplatin"
1504
+ ],
1505
+ "arm b": [
1506
+ "carboplatin"
1507
+ ],
1508
+ "pbc significantly improved pfs": [
1509
+ "primary\nendpoint"
1510
+ ],
1511
+ "primary\nendpoint": [
1512
+ "pbc significantly improved pfs"
1513
+ ],
1514
+ "pbc\nsignificantly improved pfs": [
1515
+ "primary endpoint"
1516
+ ],
1517
+ "primary endpoint": [
1518
+ "significantly improved os",
1519
+ "level",
1520
+ "-year os",
1521
+ "pbc\nsignificantly improved pfs"
1522
+ ],
1523
+ "besides immune checkpoint\n\ninhibitor": [
1524
+ "ici"
1525
+ ],
1526
+ "ici": [
1527
+ "besides immune checkpoint\n\ninhibitor",
1528
+ "and have no prior immune checkpoint inhibitor"
1529
+ ],
1530
+ "esmo-magnitude of clinical benefit scale": [
1531
+ "mcbs",
1532
+ "esmo-mcbs"
1533
+ ],
1534
+ "mcbs": [
1535
+ "esmo-magnitude of clinical benefit scale"
1536
+ ],
1537
+ "five cycles of\n\ntremelimumab": [
1538
+ "pbc"
1539
+ ],
1540
+ "platinum-based chemo\ntherapy": [
1541
+ "cht"
1542
+ ],
1543
+ "food and drug administration": [
1544
+ "fda"
1545
+ ],
1546
+ "european medicines agency": [
1547
+ "ema"
1548
+ ],
1549
+ "of tumour cells": [
1550
+ "tcs"
1551
+ ],
1552
+ "tcs": [
1553
+ "of tumour cells"
1554
+ ],
1555
+ "level": [
1556
+ "primary endpoint"
1557
+ ],
1558
+ "tumour treating\nfields": [
1559
+ "ttfields"
1560
+ ],
1561
+ "ttfields": [
1562
+ "tumour treating\nfields"
1563
+ ],
1564
+ "significantly improved os": [
1565
+ "primary endpoint"
1566
+ ],
1567
+ "ioanna ntai and claire bramley": [
1568
+ "esmo guidelines staff"
1569
+ ],
1570
+ "esmo guidelines staff": [
1571
+ "ioanna ntai and claire bramley",
1572
+ "jennifer\nlamarre and guy atchison"
1573
+ ],
1574
+ "valerie laforest": [
1575
+ "esmo\nguidelines staff"
1576
+ ],
1577
+ "esmo\nguidelines staff": [
1578
+ "valerie laforest"
1579
+ ],
1580
+ "nicola latino and\nfrancesca chiovaro": [
1581
+ "esmo scientific affairs staff"
1582
+ ],
1583
+ "esmo scientific affairs staff": [
1584
+ "nicola\nlatino and francesca chiovaro",
1585
+ "nicola latino",
1586
+ "nicola\nlatino",
1587
+ "nicola latino and\nfrancesca chiovaro"
1588
+ ],
1589
+ "bristol\nmyers squibb": [
1590
+ "bms"
1591
+ ],
1592
+ "the european society for medical\noncology": [
1593
+ "esmo"
1594
+ ],
1595
+ "and the\neuropean thoracic oncology platform": [
1596
+ "foundation council\nmember"
1597
+ ],
1598
+ "foundation council\nmember": [
1599
+ "and the\neuropean thoracic oncology platform"
1600
+ ],
1601
+ "with a platinum-containing\nregimen": [
1602
+ "ipsos"
1603
+ ],
1604
+ "ipsos": [
1605
+ "with a platinum-containing\nregimen"
1606
+ ],
1607
+ "esmo clinical practice guideline": [
1608
+ "cpg"
1609
+ ],
1610
+ "cpg": [
1611
+ "esmo clinical practice guideline"
1612
+ ],
1613
+ "tyrosine kinase inhibitors": [
1614
+ "tkis"
1615
+ ],
1616
+ "tkis": [
1617
+ "tyrosine kinase inhibitors"
1618
+ ],
1619
+ "by next-generation sequencing": [
1620
+ "ngs"
1621
+ ],
1622
+ "ngs": [
1623
+ "multiplex platforms",
1624
+ "such as next-generation sequencing",
1625
+ "by next-generation sequencing"
1626
+ ],
1627
+ "egfr fish or immunohistochemistry": [
1628
+ "ihc"
1629
+ ],
1630
+ "positive anaplastic lymphoma kinase": [
1631
+ "alk"
1632
+ ],
1633
+ "or neurotrophic tyrosine\nreceptor kinase": [
1634
+ "ntrk"
1635
+ ],
1636
+ "ntrk": [
1637
+ "and the neurotrophic receptor tyrosine\nkinase",
1638
+ "or neurotrophic tyrosine\nreceptor kinase"
1639
+ ],
1640
+ "detection is reliable by\nin situ hybridisation": [
1641
+ "ish"
1642
+ ],
1643
+ "ish": [
1644
+ "detection is reliable by\nin situ hybridisation"
1645
+ ],
1646
+ "mesenchymal-epithelial transition": [
1647
+ "met"
1648
+ ],
1649
+ "met": [
1650
+ "mesenchymal-epithelial transition"
1651
+ ],
1652
+ "cell-free dna": [
1653
+ "cfdna"
1654
+ ],
1655
+ "cfdna": [
1656
+ "liquid biopsy",
1657
+ "cell-free dna"
1658
+ ],
1659
+ "multiplex platforms": [
1660
+ "ngs"
1661
+ ],
1662
+ "liquid biopsy": [
1663
+ "cfdna"
1664
+ ],
1665
+ "scan of\nthe chest and upper abdomen": [
1666
+ "including the liver and\nadrenal glands"
1667
+ ],
1668
+ "including the liver and\nadrenal glands": [
1669
+ "scan of\nthe chest and upper abdomen"
1670
+ ],
1671
+ "imaging of the central nervous system": [
1672
+ "cns"
1673
+ ],
1674
+ "cns": [
1675
+ "imaging of the central nervous system"
1676
+ ],
1677
+ "e\ndeoxy-d-glucose": [
1678
+ "fdg"
1679
+ ],
1680
+ "fdg": [
1681
+ "e\ndeoxy-d-glucose"
1682
+ ],
1683
+ "union for international\ncancer control": [
1684
+ "uicc"
1685
+ ],
1686
+ "tumourenodeemetastasis": [
1687
+ "tnm",
1688
+ "staging and risk assessment\n\n\nthe tnm"
1689
+ ],
1690
+ "over platinum-based doublet\nchemotherapy": [
1691
+ "cht"
1692
+ ],
1693
+ "quality of life and progression-free survival": [
1694
+ "pfs"
1695
+ ],
1696
+ "phase iib\nrandomised controlled trial": [
1697
+ "rct"
1698
+ ],
1699
+ "demonstrating a superior\nmedian pfs": [
1700
+ "mpfs"
1701
+ ],
1702
+ "mpfs": [
1703
+ "demonstrating a superior\nmedian pfs"
1704
+ ],
1705
+ "and median os": [
1706
+ "mos"
1707
+ ],
1708
+ "mos": [
1709
+ "the malaysian oncological society",
1710
+ "malaysia",
1711
+ "and median os"
1712
+ ],
1713
+ "systemic progression\n\nlocal treatment": [
1714
+ "surgery or ft"
1715
+ ],
1716
+ "surgery or ft": [
1717
+ "systemic progression\n\nlocal treatment"
1718
+ ],
1719
+ "mpositive resistant disease": [
1720
+ "occurring in approximately half of\nthe patients"
1721
+ ],
1722
+ "occurring in approximately half of\nthe patients": [
1723
+ "mpositive resistant disease"
1724
+ ],
1725
+ "not european medicines agency": [
1726
+ "ema"
1727
+ ],
1728
+ "single-agent": [
1729
+ "third-generation"
1730
+ ],
1731
+ "third-generation": [
1732
+ "single-agent"
1733
+ ],
1734
+ "ensartinib": [
1735
+ "not ema approved",
1736
+ "not ema\napproved"
1737
+ ],
1738
+ "not ema\napproved": [
1739
+ "ensartinib"
1740
+ ],
1741
+ "j-alex": [
1742
+ "japan"
1743
+ ],
1744
+ "japan": [
1745
+ "j-alex",
1746
+ "jsmo"
1747
+ ],
1748
+ "and\nalesia": [
1749
+ "asia"
1750
+ ],
1751
+ "asia": [
1752
+ "and\nalesia"
1753
+ ],
1754
+ "and continue targeted": [
1755
+ "not mandatory for decision"
1756
+ ],
1757
+ "not mandatory for decision": [
1758
+ "and continue targeted"
1759
+ ],
1760
+ "interstitial lung disease": [
1761
+ "ild"
1762
+ ],
1763
+ "ild": [
1764
+ "interstitial lung disease"
1765
+ ],
1766
+ "not ema approved": [
1767
+ "ensartinib"
1768
+ ],
1769
+ "mainly altered lipid levels": [
1770
+ "aes"
1771
+ ],
1772
+ "alectinib was superior to single-agent cht": [
1773
+ "docetaxel or pemetrexed"
1774
+ ],
1775
+ "docetaxel or pemetrexed": [
1776
+ "alectinib was superior to single-agent cht"
1777
+ ],
1778
+ "entrectinib received food and\ndrug administration": [
1779
+ "fda"
1780
+ ],
1781
+ "the\nmedian duration of response": [
1782
+ "mdor"
1783
+ ],
1784
+ "mdor": [
1785
+ "the\nmedian duration of response"
1786
+ ],
1787
+ "tropomyosin receptor tyrosine kinase": [
1788
+ "trk"
1789
+ ],
1790
+ "trk": [
1791
+ "tropomyosin receptor tyrosine kinase"
1792
+ ],
1793
+ "disease progression\n\nlocal treatment": [
1794
+ "surgery or rt"
1795
+ ],
1796
+ "surgery or rt": [
1797
+ "local treatment",
1798
+ "oligoprogression\n\nlocal treatment",
1799
+ "disease progression\n\nlocal treatment"
1800
+ ],
1801
+ "or combination therapy with a mek inhibitor": [
1802
+ "trametinib"
1803
+ ],
1804
+ "trametinib": [
1805
+ "or combination therapy with a mek inhibitor"
1806
+ ],
1807
+ "local treatment": [
1808
+ "surgery or rt"
1809
+ ],
1810
+ "a rearranged during transfection": [
1811
+ "ret"
1812
+ ],
1813
+ "ret": [
1814
+ "a rearranged during transfection"
1815
+ ],
1816
+ "trastuzumab deruxtecan": [
1817
+ "fda\napproved"
1818
+ ],
1819
+ "fda\napproved": [
1820
+ "trastuzumab deruxtecan"
1821
+ ],
1822
+ "the\nkirsten rat sarcoma virus": [
1823
+ "kras"
1824
+ ],
1825
+ "kras": [
1826
+ "the\nkirsten rat sarcoma virus"
1827
+ ],
1828
+ "data regarding the role of local ablative therapy": [
1829
+ "lat"
1830
+ ],
1831
+ "lat": [
1832
+ "data regarding the role of local ablative therapy"
1833
+ ],
1834
+ "one open-label phase iii rct": [
1835
+ "sindas"
1836
+ ],
1837
+ "sindas": [
1838
+ "one open-label phase iii rct"
1839
+ ],
1840
+ "in particular with the use\nof modern technologies": [
1841
+ "robotic systems"
1842
+ ],
1843
+ "robotic systems": [
1844
+ "in particular with the use\nof modern technologies"
1845
+ ],
1846
+ "esmo-magnitude of\nclinical benefit": [
1847
+ "esmo-mcbs"
1848
+ ],
1849
+ "may\nbenefit from lat": [
1850
+ "high-dose rt or surgery"
1851
+ ],
1852
+ "high-dose rt or surgery": [
1853
+ "from lat",
1854
+ "may\nbenefit from lat"
1855
+ ],
1856
+ "jennifer\nlamarre and guy atchison": [
1857
+ "esmo guidelines staff"
1858
+ ],
1859
+ "nicola\nlatino": [
1860
+ "esmo scientific affairs staff"
1861
+ ],
1862
+ "and dr svetlana jezdic": [
1863
+ "esmo\nmedical affairs advisor",
1864
+ "esmo medical affairs staff"
1865
+ ],
1866
+ "esmo\nmedical affairs advisor": [
1867
+ "and dr svetlana jezdic"
1868
+ ],
1869
+ "bristol-myers\nsquibb": [
1870
+ "bms"
1871
+ ],
1872
+ "asian\nthoracic oncology research group": [
1873
+ "atorg"
1874
+ ],
1875
+ "atorg": [
1876
+ "asian\nthoracic oncology research group"
1877
+ ],
1878
+ "chinese lung\ncancer research foundation limited": [
1879
+ "clcrf"
1880
+ ],
1881
+ "clcrf": [
1882
+ "chinese lung\ncancer research foundation limited"
1883
+ ],
1884
+ "chinese society of clinical oncology": [
1885
+ "csco"
1886
+ ],
1887
+ "csco": [
1888
+ "chinese society of clinical oncology"
1889
+ ],
1890
+ "hong kong cancer fund": [
1891
+ "hkcf"
1892
+ ],
1893
+ "hkcf": [
1894
+ "hong kong cancer fund"
1895
+ ],
1896
+ "hong kong cancer therapy society": [
1897
+ "hkcts"
1898
+ ],
1899
+ "hkcts": [
1900
+ "hong kong cancer therapy society"
1901
+ ],
1902
+ "prep school": [
1903
+ "hong kong"
1904
+ ],
1905
+ "hong kong": [
1906
+ "prep school"
1907
+ ],
1908
+ "s education resource": [
1909
+ "per"
1910
+ ],
1911
+ "per": [
1912
+ "s education resource"
1913
+ ],
1914
+ "partnerships in international medical education": [
1915
+ "prime"
1916
+ ],
1917
+ "prime": [
1918
+ "partnerships in international medical education"
1919
+ ],
1920
+ "llc": [
1921
+ "rmei"
1922
+ ],
1923
+ "rmei": [
1924
+ "llc"
1925
+ ],
1926
+ "research\nto practice": [
1927
+ "rtp"
1928
+ ],
1929
+ "rtp": [
1930
+ "research\nto practice"
1931
+ ],
1932
+ "research": [
1933
+ "sakk",
1934
+ "ukcccr"
1935
+ ],
1936
+ "sakk": [
1937
+ "research"
1938
+ ],
1939
+ "international breast cancer study group": [
1940
+ "ibcsg"
1941
+ ],
1942
+ "ibcsg": [
1943
+ "international breast cancer study group"
1944
+ ],
1945
+ "s\nde clinique": [
1946
+ "asmac"
1947
+ ],
1948
+ "asmac": [
1949
+ "s\nde clinique"
1950
+ ],
1951
+ "rzte": [
1952
+ "vsao"
1953
+ ],
1954
+ "vsao": [
1955
+ "rzte"
1956
+ ],
1957
+ "decins suisses": [
1958
+ "fmh"
1959
+ ],
1960
+ "fmh": [
1961
+ "decins suisses"
1962
+ ],
1963
+ "cancers": [
1964
+ "basel"
1965
+ ],
1966
+ "basel": [
1967
+ "cancers"
1968
+ ],
1969
+ "relay": [
1970
+ "cancer"
1971
+ ],
1972
+ "non-small-cell lung cancer": [
1973
+ "alesia",
1974
+ "nsclc"
1975
+ ],
1976
+ "alesia": [
1977
+ "non-small-cell lung cancer"
1978
+ ],
1979
+ "crizotinib-pretreated anaplastic lymphoma kinase": [
1980
+ "alk"
1981
+ ],
1982
+ "advanced non-small-cell lung cancer": [
1983
+ "nsclc"
1984
+ ],
1985
+ "fda prescribing information - rozlytrek": [
1986
+ "entrectinib"
1987
+ ],
1988
+ "entrectinib": [
1989
+ "fda prescribing information - rozlytrek"
1990
+ ],
1991
+ "retsevmo - summary of opinion": [
1992
+ "chmp"
1993
+ ],
1994
+ "chmp": [
1995
+ "tabrecta - summary of opinion",
1996
+ "retsevmo - summary of opinion",
1997
+ "products for human use"
1998
+ ],
1999
+ "tabrecta - summary of opinion": [
2000
+ "chmp"
2001
+ ],
2002
+ "prescribing information - rybrevant": [
2003
+ "amivantamab-vmjw"
2004
+ ],
2005
+ "amivantamab-vmjw": [
2006
+ "prescribing information - rybrevant"
2007
+ ],
2008
+ "solidative stereotactic ablative radiotherapy": [
2009
+ "sabr"
2010
+ ],
2011
+ "the european society for medical oncology": [
2012
+ "esmo"
2013
+ ],
2014
+ "to produce the pan-asian adapted": [
2015
+ "paga"
2016
+ ],
2017
+ "paga": [
2018
+ "to produce the pan-asian adapted"
2019
+ ],
2020
+ "indonesia": [
2021
+ "ishmo"
2022
+ ],
2023
+ "ishmo": [
2024
+ "indonesia"
2025
+ ],
2026
+ "india": [
2027
+ "ismpo"
2028
+ ],
2029
+ "ismpo": [
2030
+ "india"
2031
+ ],
2032
+ "jsmo": [
2033
+ "japan",
2034
+ "the\njapanese society of medical oncology"
2035
+ ],
2036
+ "korea": [
2037
+ "ksmo"
2038
+ ],
2039
+ "ksmo": [
2040
+ "korea"
2041
+ ],
2042
+ "malaysia": [
2043
+ "mos"
2044
+ ],
2045
+ "the philippines": [
2046
+ "psmo"
2047
+ ],
2048
+ "psmo": [
2049
+ "the philippines",
2050
+ "the philippine society of\nmedical oncology",
2051
+ "and philippine society of medical\noncology"
2052
+ ],
2053
+ "singapore": [
2054
+ "sso"
2055
+ ],
2056
+ "sso": [
2057
+ "the singapore society of\noncology",
2058
+ "singapore"
2059
+ ],
2060
+ "taiwan": [
2061
+ "tos"
2062
+ ],
2063
+ "tos": [
2064
+ "the taiwan oncology society",
2065
+ "taiwan"
2066
+ ],
2067
+ "and thailand": [
2068
+ "tsco"
2069
+ ],
2070
+ "tsco": [
2071
+ "and thailand",
2072
+ "and the\nthai society of clinical oncology"
2073
+ ],
2074
+ "and the neurotrophic receptor tyrosine\nkinase": [
2075
+ "ntrk"
2076
+ ],
2077
+ "esmo open\n\n\n\nrecommendation": [
2078
+ "gor"
2079
+ ],
2080
+ "gor": [
2081
+ "esmo open\n\n\n\nrecommendation"
2082
+ ],
2083
+ "esmomagnitude of clinical benefit scale": [
2084
+ "esmo-mcbs"
2085
+ ],
2086
+ "the\njapanese society of medical oncology": [
2087
+ "jsmo"
2088
+ ],
2089
+ "the malaysian oncological society": [
2090
+ "mos"
2091
+ ],
2092
+ "the philippine society of\nmedical oncology": [
2093
+ "psmo"
2094
+ ],
2095
+ "the singapore society of\noncology": [
2096
+ "sso"
2097
+ ],
2098
+ "the taiwan oncology society": [
2099
+ "tos"
2100
+ ],
2101
+ "and the\nthai society of clinical oncology": [
2102
+ "tsco"
2103
+ ],
2104
+ "only two of the six\nexpert members from the ksmo": [
2105
+ "tmk and hrk"
2106
+ ],
2107
+ "tmk and hrk": [
2108
+ "only two of the six\nexpert members from the ksmo"
2109
+ ],
2110
+ "performance status": [
2111
+ "ecog ps"
2112
+ ],
2113
+ "ecog ps": [
2114
+ "performance status"
2115
+ ],
2116
+ "scan of the chest and upper abdomen": [
2117
+ "including the liver and adrenal glands"
2118
+ ],
2119
+ "including the liver and adrenal glands": [
2120
+ "scan of the chest and upper abdomen"
2121
+ ],
2122
+ "resonance imaging": [
2123
+ "mri"
2124
+ ],
2125
+ "mri": [
2126
+ "and\na magnetic resonance imaging",
2127
+ "resonance imaging"
2128
+ ],
2129
+ "-positron emission topography": [
2130
+ "pet"
2131
+ ],
2132
+ "pet": [
2133
+ "-positron emission topography",
2134
+ "of whom had undergone positron\nemission tomography"
2135
+ ],
2136
+ "union for international cancer control": [
2137
+ "uicc"
2138
+ ],
2139
+ "third-generation egfr tkis": [
2140
+ "such as osimertinib"
2141
+ ],
2142
+ "such as osimertinib": [
2143
+ "generation tki",
2144
+ "third-generation egfr tkis"
2145
+ ],
2146
+ "generation tki": [
2147
+ "such as osimertinib"
2148
+ ],
2149
+ "from lat": [
2150
+ "high-dose rt or surgery"
2151
+ ],
2152
+ "such as next-generation sequencing": [
2153
+ "ngs"
2154
+ ],
2155
+ "serious adverse events": [
2156
+ "saes"
2157
+ ],
2158
+ "saes": [
2159
+ "serious adverse events"
2160
+ ],
2161
+ "reported improved\nprogression-free survival": [
2162
+ "pfs"
2163
+ ],
2164
+ "which had an overall response rate": [
2165
+ "orr"
2166
+ ],
2167
+ "the median\nduration of response": [
2168
+ "dor"
2169
+ ],
2170
+ "esmo-magnitude of clinical benefit": [
2171
+ "esmo-mcbs"
2172
+ ],
2173
+ "esmo-magnitude of clinical\nbenefit": [
2174
+ "esmo-mcbs"
2175
+ ],
2176
+ "the addition of the chemotherapy": [
2177
+ "cht"
2178
+ ],
2179
+ "treatment-related aes": [
2180
+ "traes"
2181
+ ],
2182
+ "traes": [
2183
+ "treatment-related aes"
2184
+ ],
2185
+ "and have no prior immune checkpoint inhibitor": [
2186
+ "ici"
2187
+ ],
2188
+ "the food and drug administration": [
2189
+ "fda"
2190
+ ],
2191
+ "s national medical products\nadministration": [
2192
+ "nsmpa"
2193
+ ],
2194
+ "nsmpa": [
2195
+ "s national medical products\nadministration"
2196
+ ],
2197
+ "ishmo\n\n\nthe jaminan kesehatan nasional": [
2198
+ "jkn"
2199
+ ],
2200
+ "jkn": [
2201
+ "ishmo\n\n\nthe jaminan kesehatan nasional"
2202
+ ],
2203
+ "there is no regulation of partial coverage": [
2204
+ "co-payment"
2205
+ ],
2206
+ "co-payment": [
2207
+ "there is no regulation of partial coverage"
2208
+ ],
2209
+ "esmo open\n\n\n\nprogram": [
2210
+ "pap"
2211
+ ],
2212
+ "pap": [
2213
+ "esmo open\n\n\n\nprogram"
2214
+ ],
2215
+ "mandatory national health insurance": [
2216
+ "nhi"
2217
+ ],
2218
+ "nhi": [
2219
+ "mandatory national health insurance"
2220
+ ],
2221
+ "this includes ngs panel tests": [
2222
+ "partially reimbursed"
2223
+ ],
2224
+ "partially reimbursed": [
2225
+ "this includes ngs panel tests"
2226
+ ],
2227
+ "social\nsecurity and government officer": [
2228
+ "csmbs"
2229
+ ],
2230
+ "csmbs": [
2231
+ "social\nsecurity and government officer"
2232
+ ],
2233
+ "alk inhibitors": [
2234
+ "ceritinib and brigatinib only"
2235
+ ],
2236
+ "ceritinib and brigatinib only": [
2237
+ "alk inhibitors"
2238
+ ],
2239
+ "and atezolizumab": [
2240
+ "in the\nsecond-line setting"
2241
+ ],
2242
+ "in the\nsecond-line setting": [
2243
+ "and atezolizumab"
2244
+ ],
2245
+ "erlotinib and osimertinib": [
2246
+ "second-line with reimbursement\nthrough the csmbs"
2247
+ ],
2248
+ "second-line with reimbursement\nthrough the csmbs": [
2249
+ "erlotinib and osimertinib"
2250
+ ],
2251
+ "and the alk inhibitors ceritinib and\nbrigatinib": [
2252
+ "first-line"
2253
+ ],
2254
+ "first-line": [
2255
+ "and the alk inhibitors ceritinib and\nbrigatinib"
2256
+ ],
2257
+ "german": [
2258
+ "merck"
2259
+ ],
2260
+ "thoracic oncology research\ngroup": [
2261
+ "torg"
2262
+ ],
2263
+ "torg": [
2264
+ "thoracic oncology research\ngroup"
2265
+ ],
2266
+ "and west japan oncology group": [
2267
+ "wjog"
2268
+ ],
2269
+ "wjog": [
2270
+ "and west japan oncology group"
2271
+ ],
2272
+ "product samples": [
2273
+ "nonrenumerated"
2274
+ ],
2275
+ "nonrenumerated": [
2276
+ "product samples"
2277
+ ],
2278
+ "and philippine society of medical\noncology": [
2279
+ "psmo"
2280
+ ],
2281
+ "trial steering committee": [
2282
+ "tsc"
2283
+ ],
2284
+ "tsc": [
2285
+ "trial steering committee"
2286
+ ],
2287
+ "independent data monitoring committee": [
2288
+ "idmc"
2289
+ ],
2290
+ "idmc": [
2291
+ "independent data monitoring committee"
2292
+ ],
2293
+ "ireland oesophagogastric group": [
2294
+ "ukiog"
2295
+ ],
2296
+ "ukiog": [
2297
+ "ireland oesophagogastric group"
2298
+ ],
2299
+ "celgene": [
2300
+ "bms"
2301
+ ],
2302
+ "hellenic cooperative oncology group": [
2303
+ "hecog"
2304
+ ],
2305
+ "hecog": [
2306
+ "hellenic cooperative oncology group"
2307
+ ],
2308
+ "chin med j": [
2309
+ "engl"
2310
+ ],
2311
+ "engl": [
2312
+ "chin med j"
2313
+ ],
2314
+ "commun": [
2315
+ "lond"
2316
+ ],
2317
+ "lond": [
2318
+ "cancer commun",
2319
+ "commun"
2320
+ ],
2321
+ "mertinib": [
2322
+ "osi"
2323
+ ],
2324
+ "osi": [
2325
+ "mertinib"
2326
+ ],
2327
+ "platinum-pemetrexed in egfr-mutated": [
2328
+ "egfrm"
2329
+ ],
2330
+ "egfrm": [
2331
+ "with stage ibeiiia egfr mutation positive",
2332
+ "platinum-pemetrexed in egfr-mutated"
2333
+ ],
2334
+ "advanced non-small cell lung cancer": [
2335
+ "nsclc"
2336
+ ],
2337
+ "cancer commun": [
2338
+ "lond"
2339
+ ],
2340
+ "e-mutant metastatic nsclc": [
2341
+ "mnsclc"
2342
+ ],
2343
+ "mnsclc": [
2344
+ "e-mutant metastatic nsclc"
2345
+ ],
2346
+ "binimetinib in patients": [
2347
+ "pts"
2348
+ ],
2349
+ "pts": [
2350
+ "binimetinib in patients",
2351
+ "p repotrectinib in patients",
2352
+ "versus docetaxel in patients",
2353
+ "mo encorafenib plus\n\nbinimetinib in patients",
2354
+ "therapy in patients",
2355
+ "patients"
2356
+ ],
2357
+ "mutant advanced non-small cell lung cancer": [
2358
+ "nsclc"
2359
+ ],
2360
+ "patients": [
2361
+ "pts"
2362
+ ],
2363
+ "with epidermal growth factor receptor": [
2364
+ "egfr"
2365
+ ],
2366
+ "egfr": [
2367
+ "with epidermal growth factor receptor"
2368
+ ],
2369
+ "treatment of early stages": [
2370
+ "stages i-iiia",
2371
+ "stages i-ii"
2372
+ ],
2373
+ "stages i-iiia": [
2374
+ "treatment of early stages"
2375
+ ],
2376
+ "stages i-ii": [
2377
+ "treatment of early stages"
2378
+ ],
2379
+ "and the european medicines\nagency": [
2380
+ "ema"
2381
+ ],
2382
+ "chemotherapy": [
2383
+ "adaura",
2384
+ "cht"
2385
+ ],
2386
+ "the median\nwas not reached": [
2387
+ "ne-ne"
2388
+ ],
2389
+ "ne-ne": [
2390
+ "the median\nwas not reached"
2391
+ ],
2392
+ "neoadjuvant immune checkpoint inhibitors": [
2393
+ "icis"
2394
+ ],
2395
+ "leads to\nincreased dfs versus best supportive care": [
2396
+ "bsc"
2397
+ ],
2398
+ "the\nimmune strategy in the": [
2399
+ "neo"
2400
+ ],
2401
+ "or docetaxel or pemetrexed": [
2402
+ "only in\nadenocarcinoma tumours"
2403
+ ],
2404
+ "only in\nadenocarcinoma tumours": [
2405
+ "or docetaxel or pemetrexed"
2406
+ ],
2407
+ "post-operative radiotherapy": [
2408
+ "port"
2409
+ ],
2410
+ "port": [
2411
+ "post-operative radiotherapy"
2412
+ ],
2413
+ "treatment of locally advanced stage": [
2414
+ "stage iii"
2415
+ ],
2416
+ "negative endoscopic staging": [
2417
+ "ebus or eus"
2418
+ ],
2419
+ "ebus or eus": [
2420
+ "negative endoscopic staging"
2421
+ ],
2422
+ "of whom had undergone positron\nemission tomography": [
2423
+ "pet"
2424
+ ],
2425
+ "systemic treatment algorithm for early-stage": [
2426
+ "stage ib-iiia"
2427
+ ],
2428
+ "stage ib-iiia": [
2429
+ "systemic treatment algorithm for early-stage"
2430
+ ],
2431
+ "and unresectable locally advanced": [
2432
+ "stage iii"
2433
+ ],
2434
+ "general categories or stratification": [
2435
+ "symptom"
2436
+ ],
2437
+ "symptom": [
2438
+ "general categories or stratification"
2439
+ ],
2440
+ "and the food and drug administration": [
2441
+ "fda"
2442
+ ],
2443
+ "unresectable nsclc": [
2444
+ "stage iii"
2445
+ ],
2446
+ "concurrent chemoradiation therapy": [
2447
+ "pacific"
2448
+ ],
2449
+ "pacific": [
2450
+ "small-cell lung cancer",
2451
+ "concurrent chemoradiation therapy"
2452
+ ],
2453
+ "adaura": [
2454
+ "chemotherapy"
2455
+ ],
2456
+ "d approval was based on all patient data": [
2457
+ "including stage ib"
2458
+ ],
2459
+ "including stage ib": [
2460
+ "d approval was based on all patient data"
2461
+ ],
2462
+ "on tumour cells": [
2463
+ "as per the\nema-approved indication"
2464
+ ],
2465
+ "as per the\nema-approved indication": [
2466
+ "on tumour cells"
2467
+ ],
2468
+ "gico clara campal": [
2469
+ "hm-ciocc"
2470
+ ],
2471
+ "hm-ciocc": [
2472
+ "gico clara campal"
2473
+ ],
2474
+ "rolf stahel": [
2475
+ "esmo guidelines steering committee"
2476
+ ],
2477
+ "esmo guidelines steering committee": [
2478
+ "rolf stahel"
2479
+ ],
2480
+ "george pentheroudakis": [
2481
+ "chief medical officer of esmo"
2482
+ ],
2483
+ "chief medical officer of esmo": [
2484
+ "george pentheroudakis"
2485
+ ],
2486
+ "richard lutz and jennifer lamarre": [
2487
+ "esmo staff"
2488
+ ],
2489
+ "esmo staff": [
2490
+ "richard lutz and jennifer lamarre"
2491
+ ],
2492
+ "nicola latino": [
2493
+ "esmo scientific affairs staff"
2494
+ ],
2495
+ "research support as": [
2496
+ "sub"
2497
+ ],
2498
+ "with stage ibeiiia egfr mutation positive": [
2499
+ "egfrm"
2500
+ ],
2501
+ "nivolumab": [
2502
+ "bristol myers squibb statement on opdivo",
2503
+ "nivo"
2504
+ ],
2505
+ "nivo": [
2506
+ "nivolumab"
2507
+ ],
2508
+ "platinum-doublet\nchemotherapy": [
2509
+ "chemo"
2510
+ ],
2511
+ "chemo": [
2512
+ "platinum-based chemotherapy",
2513
+ "platinum-doublet\nchemotherapy"
2514
+ ],
2515
+ "for\nresectable": [
2516
+ "ib-iiia"
2517
+ ],
2518
+ "ib-iiia": [
2519
+ "for\nresectable"
2520
+ ],
2521
+ "lung cancer": [
2522
+ "nsclc"
2523
+ ],
2524
+ "iii non-small-cell lung cancer": [
2525
+ "nsclc"
2526
+ ],
2527
+ "products for human use": [
2528
+ "chmp"
2529
+ ],
2530
+ "of circulating tumor dna": [
2531
+ "ctdna"
2532
+ ],
2533
+ "ctdna": [
2534
+ "of circulating tumor dna"
2535
+ ],
2536
+ "diques august pi i sunyer": [
2537
+ "idibaps"
2538
+ ],
2539
+ "idibaps": [
2540
+ "diques august pi i sunyer"
2541
+ ],
2542
+ "department of radiation oncology": [
2543
+ "maastro clinic"
2544
+ ],
2545
+ "maastro clinic": [
2546
+ "department of radiation oncology"
2547
+ ],
2548
+ "and tumour mutational\nburden": [
2549
+ "tmb"
2550
+ ],
2551
+ "tmb": [
2552
+ "and tumour mutational\nburden"
2553
+ ],
2554
+ "staging and risk assessment\n\n\nthe tnm": [
2555
+ "tumourenodeemetastasis"
2556
+ ],
2557
+ "-deoxy-d-glucose positron\n\ne e\nemission tomography": [
2558
+ "fdg pet"
2559
+ ],
2560
+ "fdg pet": [
2561
+ "-deoxy-d-glucose positron\n\ne e\nemission tomography"
2562
+ ],
2563
+ "and\na magnetic resonance imaging": [
2564
+ "mri"
2565
+ ],
2566
+ "elevated lactate dehydrogenase": [
2567
+ "ldh"
2568
+ ],
2569
+ "ldh": [
2570
+ "elevated lactate dehydrogenase"
2571
+ ],
2572
+ "creatinine\nand lung function test": [
2573
+ "if localised disease"
2574
+ ],
2575
+ "if localised disease": [
2576
+ "creatinine\nand lung function test"
2577
+ ],
2578
+ "pet is available\nimaging of the brain": [
2579
+ "preferably mri"
2580
+ ],
2581
+ "preferably mri": [
2582
+ "pet is available\nimaging of the brain"
2583
+ ],
2584
+ "the use of granulocyte\ncolony-stimulating factor": [
2585
+ "g-csf"
2586
+ ],
2587
+ "g-csf": [
2588
+ "the use of granulocyte\ncolony-stimulating factor"
2589
+ ],
2590
+ "no\nimprovement in progression-free survival": [
2591
+ "pfs"
2592
+ ],
2593
+ "both given concurrently with cht": [
2594
+ "starting on cycle\ntwo"
2595
+ ],
2596
+ "starting on cycle\ntwo": [
2597
+ "both given concurrently with cht"
2598
+ ],
2599
+ "an historical southwest\n\noncology group": [
2600
+ "swog"
2601
+ ],
2602
+ "swog": [
2603
+ "an historical southwest\n\noncology group"
2604
+ ],
2605
+ "area under the curve": [
2606
+ "auc"
2607
+ ],
2608
+ "auc": [
2609
+ "area under the curve"
2610
+ ],
2611
+ "-year os": [
2612
+ "primary endpoint"
2613
+ ],
2614
+ "interval": [
2615
+ "tfi"
2616
+ ],
2617
+ "tfi": [
2618
+ "interval"
2619
+ ],
2620
+ "the\norr": [
2621
+ "primary outcome measure"
2622
+ ],
2623
+ "primary outcome measure": [
2624
+ "the\norr"
2625
+ ],
2626
+ "comparing\nnivolumab to topotecan": [
2627
+ "or amrubicin"
2628
+ ],
2629
+ "or amrubicin": [
2630
+ "comparing\nnivolumab to topotecan"
2631
+ ],
2632
+ "as second-line\ntreatment in unselected": [
2633
+ "platinum-sensitive and -resistant"
2634
+ ],
2635
+ "platinum-sensitive and -resistant": [
2636
+ "as second-line\ntreatment in unselected"
2637
+ ],
2638
+ "e\nrovalpituzumab tesirine": [
2639
+ "rova-t"
2640
+ ],
2641
+ "rova-t": [
2642
+ "e\nrovalpituzumab tesirine"
2643
+ ],
2644
+ "the preferred cht for patients with limited-stage": [
2645
+ "stage\ni-iii"
2646
+ ],
2647
+ "stage\ni-iii": [
2648
+ "the preferred cht for patients with limited-stage"
2649
+ ],
2650
+ "another reason for regular": [
2651
+ "long-term"
2652
+ ],
2653
+ "long-term": [
2654
+ "another reason for regular"
2655
+ ],
2656
+ "forthcoming": [
2657
+ "seventh"
2658
+ ],
2659
+ "seventh": [
2660
+ "forthcoming"
2661
+ ],
2662
+ "techniques": [
2663
+ "ct versus mri"
2664
+ ],
2665
+ "ct versus mri": [
2666
+ "techniques"
2667
+ ],
2668
+ "front med": [
2669
+ "lausanne"
2670
+ ],
2671
+ "lausanne": [
2672
+ "front med"
2673
+ ],
2674
+ "cell lung cancer": [
2675
+ "convert"
2676
+ ],
2677
+ "convert": [
2678
+ "cell lung cancer"
2679
+ ],
2680
+ "dose prophylactic cranial irradiation": [
2681
+ "pci"
2682
+ ],
2683
+ "extensive-stage small-cell lung cancer": [
2684
+ "caspian"
2685
+ ],
2686
+ "caspian": [
2687
+ "extensive-stage small-cell lung cancer",
2688
+ "cer"
2689
+ ],
2690
+ "cer": [
2691
+ "caspian"
2692
+ ],
2693
+ "plus ipilimumab": [
2694
+ "ipi"
2695
+ ],
2696
+ "ipi": [
2697
+ "plus ipilimumab"
2698
+ ],
2699
+ "or placebo": [
2700
+ "pbo"
2701
+ ],
2702
+ "pbo": [
2703
+ "or placebo"
2704
+ ],
2705
+ "therapy in patients": [
2706
+ "pts"
2707
+ ],
2708
+ "platinum-based chemotherapy": [
2709
+ "chemo"
2710
+ ],
2711
+ "bristol myers squibb statement on opdivo": [
2712
+ "nivolumab"
2713
+ ],
2714
+ "ukcccr": [
2715
+ "research"
2716
+ ],
2717
+ "and treatment of cancer": [
2718
+ "eortc"
2719
+ ],
2720
+ "eortc": [
2721
+ "and treatment of cancer"
2722
+ ],
2723
+ "randomized trial radiation therapy oncology group": [
2724
+ "rtog"
2725
+ ],
2726
+ "rtog": [
2727
+ "randomized trial radiation therapy oncology group"
2728
+ ],
2729
+ "a phase iii randomised clinical trial": [
2730
+ "rct"
2731
+ ],
2732
+ "tyrosine kinase inhibitor": [
2733
+ "tki"
2734
+ ],
2735
+ "oligoprogression\n\nlocal treatment": [
2736
+ "surgery or rt"
2737
+ ],
2738
+ "vascular endothelial growth factor": [
2739
+ "receptor"
2740
+ ],
2741
+ "receptor": [
2742
+ "vascular endothelial growth factor"
2743
+ ],
2744
+ "or food and drug administration": [
2745
+ "fda"
2746
+ ],
2747
+ "nicola\nlatino and francesca chiovaro": [
2748
+ "esmo scientific affairs staff"
2749
+ ],
2750
+ "esmo medical affairs staff": [
2751
+ "and dr svetlana jezdic"
2752
+ ],
2753
+ "the study of lung cancer": [
2754
+ "iaslc"
2755
+ ],
2756
+ "iaslc": [
2757
+ "the study of lung cancer"
2758
+ ],
2759
+ "global breast cancer initiative": [
2760
+ "gbci"
2761
+ ],
2762
+ "gbci": [
2763
+ "global breast cancer initiative"
2764
+ ],
2765
+ "european school of oncology": [
2766
+ "eso"
2767
+ ],
2768
+ "eso": [
2769
+ "european school of oncology"
2770
+ ],
2771
+ "and society for immunotherapy and cancer": [
2772
+ "sitc"
2773
+ ],
2774
+ "sitc": [
2775
+ "and society for immunotherapy and cancer"
2776
+ ],
2777
+ "p repotrectinib in patients": [
2778
+ "pts"
2779
+ ],
2780
+ "mo encorafenib plus\n\nbinimetinib in patients": [
2781
+ "pts"
2782
+ ],
2783
+ "versus docetaxel in patients": [
2784
+ "pts"
2785
+ ],
2786
+ "of molecular targets": [
2787
+ "escat"
2788
+ ],
2789
+ "escat": [
2790
+ "of molecular targets"
2791
+ ],
2792
+ "european society for medical oncology": [
2793
+ "esmo"
2794
+ ],
2795
+ "nice": [
2796
+ "national institute for health and care excellence",
2797
+ "nhs"
2798
+ ],
2799
+ "national institute for health and care excellence": [
2800
+ "nice"
2801
+ ],
2802
+ "world health organization": [
2803
+ "who"
2804
+ ],
2805
+ "australian national lung cancer screening program": [
2806
+ "nlcsp"
2807
+ ],
2808
+ "nlcsp": [
2809
+ "australian national lung cancer screening program"
2810
+ ],
2811
+ "- esmo": [
2812
+ "selected guidelines"
2813
+ ],
2814
+ "selected guidelines": [
2815
+ "- esmo"
2816
+ ],
2817
+ "- asco": [
2818
+ "stage iv guidelines"
2819
+ ],
2820
+ "stage iv guidelines": [
2821
+ "- asco"
2822
+ ],
2823
+ "low-dose ct": [
2824
+ "ldct"
2825
+ ],
2826
+ "nhs": [
2827
+ "nice"
2828
+ ],
2829
+ "vs insurance-based": [
2830
+ "nccn"
2831
+ ],
2832
+ "vs universal": [
2833
+ "who"
2834
+ ],
2835
+ "early": [
2836
+ "i-ii"
2837
+ ],
2838
+ "i-ii": [
2839
+ "early"
2840
+ ],
2841
+ "locally advanced": [
2842
+ "iii"
2843
+ ],
2844
+ "iii": [
2845
+ "locally advanced"
2846
+ ],
2847
+ "global": [
2848
+ "who"
2849
+ ],
2850
+ "- regular imaging": [
2851
+ "ct scans"
2852
+ ],
2853
+ "ct scans": [
2854
+ "- regular imaging"
2855
+ ],
2856
+ "- surgery": [
2857
+ "early stages"
2858
+ ],
2859
+ "early stages": [
2860
+ "- surgery"
2861
+ ],
2862
+ "- radiotherapy": [
2863
+ "radiation"
2864
+ ],
2865
+ "radiation": [
2866
+ "- radiotherapy"
2867
+ ],
2868
+ "global statistics": [
2869
+ "who"
2870
+ ]
2871
+ },
2872
+ "abbreviations": {
2873
+ "esmo": [
2874
+ "european society of medical oncology",
2875
+ "the most recent european society for medical oncology",
2876
+ "european society for medical oncology",
2877
+ "european\nsociety of medical oncology",
2878
+ "european society for\nmedical oncology",
2879
+ "the european society for medical oncology",
2880
+ "the following european society for medical oncology",
2881
+ "european society for medical\noncology"
2882
+ ],
2883
+ "asco": [
2884
+ "american\nsociety of clinical oncology",
2885
+ "american society of clinical\noncology",
2886
+ "american society of clinical\n\noncology",
2887
+ "american society of clinical oncology",
2888
+ "the clinical practice guidelines published herein are provided by the american society of clinical oncology inc",
2889
+ "this american society of clinical oncology"
2890
+ ],
2891
+ "aiom": [
2892
+ "italian association\nof medical oncology",
2893
+ "the italian association of medical oncology",
2894
+ "italian association of medical oncology"
2895
+ ],
2896
+ "nccn": [
2897
+ "national comprehensive cancer network",
2898
+ "american cancer centers"
2899
+ ],
2900
+ "glides": [
2901
+ "ecision support",
2902
+ "guidelines into decision\nsupport"
2903
+ ],
2904
+ "glc": [
2905
+ "guidelines committee"
2906
+ ],
2907
+ "mcbs": [
2908
+ "magnitude of clinical benefit scale",
2909
+ "magnitude\nof clinical benefit score"
2910
+ ],
2911
+ "ema": [
2912
+ "european medicines agency",
2913
+ "european medicines\nagency"
2914
+ ],
2915
+ "sclc": [
2916
+ "small cell lung cancer",
2917
+ "clinical practice guidelines on small cell lung\ncancer"
2918
+ ],
2919
+ "cco": [
2920
+ "cancer care ontario"
2921
+ ],
2922
+ "astro": [
2923
+ "executive summary of an american society for\nradiation oncology"
2924
+ ],
2925
+ "inst": [
2926
+ "calithera biosciences",
2927
+ "novartis",
2928
+ "cullinan oncology",
2929
+ "regeneron",
2930
+ "kline canada",
2931
+ "verastem",
2932
+ "genentech",
2933
+ "amgen",
2934
+ "bayer",
2935
+ "bristol myers squibb foundation",
2936
+ "puma biotechnology",
2937
+ "boehringer ingelheim",
2938
+ "genomics",
2939
+ "myers squibb",
2940
+ "arcus biosciences",
2941
+ "kline",
2942
+ "turning point therapeutics",
2943
+ "takeda",
2944
+ "revolution medicines",
2945
+ "merck serono",
2946
+ "zeneca",
2947
+ "macrogenics",
2948
+ "merck",
2949
+ "summit therapeutics",
2950
+ "palobiofarma",
2951
+ "astex pharmaceuticals",
2952
+ "zeneca canada",
2953
+ "black diamond\ntherapeutics",
2954
+ "janssen oncology",
2955
+ "mirati therapeutics",
2956
+ "bristol myers squibb",
2957
+ "dohme",
2958
+ "immune",
2959
+ "sutro biopharma",
2960
+ "polaris",
2961
+ "pfizer",
2962
+ "forward",
2963
+ "elevation oncology",
2964
+ "astra zeneca",
2965
+ "heart therapeutics",
2966
+ "nuvation bio",
2967
+ "inhibrx",
2968
+ "pharmaceuticals",
2969
+ "bristol myers\nsquibb",
2970
+ "roche",
2971
+ "dizal\npharma",
2972
+ "harpoon therapeutics",
2973
+ "vivace therapeutics",
2974
+ "janssen",
2975
+ "jazz pharmaceuticals",
2976
+ "advaxis",
2977
+ "lilly",
2978
+ "constellation pharmaceuticals",
2979
+ "guardant health",
2980
+ "trizell",
2981
+ "blueprint medicines",
2982
+ "therapeutics",
2983
+ "exelixis"
2984
+ ],
2985
+ "ct": [
2986
+ "clinicians should use a diagnostic chest computed tomography",
2987
+ "the use of\ncomputed tomography",
2988
+ "computed tomography"
2989
+ ],
2990
+ "mri": [
2991
+ "what is the role of brain magnetic resonance imaging"
2992
+ ],
2993
+ "sbrt": [
2994
+ "salvage stereotactic body radiation therapy",
2995
+ "stereotactic body radiotherapy"
2996
+ ],
2997
+ "cap": [
2998
+ "pathologists"
2999
+ ],
3000
+ "iaslc": [
3001
+ "international association for the\n\nstudy of lung cancer",
3002
+ "pathology committee chair\nfor international association for the study of lung cancer",
3003
+ "study of lung cancer",
3004
+ "international association for the\nstudy of lung cancer",
3005
+ "international association for\nthe study of lung cancer",
3006
+ "the\ninternational association for the study of lung cancer"
3007
+ ],
3008
+ "amp": [
3009
+ "association\nfor molecular pathology"
3010
+ ],
3011
+ "ihc": [
3012
+ "immunohistochemistry"
3013
+ ],
3014
+ "ctc": [
3015
+ "there is currently insufficient evidence to support the use of circulating tumor cell"
3016
+ ],
3017
+ "mars": [
3018
+ "although the results from the mesothelioma and radical surgery"
3019
+ ],
3020
+ "os": [
3021
+ "overall survival",
3022
+ "the median\noverall survival"
3023
+ ],
3024
+ "elsevier": [
3025
+ "clinical lung cancer"
3026
+ ],
3027
+ "rct": [
3028
+ "one randomized controlled trial"
3029
+ ],
3030
+ "ps": [
3031
+ "eastern cooperative oncology group performance\nstatus"
3032
+ ],
3033
+ "orr": [
3034
+ "reuss et al\n\n\n\nrate",
3035
+ "is result in a lower overall response\nrate"
3036
+ ],
3037
+ "pci": [
3038
+ "prophylactic cranial irradiation"
3039
+ ],
3040
+ "fda": [
3041
+ "osimertinib is approved by both the united states food and\ndrug administration",
3042
+ "these results led to the food\n\nand drug administration",
3043
+ "united states food and drug administration",
3044
+ "food and drug administration",
3045
+ "entrectinib received food and\ndrug administration"
3046
+ ],
3047
+ "crs": [
3048
+ "cytokine release syndrome"
3049
+ ],
3050
+ "ikv": [
3051
+ "department of surgical sciences"
3052
+ ],
3053
+ "who": [
3054
+ "global",
3055
+ "global statistics",
3056
+ "the latest world health organization",
3057
+ "world health organization",
3058
+ "the recent world health organization"
3059
+ ],
3060
+ "lc": [
3061
+ "these\nguidelines are restricted to lung carcinoid"
3062
+ ],
3063
+ "seer": [
3064
+ "epidemiology and end results",
3065
+ "end results"
3066
+ ],
3067
+ "uicc": [
3068
+ "union for international cancer control",
3069
+ "edition of the union for\ninternational cancer control",
3070
+ "union for\ninternational cancer control",
3071
+ "union for international\ncancer control"
3072
+ ],
3073
+ "gep": [
3074
+ "based on\napproval and recommendations in gastroenteropancreatic"
3075
+ ],
3076
+ "pth": [
3077
+ "annals of oncology\n\n\n\nparathyroid hormone"
3078
+ ],
3079
+ "rfa": [
3080
+ "for these patients radiofrequency ablation",
3081
+ "palliative surgery\nor radiofrequency ablation"
3082
+ ],
3083
+ "recist": [
3084
+ "measurements and response assessment should follow\nresponse evaluation criteria in solid tumours",
3085
+ "cs with response evaluation criteria\nin solid tumours",
3086
+ "measurements and response assessment should follow response evaluation criteria in solid tumours"
3087
+ ],
3088
+ "gemox": [
3089
+ "oxaliplatin combined with gemcitabine"
3090
+ ],
3091
+ "lan": [
3092
+ "lanreotide autogel"
3093
+ ],
3094
+ "chuv": [
3095
+ "centre hospitalier universitaire vaudois",
3096
+ "centre hospitalier universitaire\nvaudois"
3097
+ ],
3098
+ "nlst": [
3099
+ "national cancer institute\nannounced the results of the national lung cancer screening\ntrial",
3100
+ "the much larger national lung cancer screening trial"
3101
+ ],
3102
+ "bts": [
3103
+ "guidelines developed by the british thoracic society"
3104
+ ],
3105
+ "pet": [
3106
+ "the latter recommend a lesser reliance on positron emission tomography"
3107
+ ],
3108
+ "nice": [
3109
+ "national institute for health and care\nexcellence"
3110
+ ],
3111
+ "accp": [
3112
+ "american college of chest physicians"
3113
+ ],
3114
+ "rcri": [
3115
+ "evaluation of the cardiac risk assessment for lung resections\nby the recalibrated thoracic revised cardiac risk index"
3116
+ ],
3117
+ "lcsg": [
3118
+ "based on the lung cancer study group"
3119
+ ],
3120
+ "egfr": [
3121
+ "for cases with mutation in epidermal growth factor receptor"
3122
+ ],
3123
+ "rtog": [
3124
+ "radiation therapy oncology group",
3125
+ "data from a completed prospective\nradiation therapy oncology group"
3126
+ ],
3127
+ "esge": [
3128
+ "european society of gastrointestinal endoscopy"
3129
+ ],
3130
+ "ers": [
3131
+ "european respiratory society"
3132
+ ],
3133
+ "ests": [
3134
+ "european\nsociety of thoracic surgeons",
3135
+ "european society of thoracic surgeons"
3136
+ ],
3137
+ "thoracoscore": [
3138
+ "the thoracic surgery scoring\nsystem",
3139
+ "the thoracic surgery scoring system"
3140
+ ],
3141
+ "pulmonology": [
3142
+ "respiratory oncology",
3143
+ "respiratory oncology unit"
3144
+ ],
3145
+ "acs": [
3146
+ "lung cancer screening guidelines published by the\namerican cancer society"
3147
+ ],
3148
+ "ialt": [
3149
+ "some trials"
3150
+ ],
3151
+ "anita": [
3152
+ "adjuvant navelbine international trialist association"
3153
+ ],
3154
+ "sabr": [
3155
+ "radiographic changes after lung stereotactic\nablative radiotherapy"
3156
+ ],
3157
+ "vumc": [
3158
+ "vrije\nuniversity medical centre",
3159
+ "university medical centre"
3160
+ ],
3161
+ "ub": [
3162
+ "bemeneed"
3163
+ ],
3164
+ "bms": [
3165
+ "myers\nsquibb",
3166
+ "bristol myers squibb",
3167
+ "bristol myers\nsquibb",
3168
+ "bristol\nmyers squibb"
3169
+ ],
3170
+ "msd": [
3171
+ "dohme"
3172
+ ],
3173
+ "eortc": [
3174
+ "chair of the european\norganisation for research and treatment of cancer",
3175
+ "european\norganisation for research and treatment of cancer",
3176
+ "treatment of cancer"
3177
+ ],
3178
+ "cpg": [
3179
+ "clinical practice guideline"
3180
+ ],
3181
+ "escat": [
3182
+ "targets",
3183
+ "scale for clinical actionability of\nmolecular targets",
3184
+ "scale for clinical actionability of molecular targets"
3185
+ ],
3186
+ "alk": [
3187
+ "positive anaplastic lymphoma kinase"
3188
+ ],
3189
+ "ish": [
3190
+ "detection is reliable by\nin situ hybridisation"
3191
+ ],
3192
+ "cns": [
3193
+ "imaging of the central nervous system"
3194
+ ],
3195
+ "ajcc": [
3196
+ "american joint committee on cancer",
3197
+ "american joint\ncommittee on cancer"
3198
+ ],
3199
+ "vb": [
3200
+ "ch cl"
3201
+ ],
3202
+ "ae": [
3203
+ "serious adverse event"
3204
+ ],
3205
+ "ild": [
3206
+ "interstitial lung disease"
3207
+ ],
3208
+ "kras": [
3209
+ "the\nkirsten rat sarcoma virus"
3210
+ ],
3211
+ "lat": [
3212
+ "data regarding the role of local ablative therapy"
3213
+ ],
3214
+ "eano": [
3215
+ "oncology"
3216
+ ],
3217
+ "gsk": [
3218
+ "kline"
3219
+ ],
3220
+ "nvalt": [
3221
+ "nederlandse vereniging van artsen voor longziekten en tuberculose",
3222
+ "lung cancer group and past secretary and current\nchair of the nederlandse vereniging van artsen voor longziekten en tuberculose"
3223
+ ],
3224
+ "atorg": [
3225
+ "asian\nthoracic oncology research group"
3226
+ ],
3227
+ "clcrf": [
3228
+ "chinese lung\ncancer research foundation limited"
3229
+ ],
3230
+ "csco": [
3231
+ "chinese society of clinical oncology",
3232
+ "chinese\nsociety of clinical oncology",
3233
+ "china"
3234
+ ],
3235
+ "hkcf": [
3236
+ "hong kong cancer fund"
3237
+ ],
3238
+ "hkcts": [
3239
+ "hong kong cancer therapy society"
3240
+ ],
3241
+ "per": [
3242
+ "education resource"
3243
+ ],
3244
+ "prime": [
3245
+ "partnerships in international medical education"
3246
+ ],
3247
+ "rtp": [
3248
+ "research\nto practice"
3249
+ ],
3250
+ "samo": [
3251
+ "president of swiss\nacademy of multidisciplinary oncology"
3252
+ ],
3253
+ "sakk": [
3254
+ "research",
3255
+ "president of lung group for swiss group for clinical cancer\nresearch"
3256
+ ],
3257
+ "etop": [
3258
+ "european thoracic oncology platform"
3259
+ ],
3260
+ "ibcsg": [
3261
+ "international breast cancer study group"
3262
+ ],
3263
+ "aacr": [
3264
+ "partners member\nof american association of cancer research"
3265
+ ],
3266
+ "asmac": [
3267
+ "clinique"
3268
+ ],
3269
+ "basel": [
3270
+ "cancers"
3271
+ ],
3272
+ "chmp": [
3273
+ "summary of opinion",
3274
+ "products for human use"
3275
+ ],
3276
+ "paga": [
3277
+ "asian adapted"
3278
+ ],
3279
+ "ishmo": [
3280
+ "indonesian society\nof hematology and medical oncology",
3281
+ "indonesia"
3282
+ ],
3283
+ "jsmo": [
3284
+ "japan",
3285
+ "japanese society of medical oncology"
3286
+ ],
3287
+ "ksmo": [
3288
+ "korean society for medical oncology",
3289
+ "korea"
3290
+ ],
3291
+ "mos": [
3292
+ "malaysia",
3293
+ "malaysian oncological society"
3294
+ ],
3295
+ "psmo": [
3296
+ "philippine society of medical\noncology",
3297
+ "philippine society of\nmedical oncology",
3298
+ "philippines"
3299
+ ],
3300
+ "sso": [
3301
+ "singapore",
3302
+ "singapore society of\noncology"
3303
+ ],
3304
+ "tos": [
3305
+ "taiwan oncology society",
3306
+ "taiwan"
3307
+ ],
3308
+ "tsco": [
3309
+ "thai society of clinical oncology",
3310
+ "thailand"
3311
+ ],
3312
+ "ismpo": [
3313
+ "indian\nsociety of medical and paediatric oncology"
3314
+ ],
3315
+ "nsmpa": [
3316
+ "national medical products\nadministration"
3317
+ ],
3318
+ "nmpa": [
3319
+ "chinese national\nmedical products administration"
3320
+ ],
3321
+ "jkn": [
3322
+ "the jaminan kesehatan nasional"
3323
+ ],
3324
+ "fornas": [
3325
+ "national standard of\nmedication list",
3326
+ "na\ntional drug formulary"
3327
+ ],
3328
+ "pap": [
3329
+ "open\n\n\n\nprogram"
3330
+ ],
3331
+ "pdma": [
3332
+ "it may take\nfrom several months to years for the pharmaceuticals and\nmedical devices agency"
3333
+ ],
3334
+ "nhi": [
3335
+ "national health insurance"
3336
+ ],
3337
+ "torg": [
3338
+ "thoracic oncology research\ngroup"
3339
+ ],
3340
+ "wjog": [
3341
+ "west japan oncology group"
3342
+ ],
3343
+ "tsc": [
3344
+ "trial steering committee"
3345
+ ],
3346
+ "idmc": [
3347
+ "committee"
3348
+ ],
3349
+ "ukiog": [
3350
+ "ireland oesophagogastric group"
3351
+ ],
3352
+ "lond": [
3353
+ "cancer commun",
3354
+ "commun"
3355
+ ],
3356
+ "nivo": [
3357
+ "nivolumab"
3358
+ ],
3359
+ "pa": [
3360
+ "philadelphia"
3361
+ ],
3362
+ "idibaps": [
3363
+ "august pi i sunyer"
3364
+ ],
3365
+ "swog": [
3366
+ "an historical southwest\n\noncology group"
3367
+ ],
3368
+ "cr": [
3369
+ "patients with a complete response"
3370
+ ],
3371
+ "rova-t": [
3372
+ "rovalpituzumab tesirine"
3373
+ ],
3374
+ "lausanne": [
3375
+ "front med"
3376
+ ],
3377
+ "coordinating": [
3378
+ "united kingdom"
3379
+ ],
3380
+ "ukcccr": [
3381
+ "research"
3382
+ ],
3383
+ "icf": [
3384
+ "international cancer foundation"
3385
+ ],
3386
+ "gbci": [
3387
+ "global breast cancer initiative"
3388
+ ],
3389
+ "esco": [
3390
+ "college of the european school of oncology"
3391
+ ],
3392
+ "eso": [
3393
+ "european school of oncology"
3394
+ ],
3395
+ "sitc": [
3396
+ "society for immunotherapy and cancer"
3397
+ ],
3398
+ "actionability": [
3399
+ "scale for clinical"
3400
+ ],
3401
+ "disease": [
3402
+ "centers for"
3403
+ ],
3404
+ "nlcsp": [
3405
+ "australian national lung cancer screening program"
3406
+ ],
3407
+ "iii": [
3408
+ "locally advanced"
3409
+ ],
3410
+ "iv": [
3411
+ "advanced"
3412
+ ],
3413
+ "nsclc": [
3414
+ "small cell lung cancer"
3415
+ ],
3416
+ "uk": [
3417
+ "guidelines"
3418
+ ]
3419
+ }
3420
+ }
logs/app.log CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -36,3 +36,7 @@ torch==2.2.2+cpu
36
  python-docx==1.1.2
37
  reportlab==4.2.5
38
 
 
 
 
 
 
36
  python-docx==1.1.2
37
  reportlab==4.2.5
38
 
39
+ # Authentication
40
+ python-multipart>=0.0.18
41
+ itsdangerous==2.2.0
42
+