Spaces:

benardo0
/

Nurses

Runtime error

App Files Files Community

benardo0 committed on Jan 22, 2025

Commit

b4ff37d

verified ·

1 Parent(s): e53bd9c

Update app.py

Browse files

Files changed (1) hide show

app.py +121 -280

app.py CHANGED Viewed

@@ -1,305 +1,146 @@
-from fastapi import FastAPI, HTTPException, Request
-from pydantic import BaseModel
-from typing import List, Optional, Dict
-import gradio as gr
-import json
-from enum import Enum
-import re
 import os
-import time
-import gc
-from contextlib import asynccontextmanager
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-# Configuration variables that can be set through environment variables
-# These allow for flexible deployment configuration without code changes
-MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "mradermacher/Llama3-Med42-8B-GGUF")
-MODEL_FILENAME = os.getenv("MODEL_FILENAME", "Llama3-Med42-8B.Q5_K_M.gguf")
-N_THREADS = int(os.getenv("N_THREADS", "4"))
-# Data models for API request/response handling
-class ConsultationState(Enum):
-    INITIAL = "initial"
-    GATHERING_INFO = "gathering_info"
-    DIAGNOSIS = "diagnosis"
-class Message(BaseModel):
-    role: str
-    content: str
-class ChatRequest(BaseModel):
-    messages: List[Message]
-class ChatResponse(BaseModel):
-    response: str
-    finished: bool
-# Standardized health assessment questions for consistent patient evaluation
-HEALTH_ASSESSMENT_QUESTIONS = [
-    "What are your current symptoms and how long have you been experiencing them?",
-    "Do you have any pre-existing medical conditions or chronic illnesses?",
-    "Are you currently taking any medications? If yes, please list them.",
-    "Is there any relevant family medical history I should know about?",
-    "Have you had any similar symptoms in the past? If yes, what treatments worked?"
-]
-# AI assistant's identity and role definition
-NURSE_OGE_IDENTITY = """
-You are Nurse Oge, a medical AI assistant focused on serving patients in Nigeria. Always be empathetic,
-professional, and thorough in your assessments. When asked about your identity, explain that you are
-Nurse Oge, a medical AI assistant serving Nigerian communities. Remember that you must gather complete
-health information before providing any medical advice.
-"""
-class NurseOgeAssistant:
-    """
-    Main assistant class that handles conversation management and medical consultations
-    """
     def __init__(self):
         try:
-            # Initialize the Llama model using from_pretrained as per documentation
-            self.llm = Llama.from_pretrained(
-                repo_id=MODEL_REPO_ID,
-                filename=MODEL_FILENAME,
-                n_ctx=2048,      # Context window size
-                n_threads=N_THREADS,  # CPU threads to use
-                n_gpu_layers=0   # CPU-only inference
             )
-        except Exception as e:
-            raise RuntimeError(f"Failed to initialize the model: {str(e)}")
-        # State management for multiple concurrent conversations
-        self.consultation_states = {}
-        self.gathered_info = {}
-    def _is_identity_question(self, message: str) -> bool:
-        """Detect if the user is asking about the assistant's identity"""
-        identity_patterns = [
-            r"who are you",
-            r"what are you",
-            r"your name",
-            r"what should I call you",
-            r"tell me about yourself"
-        ]
-        return any(re.search(pattern, message.lower()) for pattern in identity_patterns)
-    def _is_location_question(self, message: str) -> bool:
-        """Detect if the user is asking about the assistant's location"""
-        location_patterns = [
-            r"where are you",
-            r"which country",
-            r"your location",
-            r"where do you work",
-            r"where are you based"
-        ]
-        return any(re.search(pattern, message.lower()) for pattern in location_patterns)
-    def _get_next_assessment_question(self, conversation_id: str) -> Optional[str]:
-        """Get the next health assessment question based on conversation progress"""
-        if conversation_id not in self.gathered_info:
-            self.gathered_info[conversation_id] = []
-        questions_asked = len(self.gathered_info[conversation_id])
-        if questions_asked < len(HEALTH_ASSESSMENT_QUESTIONS):
-            return HEALTH_ASSESSMENT_QUESTIONS[questions_asked]
-        return None
-    async def process_message(self, conversation_id: str, message: str, history: List[Dict]) -> ChatResponse:
-        """
-        Process incoming messages and manage the conversation flow
-        """
         try:
-            # Initialize state for new conversations
-            if conversation_id not in self.consultation_states:
-                self.consultation_states[conversation_id] = ConsultationState.INITIAL
-            # Handle identity questions
-            if self._is_identity_question(message):
-                return ChatResponse(
-                    response="I am Nurse Oge, a medical AI assistant dedicated to helping patients in Nigeria. "
-                            "I'm here to provide medical guidance while ensuring I gather all necessary health information "
-                            "for accurate assessments.",
-                    finished=True
-                )
-            # Handle location questions
-            if self._is_location_question(message):
-                return ChatResponse(
-                    response="I am based in Nigeria and specifically trained to serve Nigerian communities, "
-                            "taking into account local healthcare contexts and needs.",
-                    finished=True
-                )
-            # Start health assessment for medical queries
-            if self.consultation_states[conversation_id] == ConsultationState.INITIAL:
-                self.consultation_states[conversation_id] = ConsultationState.GATHERING_INFO
-                next_question = self._get_next_assessment_question(conversation_id)
-                return ChatResponse(
-                    response=f"Before I can provide any medical advice, I need to gather some important health information. "
-                            f"{next_question}",
-                    finished=False
                 )
-            # Continue gathering information
-            if self.consultation_states[conversation_id] == ConsultationState.GATHERING_INFO:
-                self.gathered_info[conversation_id].append(message)
-                next_question = self._get_next_assessment_question(conversation_id)
-                if next_question:
-                    return ChatResponse(
-                        response=f"Thank you for that information. {next_question}",
-                        finished=False
-                    )
-                else:
-                    self.consultation_states[conversation_id] = ConsultationState.DIAGNOSIS
-                    # Prepare context from gathered information
-                    context = "\n".join([
-                        f"Q: {q}\nA: {a}" for q, a in
-                        zip(HEALTH_ASSESSMENT_QUESTIONS, self.gathered_info[conversation_id])
-                    ])
-                    # Prepare messages for the model
-                    messages = [
-                        {"role": "system", "content": NURSE_OGE_IDENTITY},
-                        {"role": "user", "content": f"Based on the following patient information, provide thorough assessment, diagnosis and recommendations:\n\n{context}\n\nOriginal query: {message}"}
-                    ]
-                    # Implement retry logic for model inference
-                    max_retries = 3
-                    retry_delay = 2
-                    for attempt in range(max_retries):
-                        try:
-                            response = self.llm.create_chat_completion(
-                                messages=messages,
-                                max_tokens=512,
-                                temperature=0.7,
-                                top_p=0.95,
-                                stop=["</s>"]
-                            )
-                            break
-                        except Exception as e:
-                            if attempt < max_retries - 1:
-                                time.sleep(retry_delay)
-                                continue
-                            return ChatResponse(
-                                response="I'm sorry, I'm experiencing some technical difficulties. Please try again in a moment.",
-                                finished=True
-                            )
-                    # Reset conversation state
-                    self.consultation_states[conversation_id] = ConsultationState.INITIAL
-                    self.gathered_info[conversation_id] = []
-                    return ChatResponse(
-                        response=response['choices'][0]['message']['content'],
-                        finished=True
-                    )
-        except Exception as e:
-            return ChatResponse(
-                response=f"An error occurred while processing your request. Please try again.",
-                finished=True
             )
-# Define FastAPI lifespan for startup/shutdown events
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    # Initialize on startup
-    global nurse_oge
     try:
-        nurse_oge = NurseOgeAssistant()
     except Exception as e:
-        print(f"Failed to initialize NurseOgeAssistant: {e}")
-    yield
-    # Clean up on shutdown if needed
-    # Add cleanup code here
-# Initialize FastAPI with lifespan
-app = FastAPI(lifespan=lifespan)
-# Add memory management middleware
-@app.middleware("http")
-async def add_memory_management(request: Request, call_next):
-    """Middleware to help manage memory usage"""
-    gc.collect()
-    response = await call_next(request)
-    gc.collect()
-    return response
-# Health check endpoint
-@app.get("/health")
-async def health_check():
-    """Endpoint to verify service health"""
-    return {"status": "healthy", "model_loaded": nurse_oge is not None}
-# Chat endpoint
-@app.post("/chat")
-async def chat_endpoint(request: ChatRequest):
-    """Main chat endpoint for API interactions"""
-    if nurse_oge is None:
-        raise HTTPException(
-            status_code=503,
-            detail="The medical assistant is not available at the moment. Please try again later."
-        )
-    if not request.messages:
-        raise HTTPException(status_code=400, detail="No messages provided")
-    latest_message = request.messages[-1].content
-    response = await nurse_oge.process_message(
-        conversation_id="default",
-        message=latest_message,
-        history=request.messages[:-1]
-    )
-    return response
-# Gradio chat interface function
-async def gradio_chat(message, history):
-    """Handler for Gradio chat interface"""
-    if nurse_oge is None:
-        return "The medical assistant is not available at the moment. Please try again later."
-    response = await nurse_oge.process_message("gradio_user", message, history)
-    return response.response
-# Create and configure Gradio interface
 demo = gr.ChatInterface(
-    fn=gradio_chat,
-    title="Nurse Oge - Medical Assistant",
-    description="""Welcome to Nurse Oge, your AI medical assistant specialized in serving Nigerian communities.
-                   This system provides medical guidance while ensuring comprehensive health information gathering.""",
     examples=[
-        ["What are the common symptoms of malaria?"],
-        ["I've been having headaches for the past week"],
-        ["How can I prevent typhoid fever?"],
     ],
-    theme=gr.themes.Soft(
-        primary_hue="blue",
-        secondary_hue="purple",
-    )
 )
-# Add custom CSS for better appearance
-demo.css = """
-    .gradio-container {
-        font-family: 'Arial', sans-serif;
-    }
-    .chat-message {
-        padding: 1rem;
-        border-radius: 0.5rem;
-        margin-bottom: 0.5rem;
-    }
-"""
-# Mount both FastAPI and Gradio
-app = gr.mount_gradio_app(app, demo, path="/gradio")
-# Run the application
 if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)

 import os
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from typing import List, Dict
+import logging
+# Module-wide logging at INFO level; used below to trace model loading and inference errors
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)  # shared by MedicalAssistant and the chat helpers in this file
+class MedicalAssistant:  # Owns the tokenizer and model; generate_response() is the only inference entry point
     def __init__(self):
+        """Initialize the medical assistant with model and tokenizer"""
         try:
+            logger.info("Starting model initialization...")
+            # Model configuration is hard-coded here (repo id, truncation length, target device)
+            self.model_name = "mradermacher/Llama3-Med42-8B-GGUF"  # NOTE(review): name suggests a GGUF repo, yet no gguf_file is passed to from_pretrained below - confirm it loads
+            self.max_length = 1048  # tokenizer truncation limit used in generate_response; NOTE(review): possibly meant 1024 - confirm
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"  # tokenized inputs are moved to this device before generation
+            logger.info(f"Using device: {self.device}")
+            # Tokenizer is loaded before the model so a bad repo id fails before the larger model load starts
+            logger.info("Loading tokenizer...")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name,
+                padding_side="left",
+                trust_remote_code=True  # NOTE(review): presumably lets repo-supplied code run at load time - confirm it is needed
             )
+            # Fall back to the EOS token for padding when the tokenizer defines no pad token
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            # Model load requests float16 weights, automatic device placement and 8-bit quantization
+            logger.info("Loading model...")
+            self.model = AutoModelForCausalLM.from_pretrained(
+                self.model_name,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                load_in_8bit=True,  # NOTE(review): set unconditionally, even when self.device is "cpu" - confirm 8-bit loading works without a GPU
+                trust_remote_code=True
+            )
+            logger.info("Model initialization completed successfully!")
+        except Exception as e:
+            logger.error(f"Error during initialization: {str(e)}")
+            raise  # logged, then re-raised so the caller can tell that loading failed
+    def generate_response(self, message: str, chat_history: List[Dict] = None) -> str:
+        """Generate a response to the user's message"""
         try:
+            # Fixed system prompt; the continuation lines of the triple-quoted string keep their leading whitespace
+            system_prompt = """You are a medical AI assistant. Respond to medical queries
+            professionally and accurately. If you're unsure, always recommend consulting
+            with a healthcare provider."""
+            # Prompt is built from the system prompt and the current message only; NOTE(review): chat_history is accepted but never read
+            full_prompt = f"{system_prompt}\n\nUser: {message}\nAssistant:"
+            # Tokenize to PyTorch tensors, truncating at self.max_length tokens
+            inputs = self.tokenizer(
+                full_prompt,
+                return_tensors="pt",
+                padding=True,
+                truncation=True,
+                max_length=self.max_length
+            ).to(self.device)  # NOTE(review): model was loaded with device_map="auto"; confirm its placement matches self.device
+            # Sampled generation with gradient tracking disabled
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
+                    max_new_tokens=512,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.95,
+                    pad_token_id=self.tokenizer.pad_token_id,
+                    repetition_penalty=1.1
                 )
+            # Decode the first returned sequence, dropping special tokens
+            response = self.tokenizer.decode(
+                outputs[0],  # NOTE(review): presumably still contains the prompt tokens, which is why the text is split below - confirm
+                skip_special_tokens=True
             )
+            # Keep only the text after the last "Assistant:" marker, with surrounding whitespace removed
+            response = response.split("Assistant:")[-1].strip()
+            return response
+        except Exception as e:
+            logger.error(f"Error during response generation: {str(e)}")
+            return f"I apologize, but I encountered an error. Please try again."  # f-prefix has no placeholders; the error detail is only logged
+# Module-level assistant instance; stays None until initialize_assistant() succeeds
+assistant = None
+def initialize_assistant():
+    """Initialize the assistant and handle any errors"""
+    global assistant
     try:
+        assistant = MedicalAssistant()  # constructor loads tokenizer and model and re-raises on failure
+        return True
     except Exception as e:
+        logger.error(f"Failed to initialize assistant: {str(e)}")
+        return False  # failure is reported through the return value; assistant is left unchanged
+def chat_response(message: str, history: List[Dict]):
+    """Handle chat messages and return responses"""
+    global assistant  # only read in this function; assignment happens inside initialize_assistant()
+    # Lazy load: initialization is attempted on each message for as long as assistant is still None
+    if assistant is None:
+        if not initialize_assistant():
+            return "I apologize, but I'm currently unavailable. Please try again later."
+    try:
+        return assistant.generate_response(message, history)  # history is forwarded, but generate_response does not read it
+    except Exception as e:
+        logger.error(f"Error in chat response: {str(e)}")
+        return "I encountered an error. Please try again."  # generic reply; the exception text is only logged
+# Gradio chat UI wired to chat_response, with three example prompts given as plain strings
 demo = gr.ChatInterface(
+    fn=chat_response,
+    title="Medical Assistant (Test Version)",
+    description="""This is a test version of the medical assistant.
+                   Please use it to verify basic functionality.""",
     examples=[
+        "What are the symptoms of malaria?",
+        "How can I prevent type 2 diabetes?",
+        "What should I do for a mild headache?"
     ],
+    # retry_btn=None,
+    # undo_btn=None,
+    # clear_btn="Clear"
 )
+# Start the Gradio app with default launch arguments, only when this file is run as a script
 if __name__ == "__main__":
+    demo.launch()