Spaces:

benardo0
/

Nurses

Runtime error

App Files Files Community

benardo0 committed on Jan 22, 2025

Commit

e53bd9c

verified ·

1 Parent(s): 94dc8bb

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -35

app.py CHANGED Viewed

@@ -8,15 +8,17 @@ import re
 import os
 import time
 import gc
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 # Configuration variables that can be set through environment variables
 MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "mradermacher/Llama3-Med42-8B-GGUF")
 MODEL_FILENAME = os.getenv("MODEL_FILENAME", "Llama3-Med42-8B.Q5_K_M.gguf")
 N_THREADS = int(os.getenv("N_THREADS", "4"))
-# Define our data models for API requests and responses
 class ConsultationState(Enum):
     INITIAL = "initial"
     GATHERING_INFO = "gathering_info"
@@ -33,7 +35,7 @@ class ChatResponse(BaseModel):
     response: str
     finished: bool
-# Define our standard health assessment questions
 HEALTH_ASSESSMENT_QUESTIONS = [
     "What are your current symptoms and how long have you been experiencing them?",
     "Do you have any pre-existing medical conditions or chronic illnesses?",
@@ -42,7 +44,7 @@ HEALTH_ASSESSMENT_QUESTIONS = [
     "Have you had any similar symptoms in the past? If yes, what treatments worked?"
 ]
-# Define the AI assistant's identity and role
 NURSE_OGE_IDENTITY = """
 You are Nurse Oge, a medical AI assistant focused on serving patients in Nigeria. Always be empathetic,
 professional, and thorough in your assessments. When asked about your identity, explain that you are
@@ -51,32 +53,29 @@ health information before providing any medical advice.
 """
 class NurseOgeAssistant:
     def __init__(self):
         try:
-            # Download the model file from Hugging Face
-            model_path = hf_hub_download(
                 repo_id=MODEL_REPO_ID,
                 filename=MODEL_FILENAME,
-                resume_download=True
-            )
-            # Initialize the Llama model with appropriate parameters
-            self.llm = Llama(
-                model_path=model_path,
                 n_ctx=2048,      # Context window size
                 n_threads=N_THREADS,  # CPU threads to use
-                n_gpu_layers=0,  # CPU-only inference
-                verbose=False    # Set to True for debugging
             )
         except Exception as e:
             raise RuntimeError(f"Failed to initialize the model: {str(e)}")
-        # Initialize conversation state management
         self.consultation_states = {}
         self.gathered_info = {}
     def _is_identity_question(self, message: str) -> bool:
         identity_patterns = [
             r"who are you",
             r"what are you",
@@ -87,6 +86,7 @@ class NurseOgeAssistant:
         return any(re.search(pattern, message.lower()) for pattern in identity_patterns)
     def _is_location_question(self, message: str) -> bool:
         location_patterns = [
             r"where are you",
             r"which country",
@@ -97,6 +97,7 @@ class NurseOgeAssistant:
         return any(re.search(pattern, message.lower()) for pattern in location_patterns)
     def _get_next_assessment_question(self, conversation_id: str) -> Optional[str]:
         if conversation_id not in self.gathered_info:
             self.gathered_info[conversation_id] = []
@@ -106,6 +107,9 @@ class NurseOgeAssistant:
         return None
     async def process_message(self, conversation_id: str, message: str, history: List[Dict]) -> ChatResponse:
         try:
             # Initialize state for new conversations
             if conversation_id not in self.consultation_states:
@@ -159,7 +163,7 @@ class NurseOgeAssistant:
                     # Prepare messages for the model
                     messages = [
                         {"role": "system", "content": NURSE_OGE_IDENTITY},
-                        {"role": "user", "content": f"Based on the following patient information, provide a thorough assessment and recommendations:\n\n{context}\n\nOriginal query: {message}"}
                     ]
                     # Implement retry logic for model inference
@@ -200,37 +204,41 @@ class NurseOgeAssistant:
                 finished=True
             )
-# Initialize FastAPI
-app = FastAPI()
-# Create a global variable for our assistant
-nurse_oge = None
 # Add memory management middleware
 @app.middleware("http")
 async def add_memory_management(request: Request, call_next):
     gc.collect()
     response = await call_next(request)
     gc.collect()
     return response
-# Initialize the assistant during startup
-@app.on_event("startup")
-async def startup_event():
-    global nurse_oge
-    try:
-        nurse_oge = NurseOgeAssistant()
-    except Exception as e:
-        print(f"Failed to initialize NurseOgeAssistant: {e}")
 # Health check endpoint
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "model_loaded": nurse_oge is not None}
 # Chat endpoint
 @app.post("/chat")
 async def chat_endpoint(request: ChatRequest):
     if nurse_oge is None:
         raise HTTPException(
             status_code=503,
@@ -251,14 +259,15 @@ async def chat_endpoint(request: ChatRequest):
     return response
 # Gradio chat interface function
-def gradio_chat(message, history):
     if nurse_oge is None:
         return "The medical assistant is not available at the moment. Please try again later."
-    response = nurse_oge.process_message("gradio_user", message, history)
     return response.response
-# Create and configure Gradio interface with enhanced styling
 demo = gr.ChatInterface(
     fn=gradio_chat,
     title="Nurse Oge - Medical Assistant",
@@ -272,10 +281,7 @@ demo = gr.ChatInterface(
     theme=gr.themes.Soft(
         primary_hue="blue",
         secondary_hue="purple",
-    ),
-    retry_btn="Try Again",
-    undo_btn="Undo Last",
-    clear_btn="Clear Chat"
 )
 # Add custom CSS for better appearance

 import os
 import time
 import gc
+from contextlib import asynccontextmanager
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 # Configuration variables that can be set through environment variables
+# These allow for flexible deployment configuration without code changes
 MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "mradermacher/Llama3-Med42-8B-GGUF")
 MODEL_FILENAME = os.getenv("MODEL_FILENAME", "Llama3-Med42-8B.Q5_K_M.gguf")
 N_THREADS = int(os.getenv("N_THREADS", "4"))
+# Data models for API request/response handling
 class ConsultationState(Enum):
     INITIAL = "initial"
     GATHERING_INFO = "gathering_info"
     response: str
     finished: bool
+# Standardized health assessment questions for consistent patient evaluation
 HEALTH_ASSESSMENT_QUESTIONS = [
     "What are your current symptoms and how long have you been experiencing them?",
     "Do you have any pre-existing medical conditions or chronic illnesses?",
     "Have you had any similar symptoms in the past? If yes, what treatments worked?"
 ]
+# AI assistant's identity and role definition
 NURSE_OGE_IDENTITY = """
 You are Nurse Oge, a medical AI assistant focused on serving patients in Nigeria. Always be empathetic,
 professional, and thorough in your assessments. When asked about your identity, explain that you are
 """
 class NurseOgeAssistant:
+    """
+    Main assistant class that handles conversation management and medical consultations
+    """
     def __init__(self):
         try:
+            # Initialize the Llama model using from_pretrained as per documentation
+            self.llm = Llama.from_pretrained(
                 repo_id=MODEL_REPO_ID,
                 filename=MODEL_FILENAME,
                 n_ctx=2048,      # Context window size
                 n_threads=N_THREADS,  # CPU threads to use
+                n_gpu_layers=0   # CPU-only inference
             )
         except Exception as e:
             raise RuntimeError(f"Failed to initialize the model: {str(e)}")
+        # State management for multiple concurrent conversations
         self.consultation_states = {}
         self.gathered_info = {}
     def _is_identity_question(self, message: str) -> bool:
+        """Detect if the user is asking about the assistant's identity"""
         identity_patterns = [
             r"who are you",
             r"what are you",
         return any(re.search(pattern, message.lower()) for pattern in identity_patterns)
     def _is_location_question(self, message: str) -> bool:
+        """Detect if the user is asking about the assistant's location"""
         location_patterns = [
             r"where are you",
             r"which country",
         return any(re.search(pattern, message.lower()) for pattern in location_patterns)
     def _get_next_assessment_question(self, conversation_id: str) -> Optional[str]:
+        """Get the next health assessment question based on conversation progress"""
         if conversation_id not in self.gathered_info:
             self.gathered_info[conversation_id] = []
         return None
     async def process_message(self, conversation_id: str, message: str, history: List[Dict]) -> ChatResponse:
+        """
+        Process incoming messages and manage the conversation flow
+        """
         try:
             # Initialize state for new conversations
             if conversation_id not in self.consultation_states:
                     # Prepare messages for the model
                     messages = [
                         {"role": "system", "content": NURSE_OGE_IDENTITY},
+                        {"role": "user", "content": f"Based on the following patient information, provide thorough assessment, diagnosis and recommendations:\n\n{context}\n\nOriginal query: {message}"}
                     ]
                     # Implement retry logic for model inference
                 finished=True
             )
+# Define FastAPI lifespan for startup/shutdown events
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Initialize on startup
+    global nurse_oge
+    try:
+        nurse_oge = NurseOgeAssistant()
+    except Exception as e:
+        print(f"Failed to initialize NurseOgeAssistant: {e}")
+    yield
+    # Clean up on shutdown if needed
+    # Add cleanup code here
+# Initialize FastAPI with lifespan
+app = FastAPI(lifespan=lifespan)
 # Add memory management middleware
 @app.middleware("http")
 async def add_memory_management(request: Request, call_next):
+    """Middleware to help manage memory usage"""
     gc.collect()
     response = await call_next(request)
     gc.collect()
     return response
 # Health check endpoint
 @app.get("/health")
 async def health_check():
+    """Endpoint to verify service health"""
     return {"status": "healthy", "model_loaded": nurse_oge is not None}
 # Chat endpoint
 @app.post("/chat")
 async def chat_endpoint(request: ChatRequest):
+    """Main chat endpoint for API interactions"""
     if nurse_oge is None:
         raise HTTPException(
             status_code=503,
     return response
 # Gradio chat interface function
+async def gradio_chat(message, history):
+    """Handler for Gradio chat interface"""
     if nurse_oge is None:
         return "The medical assistant is not available at the moment. Please try again later."
+    response = await nurse_oge.process_message("gradio_user", message, history)
     return response.response
+# Create and configure Gradio interface
 demo = gr.ChatInterface(
     fn=gradio_chat,
     title="Nurse Oge - Medical Assistant",
     theme=gr.themes.Soft(
         primary_hue="blue",
         secondary_hue="purple",
+    )
 )
 # Add custom CSS for better appearance