Spaces:

benardo0
/

Nurses

Runtime error

App Files Files Community

benardo0 committed on Jan 22, 2025

Commit

94dc8bb

verified ·

1 Parent(s): b1de9b2

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -28

app.py CHANGED Viewed

@@ -9,20 +9,14 @@ import os
 import time
 import gc
 from huggingface_hub import hf_hub_download
-# Environment variables for configuration
 MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "mradermacher/Llama3-Med42-8B-GGUF")
-MODEL_FILENAME = os.getenv("MODEL_FILENAME", "Llama3-Med42-8B.Q4_K_M.gguf")
 N_THREADS = int(os.getenv("N_THREADS", "4"))
-# Import llama_cpp with error handling for better debugging
-try:
-    from llama_cpp import Llama
-    LLAMA_IMPORT_ERROR = None
-except Exception as e:
-    LLAMA_IMPORT_ERROR = str(e)
-    print(f"Warning: Failed to import llama_cpp: {e}")
 class ConsultationState(Enum):
     INITIAL = "initial"
     GATHERING_INFO = "gathering_info"
@@ -39,7 +33,7 @@ class ChatResponse(BaseModel):
     response: str
     finished: bool
-# Standard health assessment questions for thorough patient evaluation
 HEALTH_ASSESSMENT_QUESTIONS = [
     "What are your current symptoms and how long have you been experiencing them?",
     "Do you have any pre-existing medical conditions or chronic illnesses?",
@@ -58,22 +52,27 @@ health information before providing any medical advice.
 class NurseOgeAssistant:
     def __init__(self):
-        if LLAMA_IMPORT_ERROR:
-            raise ImportError(f"Cannot initialize NurseOgeAssistant due to llama_cpp import error: {LLAMA_IMPORT_ERROR}")
         try:
-            # Initialize the model using from_pretrained for better compatibility with free tier
-            self.llm = Llama.from_pretrained(
                 repo_id=MODEL_REPO_ID,
                 filename=MODEL_FILENAME,
                 n_ctx=2048,      # Context window size
-                n_threads=N_THREADS,  # Adjust based on available CPU resources
-                n_gpu_layers=0   # CPU-only inference for free tier
             )
         except Exception as e:
             raise RuntimeError(f"Failed to initialize the model: {str(e)}")
         self.consultation_states = {}
         self.gathered_info = {}
@@ -151,17 +150,19 @@ class NurseOgeAssistant:
                     )
                 else:
                     self.consultation_states[conversation_id] = ConsultationState.DIAGNOSIS
                     context = "\n".join([
                         f"Q: {q}\nA: {a}" for q, a in
                         zip(HEALTH_ASSESSMENT_QUESTIONS, self.gathered_info[conversation_id])
                     ])
                     messages = [
                         {"role": "system", "content": NURSE_OGE_IDENTITY},
                         {"role": "user", "content": f"Based on the following patient information, provide a thorough assessment and recommendations:\n\n{context}\n\nOriginal query: {message}"}
                     ]
-                    # Implement retry logic for API calls
                     max_retries = 3
                     retry_delay = 2
@@ -169,8 +170,10 @@ class NurseOgeAssistant:
                         try:
                             response = self.llm.create_chat_completion(
                                 messages=messages,
-                                max_tokens=512,  # Reduced for free tier
-                                temperature=0.7
                             )
                             break
                         except Exception as e:
@@ -182,6 +185,7 @@ class NurseOgeAssistant:
                                 finished=True
                             )
                     self.consultation_states[conversation_id] = ConsultationState.INITIAL
                     self.gathered_info[conversation_id] = []
@@ -205,11 +209,12 @@ nurse_oge = None
 # Add memory management middleware
 @app.middleware("http")
 async def add_memory_management(request: Request, call_next):
-    gc.collect()  # Force garbage collection before processing request
     response = await call_next(request)
-    gc.collect()  # Clean up after request
     return response
 @app.on_event("startup")
 async def startup_event():
     global nurse_oge
@@ -218,10 +223,12 @@ async def startup_event():
     except Exception as e:
         print(f"Failed to initialize NurseOgeAssistant: {e}")
 @app.get("/health")
 async def health_check():
     return {"status": "healthy", "model_loaded": nurse_oge is not None}
 @app.post("/chat")
 async def chat_endpoint(request: ChatRequest):
     if nurse_oge is None:
@@ -243,7 +250,7 @@ async def chat_endpoint(request: ChatRequest):
     return response
-# Gradio interface
 def gradio_chat(message, history):
     if nurse_oge is None:
         return "The medical assistant is not available at the moment. Please try again later."
@@ -251,17 +258,42 @@ def gradio_chat(message, history):
     response = nurse_oge.process_message("gradio_user", message, history)
     return response.response
-# Create and configure Gradio interface
 demo = gr.ChatInterface(
     fn=gradio_chat,
-    title="Nurse Oge",
-    description="Finetuned llama 3.0 for medical diagnosis and all. This is just a demo",
-    theme="soft"
 )
 # Mount both FastAPI and Gradio
 app = gr.mount_gradio_app(app, demo, path="/gradio")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

 import time
 import gc
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+# Configuration variables that can be set through environment variables
 MODEL_REPO_ID = os.getenv("MODEL_REPO_ID", "mradermacher/Llama3-Med42-8B-GGUF")
+MODEL_FILENAME = os.getenv("MODEL_FILENAME", "Llama3-Med42-8B.Q5_K_M.gguf")
 N_THREADS = int(os.getenv("N_THREADS", "4"))
+# Define our data models for API requests and responses
 class ConsultationState(Enum):
     INITIAL = "initial"
     GATHERING_INFO = "gathering_info"
     response: str
     finished: bool
+# Define our standard health assessment questions
 HEALTH_ASSESSMENT_QUESTIONS = [
     "What are your current symptoms and how long have you been experiencing them?",
     "Do you have any pre-existing medical conditions or chronic illnesses?",
 class NurseOgeAssistant:
     def __init__(self):
         try:
+            # Download the model file from Hugging Face
+            model_path = hf_hub_download(
                 repo_id=MODEL_REPO_ID,
                 filename=MODEL_FILENAME,
+                resume_download=True
+            )
+            # Initialize the Llama model with appropriate parameters
+            self.llm = Llama(
+                model_path=model_path,
                 n_ctx=2048,      # Context window size
+                n_threads=N_THREADS,  # CPU threads to use
+                n_gpu_layers=0,  # CPU-only inference
+                verbose=False    # Set to True for debugging
             )
         except Exception as e:
             raise RuntimeError(f"Failed to initialize the model: {str(e)}")
+        # Initialize conversation state management
         self.consultation_states = {}
         self.gathered_info = {}
                     )
                 else:
                     self.consultation_states[conversation_id] = ConsultationState.DIAGNOSIS
+                    # Prepare context from gathered information
                     context = "\n".join([
                         f"Q: {q}\nA: {a}" for q, a in
                         zip(HEALTH_ASSESSMENT_QUESTIONS, self.gathered_info[conversation_id])
                     ])
+                    # Prepare messages for the model
                     messages = [
                         {"role": "system", "content": NURSE_OGE_IDENTITY},
                         {"role": "user", "content": f"Based on the following patient information, provide a thorough assessment and recommendations:\n\n{context}\n\nOriginal query: {message}"}
                     ]
+                    # Implement retry logic for model inference
                     max_retries = 3
                     retry_delay = 2
                         try:
                             response = self.llm.create_chat_completion(
                                 messages=messages,
+                                max_tokens=512,
+                                temperature=0.7,
+                                top_p=0.95,
+                                stop=["</s>"]
                             )
                             break
                         except Exception as e:
                                 finished=True
                             )
+                    # Reset conversation state
                     self.consultation_states[conversation_id] = ConsultationState.INITIAL
                     self.gathered_info[conversation_id] = []
 # Add memory management middleware
 @app.middleware("http")
 async def add_memory_management(request: Request, call_next):
+    # Wraps each HTTP request: force a garbage-collection pass, hand the
+    # request to the next handler, collect again once that handler has
+    # produced its response, then return the response unchanged.
+    gc.collect()
     response = await call_next(request)
+    gc.collect()
     return response
+# Initialize the assistant during startup
 @app.on_event("startup")
 async def startup_event():
     global nurse_oge
     except Exception as e:
         print(f"Failed to initialize NurseOgeAssistant: {e}")
+# Health check endpoint
 @app.get("/health")
 async def health_check():
     # "status" is a constant; "model_loaded" is the only field that varies and
     # reports whether the module-level nurse_oge assistant is set (not None).
     # NOTE(review): a missing assistant is therefore still reported as
     # "healthy" - confirm that is what callers of this endpoint expect.
     return {"status": "healthy", "model_loaded": nurse_oge is not None}
+# Chat endpoint
 @app.post("/chat")
 async def chat_endpoint(request: ChatRequest):
     if nurse_oge is None:
     return response
+# Gradio chat interface function
 def gradio_chat(message, history):
     # Chat callback passed to gr.ChatInterface as fn: returns the reply text
     # for one user message.
     if nurse_oge is None:
         # No assistant instance is available; answer with a fixed notice.
         return "The medical assistant is not available at the moment. Please try again later."
     # NOTE(review): every call uses the same conversation id ("gradio_user"),
     # so per-conversation state is presumably shared between all Gradio
     # sessions - confirm this is intended.
     response = nurse_oge.process_message("gradio_user", message, history)
     # Only the reply text is handed back; other fields of the result are not used here.
     return response.response
+# Create and configure Gradio interface with enhanced styling
 demo = gr.ChatInterface(
     fn=gradio_chat,
+    title="Nurse Oge - Medical Assistant",
+    # The triple-quoted description keeps its line break and the leading
+    # spaces of the continuation line as part of the string.
+    description="""Welcome to Nurse Oge, your AI medical assistant specialized in serving Nigerian communities.
+                   This system provides medical guidance while ensuring comprehensive health information gathering.""",
+    examples=[
+        ["What are the common symptoms of malaria?"],
+        ["I've been having headaches for the past week"],
+        ["How can I prevent typhoid fever?"],
+    ],
+    theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="purple",
+    ),
+    # NOTE(review): retry_btn / undo_btn / clear_btn do not appear to be
+    # accepted by ChatInterface in newer Gradio releases (5.x) - confirm
+    # against the Gradio version this Space pins, since an unknown keyword
+    # would raise at import time.
+    retry_btn="Try Again",
+    undo_btn="Undo Last",
+    clear_btn="Clear Chat"
 )
+# Add custom CSS for better appearance
+demo.css = """
+    .gradio-container {
+        font-family: 'Arial', sans-serif;
+    }
+    .chat-message {
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin-bottom: 0.5rem;
+    }
+"""
 # Mount both FastAPI and Gradio
 app = gr.mount_gradio_app(app, demo, path="/gradio")
+# Run the application
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)