Update app.py
app.py CHANGED
@@ -17,6 +17,16 @@ except ImportError:
 model = None
 model_loaded = False
 
+# Default system prompt
+DEFAULT_SYSTEM_PROMPT = """You are MMed-Llama-Alpaca, a helpful AI assistant specialized in medical and healthcare topics. You provide accurate, evidence-based information while being empathetic and understanding.
+
+Important guidelines:
+- Always remind users that your responses are for educational purposes only
+- Encourage users to consult healthcare professionals for medical advice
+- Be thorough but clear in your explanations
+- If unsure about medical information, acknowledge limitations
+- Maintain a professional yet caring tone"""
+
 # HuggingFace repository information
 HF_REPO_ID = "Axcel1/MMed-llama-alpaca-Q4_K_M-GGUF"
 HF_FILENAME = "mmed-llama-alpaca-q4_k_m.gguf"
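For reference, load_model_from_gguf (defined elsewhere in app.py and not shown in this diff) presumably resolves these constants into a local GGUF file and a llama-cpp-python model. A minimal sketch of that flow, with load_gguf_model as a hypothetical stand-in and verbose=False as an assumption:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_gguf_model(filename=HF_FILENAME, n_ctx=2048):
    # Download the GGUF file from the Hub (or reuse the local cache)
    gguf_path = hf_hub_download(repo_id=HF_REPO_ID, filename=filename)
    # Instantiate the llama.cpp model; n_ctx must match the UI's Context Size setting
    return Llama(model_path=gguf_path, n_ctx=n_ctx, verbose=False)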
@@ -148,7 +158,7 @@ def load_model_from_gguf(gguf_path=None, filename=None, n_ctx=2048, use_hf_downl
         print(error_msg)
         return False, f"❌ {error_msg}"
 
-def generate_response_stream(message, history, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
+def generate_response_stream(message, history, system_prompt, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
     """Generate response from the model with streaming"""
     global model, model_loaded
 
@@ -160,6 +170,10 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     # Format the conversation history for Llama-3
     conversation = []
 
+    # Add system prompt if provided
+    if system_prompt and system_prompt.strip():
+        conversation.append({"role": "system", "content": system_prompt.strip()})
+
     # Add conversation history
     for human, assistant in history:
         conversation.append({"role": "user", "content": human})
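The conversation list uses the role/content message format accepted by llama-cpp-python's chat API. The completion loop itself falls outside this diff; a minimal sketch of how such a list is typically streamed, with stream_completion as a hypothetical helper mirroring the function's parameters:

def stream_completion(model, conversation, max_tokens=512, temperature=0.7,
                      top_p=0.9, repeat_penalty=1.1):
    partial = ""
    for chunk in model.create_chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        stream=True,
    ):
        # Each streamed chunk carries an incremental text delta (possibly empty)
        partial += chunk["choices"][0]["delta"].get("content", "")
        yield partial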
@@ -190,7 +204,7 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     except Exception as e:
         yield f"Error generating response: {str(e)}"
 
-def chat_interface(message, history, max_tokens, temperature, top_p, repeat_penalty):
+def chat_interface(message, history, system_prompt, max_tokens, temperature, top_p, repeat_penalty):
     """Main chat interface function"""
     if not message.strip():
         return history, ""
@@ -203,7 +217,7 @@ def chat_interface(message, history, max_tokens, temperature, top_p, repeat_pena
     history = history + [(message, "")]
 
     # Generate response
-    for response in generate_response_stream(message, history[:-1], max_tokens, temperature, top_p, repeat_penalty):
+    for response in generate_response_stream(message, history[:-1], system_prompt, max_tokens, temperature, top_p, repeat_penalty):
         history[-1] = (message, response)
         yield history, ""
 
@@ -211,6 +225,10 @@ def clear_chat():
     """Clear the chat history"""
     return [], ""
 
+def reset_system_prompt():
+    """Reset system prompt to default"""
+    return DEFAULT_SYSTEM_PROMPT
+
 def load_model_interface(context_size, selected_model):
     """Interface function to load model with configurable context size"""
     success, message = load_model_from_gguf(gguf_path=None, filename=selected_model, n_ctx=int(context_size), use_hf_download=True)
@@ -272,9 +290,25 @@ def create_interface():
 
         with gr.Row():
             with gr.Column(scale=4):
+                # System prompt configuration
+                gr.HTML("<h3>🎯 System Prompt Configuration</h3>")
+                with gr.Row():
+                    system_prompt = gr.Textbox(
+                        label="System Prompt",
+                        value=DEFAULT_SYSTEM_PROMPT,
+                        placeholder="Enter system prompt to define the AI's behavior and role...",
+                        lines=4,
+                        max_lines=8,
+                        scale=4,
+                        autoscroll=True,
+                    )
+                # with gr.Column(scale=1):
+                #     reset_prompt_btn = gr.Button("Reset to Default", variant="secondary", size="sm")
+                #     gr.HTML("<p style='font-size: 0.8em; color: #666; margin-top: 10px;'>The system prompt defines how the AI should behave and respond. Changes apply to new conversations.</p>")
+
                 # Chat interface
                 chatbot = gr.Chatbot(
-                    height=
+                    height=400,
                     show_copy_button=True,
                     bubble_full_width=False,
                     show_label=False,
@@ -295,8 +329,6 @@ def create_interface():
                 # Model loading section
                 gr.HTML("<h3>🔧 Model Control</h3>")
 
-                # gr.HTML(f"<p style='font-size: 0.9em; color: #666;'><strong>Repository:</strong> {HF_REPO_ID}</p>")
-
                 # Model selection dropdown
                 model_dropdown = gr.Dropdown(
                     choices=initial_choices,
@@ -305,6 +337,16 @@ def create_interface():
                     info="Choose from available models in the repository",
                     interactive=True
                 )
+
+                # Context size (limited for Spaces)
+                context_size = gr.Slider(
+                    minimum=512,
+                    maximum=8192,
+                    value=2048,
+                    step=256,
+                    label="Context Size",
+                    info="Token context window (requires model reload)"
+                )
 
                 load_btn = gr.Button("Load Model", variant="primary", size="lg")
                 model_status = gr.Textbox(
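llama.cpp fixes the context window when the model is created, so this slider only takes effect on reload. A sketch of the kind of click wiring load_btn presumably gets later in create_interface() (the actual handler is outside this diff):

load_btn.click(
    load_model_interface,                   # rebuilds the model with n_ctx=int(context_size)
    inputs=[context_size, model_dropdown],
    outputs=model_status,
)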
@@ -316,16 +358,7 @@ def create_interface():
 
                 # Generation parameters
                 gr.HTML("<h3>⚙️ Generation Settings</h3>")
-
-                # Context size (limited for Spaces)
-                context_size = gr.Slider(
-                    minimum=512,
-                    maximum=4096,
-                    value=2048,
-                    step=256,
-                    label="Context Size",
-                    info="Token context window (requires model reload)"
-                )
+
 
                 max_tokens = gr.Slider(
                     minimum=50,
@@ -367,7 +400,7 @@ def create_interface():
                 <p><strong>Quantization:</strong> Q4_K_M</p>
                 <p><strong>Format:</strong> GGUF (optimized)</p>
                 <p><strong>Backend:</strong> llama-cpp-python</p>
-                <p><strong>Features:</strong> CPU/GPU support, streaming</p>
+                <p><strong>Features:</strong> CPU/GPU support, streaming, system prompts</p>
                 <p><strong>Specialty:</strong> Medical assistance</p>
                 <p><strong>Auto-Optimization:</strong> CPU threads & GPU layers detected automatically</p>
                 """)
@@ -392,13 +425,13 @@ def create_interface():
 
         submit_btn.click(
             chat_interface,
-            inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+            inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
             outputs=[chatbot, msg]
         )
 
         msg.submit(
             chat_interface,
-            inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+            inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
             outputs=[chatbot, msg]
         )
 
@@ -407,6 +440,11 @@ def create_interface():
             outputs=[chatbot, msg]
         )
 
+        # reset_prompt_btn.click(
+        #     reset_system_prompt,
+        #     outputs=system_prompt
+        # )
+
     return demo
 
 if __name__ == "__main__":
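The diff ends inside the if __name__ guard; the remainder presumably builds and serves the app in the standard Gradio pattern. A hedged sketch (the queue and launch calls are assumptions, not part of the diff):

if __name__ == "__main__":
    demo = create_interface()
    # queue() is needed so the generator-based chat_interface can stream updates
    demo.queue()
    demo.launch()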