Update app.py
app.py CHANGED
@@ -17,6 +17,16 @@ except ImportError:
 model = None
 model_loaded = False
 
+# Default system prompt
+DEFAULT_SYSTEM_PROMPT = """You are MMed-Llama-Alpaca, a helpful AI assistant specialized in medical and healthcare topics. You provide accurate, evidence-based information while being empathetic and understanding.
+
+Important guidelines:
+- Always remind users that your responses are for educational purposes only
+- Encourage users to consult healthcare professionals for medical advice
+- Be thorough but clear in your explanations
+- If unsure about medical information, acknowledge limitations
+- Maintain a professional yet caring tone"""
+
 # HuggingFace repository information
 HF_REPO_ID = "Axcel1/MMed-llama-alpaca-Q4_K_M-GGUF"
 HF_FILENAME = "mmed-llama-alpaca-q4_k_m.gguf"
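For reference, load_model_from_gguf (defined elsewhere in app.py and not shown in this diff) presumably resolves these constants into a local GGUF file and a llama-cpp-python model. A minimal sketch of that flow, with load_gguf_model as a hypothetical stand-in and verbose=False as an assumption:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

def load_gguf_model(filename=HF_FILENAME, n_ctx=2048):
    # Download the GGUF file from the Hub (or reuse the local cache)
    gguf_path = hf_hub_download(repo_id=HF_REPO_ID, filename=filename)
    # Instantiate the llama.cpp model; n_ctx must match the UI's Context Size setting
    return Llama(model_path=gguf_path, n_ctx=n_ctx, verbose=False)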
@@ -148,7 +158,7 @@ def load_model_from_gguf(gguf_path=None, filename=None, n_ctx=2048, use_hf_downl
         print(error_msg)
         return False, f"❌ {error_msg}"
 
-def generate_response_stream(message, history, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
+def generate_response_stream(message, history, system_prompt, max_tokens=512, temperature=0.7, top_p=0.9, repeat_penalty=1.1):
     """Generate response from the model with streaming"""
     global model, model_loaded
 
@@ -160,6 +170,10 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     # Format the conversation history for Llama-3
     conversation = []
 
+    # Add system prompt if provided
+    if system_prompt and system_prompt.strip():
+        conversation.append({"role": "system", "content": system_prompt.strip()})
+
     # Add conversation history
     for human, assistant in history:
         conversation.append({"role": "user", "content": human})
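The conversation list uses the role/content message format accepted by llama-cpp-python's chat API. The completion loop itself falls outside this diff; a minimal sketch of how such a list is typically streamed, with stream_completion as a hypothetical helper mirroring the function's parameters:

def stream_completion(model, conversation, max_tokens=512, temperature=0.7,
                      top_p=0.9, repeat_penalty=1.1):
    partial = ""
    for chunk in model.create_chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        repeat_penalty=repeat_penalty,
        stream=True,
    ):
        # Each streamed chunk carries an incremental text delta (possibly empty)
        partial += chunk["choices"][0]["delta"].get("content", "")
        yield partial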
@@ -190,7 +204,7 @@ def generate_response_stream(message, history, max_tokens=512, temperature=0.7,
     except Exception as e:
         yield f"Error generating response: {str(e)}"
 
-def chat_interface(message, history, max_tokens, temperature, top_p, repeat_penalty):
+def chat_interface(message, history, system_prompt, max_tokens, temperature, top_p, repeat_penalty):
     """Main chat interface function"""
     if not message.strip():
         return history, ""
@@ -203,7 +217,7 @@ def chat_interface(message, history, max_tokens, temperature, top_p, repeat_pena
     history = history + [(message, "")]
 
     # Generate response
-    for response in generate_response_stream(message, history[:-1], max_tokens, temperature, top_p, repeat_penalty):
+    for response in generate_response_stream(message, history[:-1], system_prompt, max_tokens, temperature, top_p, repeat_penalty):
         history[-1] = (message, response)
         yield history, ""
 
@@ -211,6 +225,10 @@ def clear_chat():
     """Clear the chat history"""
     return [], ""
 
+def reset_system_prompt():
+    """Reset system prompt to default"""
+    return DEFAULT_SYSTEM_PROMPT
+
 def load_model_interface(context_size, selected_model):
     """Interface function to load model with configurable context size"""
     success, message = load_model_from_gguf(gguf_path=None, filename=selected_model, n_ctx=int(context_size), use_hf_download=True)
@@ -272,9 +290,25 @@ def create_interface():
 
         with gr.Row():
             with gr.Column(scale=4):
+                # System prompt configuration
+                gr.HTML("<h3>🎯 System Prompt Configuration</h3>")
+                with gr.Row():
+                    system_prompt = gr.Textbox(
+                        label="System Prompt",
+                        value=DEFAULT_SYSTEM_PROMPT,
+                        placeholder="Enter system prompt to define the AI's behavior and role...",
+                        lines=4,
+                        max_lines=8,
+                        scale=4,
+                        autoscroll=True,
+                    )
+                # with gr.Column(scale=1):
+                #     reset_prompt_btn = gr.Button("Reset to Default", variant="secondary", size="sm")
+                #     gr.HTML("<p style='font-size: 0.8em; color: #666; margin-top: 10px;'>The system prompt defines how the AI should behave and respond. Changes apply to new conversations.</p>")
+
                 # Chat interface
                 chatbot = gr.Chatbot(
-                    height=
+                    height=400,
                     show_copy_button=True,
                     bubble_full_width=False,
                     show_label=False,
@@ -295,8 +329,6 @@ def create_interface():
                 # Model loading section
                 gr.HTML("<h3>🔧 Model Control</h3>")
 
-                # gr.HTML(f"<p style='font-size: 0.9em; color: #666;'><strong>Repository:</strong> {HF_REPO_ID}</p>")
-
                 # Model selection dropdown
                 model_dropdown = gr.Dropdown(
                     choices=initial_choices,
@@ -305,6 +337,16 @@ def create_interface():
                     info="Choose from available models in the repository",
                     interactive=True
                 )
+
+                # Context size (limited for Spaces)
+                context_size = gr.Slider(
+                    minimum=512,
+                    maximum=8192,
+                    value=2048,
+                    step=256,
+                    label="Context Size",
+                    info="Token context window (requires model reload)"
+                )
 
                 load_btn = gr.Button("Load Model", variant="primary", size="lg")
                 model_status = gr.Textbox(
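llama.cpp fixes the context window when the model is created, so this slider only takes effect on reload. A sketch of the kind of click wiring load_btn presumably gets later in create_interface() (the actual handler is outside this diff):

load_btn.click(
    load_model_interface,                   # rebuilds the model with n_ctx=int(context_size)
    inputs=[context_size, model_dropdown],
    outputs=model_status,
)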
@@ -316,16 +358,7 @@ def create_interface():
 
                 # Generation parameters
                 gr.HTML("<h3>⚙️ Generation Settings</h3>")
-
-                # Context size (limited for Spaces)
-                context_size = gr.Slider(
-                    minimum=512,
-                    maximum=4096,
-                    value=2048,
-                    step=256,
-                    label="Context Size",
-                    info="Token context window (requires model reload)"
-                )
+
 
                 max_tokens = gr.Slider(
                     minimum=50,
@@ -367,7 +400,7 @@ def create_interface():
                 <p><strong>Quantization:</strong> Q4_K_M</p>
                 <p><strong>Format:</strong> GGUF (optimized)</p>
                 <p><strong>Backend:</strong> llama-cpp-python</p>
-                <p><strong>Features:</strong> CPU/GPU support, streaming</p>
+                <p><strong>Features:</strong> CPU/GPU support, streaming, system prompts</p>
                 <p><strong>Specialty:</strong> Medical assistance</p>
                 <p><strong>Auto-Optimization:</strong> CPU threads & GPU layers detected automatically</p>
                 """)
@@ -392,13 +425,13 @@ def create_interface():
 
         submit_btn.click(
             chat_interface,
-            inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+            inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
             outputs=[chatbot, msg]
         )
 
         msg.submit(
             chat_interface,
-            inputs=[msg, chatbot, max_tokens, temperature, top_p, repeat_penalty],
+            inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p, repeat_penalty],
             outputs=[chatbot, msg]
         )
 
@@ -407,6 +440,11 @@ def create_interface():
             outputs=[chatbot, msg]
         )
 
+        # reset_prompt_btn.click(
+        #     reset_system_prompt,
+        #     outputs=system_prompt
+        # )
+
     return demo
 
 if __name__ == "__main__":
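The diff ends inside the if __name__ guard; the remainder presumably builds and serves the app in the standard Gradio pattern. A hedged sketch (the queue and launch calls are assumptions, not part of the diff):

if __name__ == "__main__":
    demo = create_interface()
    # queue() is needed so the generator-based chat_interface can stream updates
    demo.queue()
    demo.launch()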