Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -28,6 +28,7 @@ except Exception:
|
|
| 28 |
# Config / defaults
|
| 29 |
# ---------------------------------------------------------------------------
|
| 30 |
DEFAULT_MODEL = "PioTio/Nanbeige2.5"
|
|
|
|
| 31 |
DEFAULT_SYSTEM_PROMPT = "You are a helpful, honest assistant. Answer succinctly unless asked otherwise."
|
| 32 |
|
| 33 |
# globals populated by load_model()
|
|
@@ -306,6 +307,18 @@ def submit_message(user_message: str, history, system_prompt: str, temperature:
|
|
| 306 |
|
| 307 |
prompt = build_prompt(pairs[:-1], user_message, system_prompt, max_history)
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
if stream:
|
| 310 |
# stream partial assistant outputs
|
| 311 |
for partial in _generate_stream(prompt, temperature, top_p, top_k, max_new_tokens):
|
|
@@ -363,6 +376,7 @@ with gr.Blocks(title="Nanbeige2.5 — Chat UI") as demo:
|
|
| 363 |
with gr.Row():
|
| 364 |
model_input = gr.Textbox(value=DEFAULT_MODEL, label="Model repo (HF)", interactive=True)
|
| 365 |
load_btn = gr.Button("Load model")
|
|
|
|
| 366 |
model_status = gr.Textbox(value="Model not loaded", label="Status", interactive=False)
|
| 367 |
|
| 368 |
with gr.Row():
|
|
@@ -427,6 +441,17 @@ with gr.Blocks(title="Nanbeige2.5 — Chat UI") as demo:
|
|
| 427 |
else:
|
| 428 |
model_status.value = _bg_initial_load()
|
| 429 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
gr.Markdown("---\n**Tips:** select GPU hardware for smoother streaming and enable 4-bit bitsandbytes by installing `bitsandbytes` in `requirements.txt`.")
|
| 431 |
|
| 432 |
|
|
|
|
| 28 |
# Config / defaults
|
| 29 |
# ---------------------------------------------------------------------------
|
| 30 |
DEFAULT_MODEL = "PioTio/Nanbeige2.5"
|
| 31 |
+
CPU_DEMO_MODEL = "distilgpt2" # fast, small CPU-friendly fallback for demos
|
| 32 |
DEFAULT_SYSTEM_PROMPT = "You are a helpful, honest assistant. Answer succinctly unless asked otherwise."
|
| 33 |
|
| 34 |
# globals populated by load_model()
|
|
|
|
| 307 |
|
| 308 |
prompt = build_prompt(pairs[:-1], user_message, system_prompt, max_history)
|
| 309 |
|
| 310 |
+
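# If the full Nanbeige model is loaded on CPU, reply with a warning and stop without generating.
# NOTE(review): the warning text mentions `ALLOW_CPU_NANBEIGE=1`, but the condition below does not check it — confirm it is honored elsewhere.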
|
| 311 |
+
if MODEL_NAME == DEFAULT_MODEL and DEVICE == "cpu":
|
| 312 |
+
warning = (
|
| 313 |
+
"⚠️ **Nanbeige is too large for CPU inference and will be extremely slow.**\n\n"
|
| 314 |
+
"Options:\n"
|
| 315 |
+
"- Enable GPU in Space settings (recommended)\n"
|
| 316 |
+
f"- Click **Load fast CPU demo ({CPU_DEMO_MODEL})** for a quick, low-cost demo\n"
|
| 317 |
+
"- Or set `ALLOW_CPU_NANBEIGE=1` in the server env to force CPU generation (not recommended)")
|
| 318 |
+
pairs[-1] = (user_message, warning)
|
| 319 |
+
yield pairs, ""
|
| 320 |
+
return
|
| 321 |
+
|
| 322 |
if stream:
|
| 323 |
# stream partial assistant outputs
|
| 324 |
for partial in _generate_stream(prompt, temperature, top_p, top_k, max_new_tokens):
|
|
|
|
| 376 |
with gr.Row():
|
| 377 |
model_input = gr.Textbox(value=DEFAULT_MODEL, label="Model repo (HF)", interactive=True)
|
| 378 |
load_btn = gr.Button("Load model")
|
| 379 |
+
model_demo_btn = gr.Button(f"Load fast CPU demo ({CPU_DEMO_MODEL})")
|
| 380 |
model_status = gr.Textbox(value="Model not loaded", label="Status", interactive=False)
|
| 381 |
|
| 382 |
with gr.Row():
|
|
|
|
| 441 |
else:
|
| 442 |
model_status.value = _bg_initial_load()
|
| 443 |
|
| 444 |
+
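# CPU warning / demo hint (visible in UI)
# NOTE(review): the text below hard-codes "distilgpt2", while the button label is built from CPU_DEMO_MODEL — the two can drift apart.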
|
| 445 |
+
gr.Markdown("""
|
| 446 |
+
**⚠️ If this Space is running on CPU, `PioTio/Nanbeige2.5` will be extremely slow.**
|
| 447 |
+
- Enable GPU in Space Settings for real-time use.
|
| 448 |
+
- Or click **Load fast CPU demo (distilgpt2)** for an immediate, low-cost demo reply.
|
| 449 |
+
""")
|
| 450 |
+
|
| 451 |
+
# wire demo button
|
| 452 |
+
model_demo_btn.click(fn=lambda: load_model_ui(CPU_DEMO_MODEL), inputs=None, outputs=model_status)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
gr.Markdown("---\n**Tips:** select GPU hardware for smoother streaming and enable 4-bit bitsandbytes by installing `bitsandbytes` in `requirements.txt`.")
|
| 456 |
|
| 457 |
|