PioTio committed on
Commit
e989a17
·
verified ·
1 Parent(s): c39961e

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +25 -0
app.py CHANGED
@@ -28,6 +28,7 @@ except Exception:
28
  # Config / defaults
29
  # ---------------------------------------------------------------------------
30
  DEFAULT_MODEL = "PioTio/Nanbeige2.5"
 
31
  DEFAULT_SYSTEM_PROMPT = "You are a helpful, honest assistant. Answer succinctly unless asked otherwise."
32
 
33
  # globals populated by load_model()
@@ -306,6 +307,18 @@ def submit_message(user_message: str, history, system_prompt: str, temperature:
306
 
307
  prompt = build_prompt(pairs[:-1], user_message, system_prompt, max_history)
308
 
 
 
 
 
 
 
 
 
 
 
 
 
309
  if stream:
310
  # stream partial assistant outputs
311
  for partial in _generate_stream(prompt, temperature, top_p, top_k, max_new_tokens):
@@ -363,6 +376,7 @@ with gr.Blocks(title="Nanbeige2.5 — Chat UI") as demo:
363
  with gr.Row():
364
  model_input = gr.Textbox(value=DEFAULT_MODEL, label="Model repo (HF)", interactive=True)
365
  load_btn = gr.Button("Load model")
 
366
  model_status = gr.Textbox(value="Model not loaded", label="Status", interactive=False)
367
 
368
  with gr.Row():
@@ -427,6 +441,17 @@ with gr.Blocks(title="Nanbeige2.5 — Chat UI") as demo:
427
  else:
428
  model_status.value = _bg_initial_load()
429
 
 
 
 
 
 
 
 
 
 
 
 
430
  gr.Markdown("---\n**Tips:** select GPU hardware for smoother streaming and enable 4-bit bitsandbytes by installing `bitsandbytes` in `requirements.txt`.")
431
 
432
 
 
28
  # Config / defaults
29
  # ---------------------------------------------------------------------------
30
  DEFAULT_MODEL = "PioTio/Nanbeige2.5"
31
+ CPU_DEMO_MODEL = "distilgpt2" # fast, small CPU-friendly fallback for demos
32
  DEFAULT_SYSTEM_PROMPT = "You are a helpful, honest assistant. Answer succinctly unless asked otherwise."
33
 
34
  # globals populated by load_model()
 
307
 
308
  prompt = build_prompt(pairs[:-1], user_message, system_prompt, max_history)
309
 
310
+ # If user is running the full Nanbeige model on CPU, warn and suggest options
311
+ if MODEL_NAME == DEFAULT_MODEL and DEVICE == "cpu":
312
+ warning = (
313
+ "⚠️ **Nanbeige is too large for CPU inference and will be extremely slow.**\n\n"
314
+ "Options:\n"
315
+ "- Enable GPU in Space settings (recommended)\n"
316
+ f"- Click **Load fast CPU demo ({CPU_DEMO_MODEL})** for a quick, low-cost demo\n"
317
+ "- Or set `ALLOW_CPU_NANBEIGE=1` in the server env to force CPU generation (not recommended)")
318
+ pairs[-1] = (user_message, warning)
319
+ yield pairs, ""
320
+ return
321
+
322
  if stream:
323
  # stream partial assistant outputs
324
  for partial in _generate_stream(prompt, temperature, top_p, top_k, max_new_tokens):
 
376
  with gr.Row():
377
  model_input = gr.Textbox(value=DEFAULT_MODEL, label="Model repo (HF)", interactive=True)
378
  load_btn = gr.Button("Load model")
379
+ model_demo_btn = gr.Button(f"Load fast CPU demo ({CPU_DEMO_MODEL})")
380
  model_status = gr.Textbox(value="Model not loaded", label="Status", interactive=False)
381
 
382
  with gr.Row():
 
441
  else:
442
  model_status.value = _bg_initial_load()
443
 
444
+ # CPU warning / demo hint (visible in UI)
445
+ gr.Markdown("""
446
+ **⚠️ If this Space is running on CPU, `PioTio/Nanbeige2.5` will be extremely slow.**
447
+ - Enable GPU in Space Settings for real-time use.
448
+ - Or click **Load fast CPU demo (distilgpt2)** for an immediate, low-cost demo reply.
449
+ """)
450
+
451
+ # wire demo button
452
+ model_demo_btn.click(fn=lambda: load_model_ui(CPU_DEMO_MODEL), inputs=None, outputs=model_status)
453
+
454
+
455
  gr.Markdown("---\n**Tips:** select GPU hardware for smoother streaming and enable 4-bit bitsandbytes by installing `bitsandbytes` in `requirements.txt`.")
456
 
457