Spaces:

Reza2kn
/

MiniCPM5-1B-WebGPU

Running

App Files Files Community

Reza2kn committed 14 days ago

Commit

80a9976

verified ·

1 Parent(s): 328e8d9

Add MiniCPM5-1B browser Space scaffold

Browse files

Files changed (3) hide show

README.md +2 -3
__pycache__/app.cpython-311.pyc +0 -0
app.py +41 -11

README.md CHANGED Viewed

@@ -8,11 +8,10 @@ sdk_version: 5.49.1
 pinned: false
 license: apache-2.0
 models:
-- Reza2kn/MiniCPM5-1B-MLX-DWQ-4bit
-- Reza2kn/MiniCPM5-1B-ONNX-Web
 - openbmb/MiniCPM5-1B
 ---
 # MiniCPM5-1B Chat
-Gradio demo for MiniCPM5-1B with visible generation settings and sample prompts.

 pinned: false
 license: apache-2.0
 models:
+- openbmb/MiniCPM5-1B-SFT
 - openbmb/MiniCPM5-1B
 ---
 # MiniCPM5-1B Chat
+Gradio demo for MiniCPM5-1B with visible generation settings and sample prompts. The current demo target is the SFT checkpoint because it behaves better on the local validation matrix.

__pycache__/app.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -6,19 +6,18 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-MODEL_ID = os.environ.get("MODEL_ID", "openbmb/MiniCPM5-1B")
 SYSTEM_NOTE = (
-    "MiniCPM5-1B is a text-only language model. "
-    "This demo validates chat, multilingual text, code, math, and tool-planning prompts; it does not accept image/audio/video inputs."
 )
 EXAMPLES = [
-    ["Briefly introduce yourself as a local AI assistant in two sentences.", 96, 0.2, 0.9],
-    ["请用中文用三点总结：为什么本地小模型对隐私有帮助？", 128, 0.3, 0.9],
-    ["به فارسی، خیلی کوتاه توضیح بده چطور یک مدل محلی می‌تواند به برنامه‌نویس کمک کند.", 128, 0.3, 0.9],
-    ["Write a small Python function that reads a JSONL file and returns the number of rows.", 160, 0.2, 0.9],
-    ["You need to inspect a local README and then summarize it. Give a safe two-step tool-use plan.", 128, 0.2, 0.9],
 ]
@@ -44,7 +43,19 @@ def generate(prompt, max_new_tokens, temperature, top_p):
         return "Enter a prompt first.", ""
     load_model()
     start = time.time()
-    inputs = tokenizer(prompt, return_tensors="pt")
     inputs = {k: v.to(model.device) for k, v in inputs.items()}
     do_sample = temperature > 0
     with torch.no_grad():
@@ -57,14 +68,33 @@ def generate(prompt, max_new_tokens, temperature, top_p):
             pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
         )
     text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
     new_tokens = max(0, output_ids.shape[-1] - inputs["input_ids"].shape[-1])
     elapsed = max(time.time() - start, 1e-6)
     metrics = f"{new_tokens} new tokens | {new_tokens / elapsed:.2f} tok/s | {elapsed:.2f}s | model: {MODEL_ID}"
     return text, metrics
-with gr.Blocks(title="MiniCPM5-1B Chat", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# MiniCPM5-1B Chat\n" + SYSTEM_NOTE)
     with gr.Row():
         with gr.Column(scale=3):
             prompt = gr.Textbox(label="Prompt", lines=8, value=EXAMPLES[0][0])

 from transformers import AutoModelForCausalLM, AutoTokenizer
+MODEL_ID = os.environ.get("MODEL_ID", "openbmb/MiniCPM5-1B-SFT")
 SYSTEM_NOTE = (
+    "MiniCPM5-1B is a text-only language model. Local validation is currently cleanest for English, Chinese, "
+    "code snippets with explicit constraints, and tool-planning prompts. Persian and native Arabic are not marked supported yet."
 )
 EXAMPLES = [
+    ["Briefly introduce yourself as a local AI assistant in two sentences.", 96, 0.2, 0.95],
+    ["请用中文用三点总结：为什么本地小模型对隐私有帮助？", 160, 0.3, 0.95],
+    ["Return only Python code. Write count_jsonl_rows(path) that counts lines in a JSONL file without using json.load.", 160, 0.2, 0.95],
+    ["Give exactly two numbered steps to inspect a local README and summarize it safely. Do not say you cannot inspect files; write the tool-use plan.", 192, 0.2, 0.95],
 ]
         return "Enter a prompt first.", ""
     load_model()
     start = time.time()
+    rendered = tokenizer.apply_chat_template(
+        [
+            {
+                "role": "system",
+                "content": "Answer directly and concisely. Do not include hidden reasoning or thinking process text.",
+            },
+            {"role": "user", "content": prompt},
+        ],
+        tokenize=False,
+        add_generation_prompt=True,
+        enable_thinking=False,
+    )
+    inputs = tokenizer(rendered, return_tensors="pt")
     inputs = {k: v.to(model.device) for k, v in inputs.items()}
     do_sample = temperature > 0
     with torch.no_grad():
             pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
         )
     text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    if "</think>" in text:
+        text = text.split("</think>", 1)[1].strip()
+    elif rendered in text:
+        text = text.split(rendered, 1)[1].strip()
     new_tokens = max(0, output_ids.shape[-1] - inputs["input_ids"].shape[-1])
     elapsed = max(time.time() - start, 1e-6)
     metrics = f"{new_tokens} new tokens | {new_tokens / elapsed:.2f} tok/s | {elapsed:.2f}s | model: {MODEL_ID}"
     return text, metrics
+css = """
+.status-box {
+  border: 1px solid #d8dee8;
+  border-radius: 8px;
+  padding: 12px 14px;
+  background: #f8fafc;
+  color: #263244;
+}
+.status-box strong {
+  color: #101827;
+}
+"""
+with gr.Blocks(title="MiniCPM5-1B Chat", theme=gr.themes.Soft(), css=css) as demo:
+    gr.Markdown("# MiniCPM5-1B Chat")
+    gr.HTML(f"<div class='status-box'><strong>Validation status:</strong> {SYSTEM_NOTE}<br><strong>Runtime model:</strong> {MODEL_ID}</div>")
     with gr.Row():
         with gr.Column(scale=3):
             prompt = gr.Textbox(label="Prompt", lines=8, value=EXAMPLES[0][0])