Spaces:
Running
Running
Add MiniCPM5-1B browser Space scaffold
Browse files
- README.md +2 -3
- __pycache__/app.cpython-311.pyc +0 -0
- app.py +41 -11
README.md
CHANGED
|
@@ -8,11 +8,10 @@ sdk_version: 5.49.1
|
|
| 8 |
pinned: false
|
| 9 |
license: apache-2.0
|
| 10 |
models:
|
| 11 |
-
-
|
| 12 |
-
- Reza2kn/MiniCPM5-1B-ONNX-Web
|
| 13 |
- openbmb/MiniCPM5-1B
|
| 14 |
---
|
| 15 |
|
| 16 |
# MiniCPM5-1B Chat
|
| 17 |
|
| 18 |
-
Gradio demo for MiniCPM5-1B with visible generation settings and sample prompts.
|
|
|
|
| 8 |
pinned: false
|
| 9 |
license: apache-2.0
|
| 10 |
models:
|
| 11 |
+
- openbmb/MiniCPM5-1B-SFT
|
|
|
|
| 12 |
- openbmb/MiniCPM5-1B
|
| 13 |
---
|
| 14 |
|
| 15 |
# MiniCPM5-1B Chat
|
| 16 |
|
| 17 |
+
Gradio demo for MiniCPM5-1B with visible generation settings and sample prompts. Current demo target is the SFT checkpoint because it behaves better on the local validation matrix.
|
__pycache__/app.cpython-311.pyc
CHANGED
|
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
|
|
|
app.py
CHANGED
|
@@ -6,19 +6,18 @@ import torch
|
|
| 6 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 7 |
|
| 8 |
|
| 9 |
-
MODEL_ID = os.environ.get("MODEL_ID", "openbmb/MiniCPM5-1B")
|
| 10 |
|
| 11 |
SYSTEM_NOTE = (
|
| 12 |
-
"MiniCPM5-1B is a text-only language model. "
|
| 13 |
-
"
|
| 14 |
)
|
| 15 |
|
| 16 |
EXAMPLES = [
|
| 17 |
-
["Briefly introduce yourself as a local AI assistant in two sentences.", 96, 0.2, 0.
|
| 18 |
-
["请用中文用三点总结:为什么本地小模型对隐私有帮助?",
|
| 19 |
-
["
|
| 20 |
-
["
|
| 21 |
-
["You need to inspect a local README and then summarize it. Give a safe two-step tool-use plan.", 128, 0.2, 0.9],
|
| 22 |
]
|
| 23 |
|
| 24 |
|
|
@@ -44,7 +43,19 @@ def generate(prompt, max_new_tokens, temperature, top_p):
|
|
| 44 |
return "Enter a prompt first.", ""
|
| 45 |
load_model()
|
| 46 |
start = time.time()
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
| 49 |
do_sample = temperature > 0
|
| 50 |
with torch.no_grad():
|
|
@@ -57,14 +68,33 @@ def generate(prompt, max_new_tokens, temperature, top_p):
|
|
| 57 |
pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
|
| 58 |
)
|
| 59 |
text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
new_tokens = max(0, output_ids.shape[-1] - inputs["input_ids"].shape[-1])
|
| 61 |
elapsed = max(time.time() - start, 1e-6)
|
| 62 |
metrics = f"{new_tokens} new tokens | {new_tokens / elapsed:.2f} tok/s | {elapsed:.2f}s | model: {MODEL_ID}"
|
| 63 |
return text, metrics
|
| 64 |
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
with gr.Row():
|
| 69 |
with gr.Column(scale=3):
|
| 70 |
prompt = gr.Textbox(label="Prompt", lines=8, value=EXAMPLES[0][0])
|
|
|
|
| 6 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 7 |
|
| 8 |
|
| 9 |
+
MODEL_ID = os.environ.get("MODEL_ID", "openbmb/MiniCPM5-1B-SFT")
|
| 10 |
|
| 11 |
SYSTEM_NOTE = (
|
| 12 |
+
"MiniCPM5-1B is a text-only language model. Local validation is currently cleanest for English, Chinese, "
|
| 13 |
+
"code snippets with explicit constraints, and tool-planning prompts. Persian and native Arabic are not marked supported yet."
|
| 14 |
)
|
| 15 |
|
| 16 |
EXAMPLES = [
|
| 17 |
+
["Briefly introduce yourself as a local AI assistant in two sentences.", 96, 0.2, 0.95],
|
| 18 |
+
["请用中文用三点总结:为什么本地小模型对隐私有帮助?", 160, 0.3, 0.95],
|
| 19 |
+
["Return only Python code. Write count_jsonl_rows(path) that counts lines in a JSONL file without using json.load.", 160, 0.2, 0.95],
|
| 20 |
+
["Give exactly two numbered steps to inspect a local README and summarize it safely. Do not say you cannot inspect files; write the tool-use plan.", 192, 0.2, 0.95],
|
|
|
|
| 21 |
]
|
| 22 |
|
| 23 |
|
|
|
|
| 43 |
return "Enter a prompt first.", ""
|
| 44 |
load_model()
|
| 45 |
start = time.time()
|
| 46 |
+
rendered = tokenizer.apply_chat_template(
|
| 47 |
+
[
|
| 48 |
+
{
|
| 49 |
+
"role": "system",
|
| 50 |
+
"content": "Answer directly and concisely. Do not include hidden reasoning or thinking process text.",
|
| 51 |
+
},
|
| 52 |
+
{"role": "user", "content": prompt},
|
| 53 |
+
],
|
| 54 |
+
tokenize=False,
|
| 55 |
+
add_generation_prompt=True,
|
| 56 |
+
enable_thinking=False,
|
| 57 |
+
)
|
| 58 |
+
inputs = tokenizer(rendered, return_tensors="pt")
|
| 59 |
inputs = {k: v.to(model.device) for k, v in inputs.items()}
|
| 60 |
do_sample = temperature > 0
|
| 61 |
with torch.no_grad():
|
|
|
|
| 68 |
pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
|
| 69 |
)
|
| 70 |
text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 71 |
+
if "</think>" in text:
|
| 72 |
+
text = text.split("</think>", 1)[1].strip()
|
| 73 |
+
elif rendered in text:
|
| 74 |
+
text = text.split(rendered, 1)[1].strip()
|
| 75 |
new_tokens = max(0, output_ids.shape[-1] - inputs["input_ids"].shape[-1])
|
| 76 |
elapsed = max(time.time() - start, 1e-6)
|
| 77 |
metrics = f"{new_tokens} new tokens | {new_tokens / elapsed:.2f} tok/s | {elapsed:.2f}s | model: {MODEL_ID}"
|
| 78 |
return text, metrics
|
| 79 |
|
| 80 |
|
| 81 |
+
css = """
|
| 82 |
+
.status-box {
|
| 83 |
+
border: 1px solid #d8dee8;
|
| 84 |
+
border-radius: 8px;
|
| 85 |
+
padding: 12px 14px;
|
| 86 |
+
background: #f8fafc;
|
| 87 |
+
color: #263244;
|
| 88 |
+
}
|
| 89 |
+
.status-box strong {
|
| 90 |
+
color: #101827;
|
| 91 |
+
}
|
| 92 |
+
"""
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
with gr.Blocks(title="MiniCPM5-1B Chat", theme=gr.themes.Soft(), css=css) as demo:
|
| 96 |
+
gr.Markdown("# MiniCPM5-1B Chat")
|
| 97 |
+
gr.HTML(f"<div class='status-box'><strong>Validation status:</strong> {SYSTEM_NOTE}<br><strong>Runtime model:</strong> {MODEL_ID}</div>")
|
| 98 |
with gr.Row():
|
| 99 |
with gr.Column(scale=3):
|
| 100 |
prompt = gr.Textbox(label="Prompt", lines=8, value=EXAMPLES[0][0])
|