Reza2kn committed on
Commit
80a9976
·
verified ·
1 Parent(s): 328e8d9

Add MiniCPM5-1B browser Space scaffold

Browse files
Files changed (3) hide show
  1. README.md +2 -3
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +41 -11
README.md CHANGED
@@ -8,11 +8,10 @@ sdk_version: 5.49.1
8
  pinned: false
9
  license: apache-2.0
10
  models:
11
- - Reza2kn/MiniCPM5-1B-MLX-DWQ-4bit
12
- - Reza2kn/MiniCPM5-1B-ONNX-Web
13
  - openbmb/MiniCPM5-1B
14
  ---
15
 
16
  # MiniCPM5-1B Chat
17
 
18
- Gradio demo for MiniCPM5-1B with visible generation settings and sample prompts.
 
8
  pinned: false
9
  license: apache-2.0
10
  models:
11
+ - openbmb/MiniCPM5-1B-SFT
 
12
  - openbmb/MiniCPM5-1B
13
  ---
14
 
15
  # MiniCPM5-1B Chat
16
 
17
+ Gradio demo for MiniCPM5-1B with visible generation settings and sample prompts. The demo currently targets the SFT checkpoint (openbmb/MiniCPM5-1B-SFT) because it behaves better on the local validation matrix.
__pycache__/app.cpython-311.pyc CHANGED
Binary files a/__pycache__/app.cpython-311.pyc and b/__pycache__/app.cpython-311.pyc differ
 
app.py CHANGED
@@ -6,19 +6,18 @@ import torch
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
 
9
- MODEL_ID = os.environ.get("MODEL_ID", "openbmb/MiniCPM5-1B")
10
 
11
  SYSTEM_NOTE = (
12
- "MiniCPM5-1B is a text-only language model. "
13
- "This demo validates chat, multilingual text, code, math, and tool-planning prompts; it does not accept image/audio/video inputs."
14
  )
15
 
16
  EXAMPLES = [
17
- ["Briefly introduce yourself as a local AI assistant in two sentences.", 96, 0.2, 0.9],
18
- ["请用中文用三点总结:为什么本地小模型对隐私有帮助?", 128, 0.3, 0.9],
19
- ["به فارسی، خیلی کوتاه توضیح بده چطور یک مدل محلی می‌تواند به برنامه‌نویس کمک کند.", 128, 0.3, 0.9],
20
- ["Write a small Python function that reads a JSONL file and returns the number of rows.", 160, 0.2, 0.9],
21
- ["You need to inspect a local README and then summarize it. Give a safe two-step tool-use plan.", 128, 0.2, 0.9],
22
  ]
23
 
24
 
@@ -44,7 +43,19 @@ def generate(prompt, max_new_tokens, temperature, top_p):
44
  return "Enter a prompt first.", ""
45
  load_model()
46
  start = time.time()
47
- inputs = tokenizer(prompt, return_tensors="pt")
 
 
 
 
 
 
 
 
 
 
 
 
48
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
49
  do_sample = temperature > 0
50
  with torch.no_grad():
@@ -57,14 +68,33 @@ def generate(prompt, max_new_tokens, temperature, top_p):
57
  pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
58
  )
59
  text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 
 
 
60
  new_tokens = max(0, output_ids.shape[-1] - inputs["input_ids"].shape[-1])
61
  elapsed = max(time.time() - start, 1e-6)
62
  metrics = f"{new_tokens} new tokens | {new_tokens / elapsed:.2f} tok/s | {elapsed:.2f}s | model: {MODEL_ID}"
63
  return text, metrics
64
 
65
 
66
- with gr.Blocks(title="MiniCPM5-1B Chat", theme=gr.themes.Soft()) as demo:
67
- gr.Markdown("# MiniCPM5-1B Chat\n" + SYSTEM_NOTE)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  with gr.Row():
69
  with gr.Column(scale=3):
70
  prompt = gr.Textbox(label="Prompt", lines=8, value=EXAMPLES[0][0])
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
 
9
+ MODEL_ID = os.environ.get("MODEL_ID", "openbmb/MiniCPM5-1B-SFT")
10
 
11
  SYSTEM_NOTE = (
12
+ "MiniCPM5-1B is a text-only language model. Local validation is currently cleanest for English, Chinese, "
13
+ "code snippets with explicit constraints, and tool-planning prompts. Persian and native Arabic are not marked supported yet."
14
  )
15
 
16
  EXAMPLES = [
17
+ ["Briefly introduce yourself as a local AI assistant in two sentences.", 96, 0.2, 0.95],
18
+ ["请用中文用三点总结:为什么本地小模型对隐私有帮助?", 160, 0.3, 0.95],
19
+ ["Return only Python code. Write count_jsonl_rows(path) that counts lines in a JSONL file without using json.load.", 160, 0.2, 0.95],
20
+ ["Give exactly two numbered steps to inspect a local README and summarize it safely. Do not say you cannot inspect files; write the tool-use plan.", 192, 0.2, 0.95],
 
21
  ]
22
 
23
 
 
43
  return "Enter a prompt first.", ""
44
  load_model()
45
  start = time.time()
46
+ rendered = tokenizer.apply_chat_template(
47
+ [
48
+ {
49
+ "role": "system",
50
+ "content": "Answer directly and concisely. Do not include hidden reasoning or thinking process text.",
51
+ },
52
+ {"role": "user", "content": prompt},
53
+ ],
54
+ tokenize=False,
55
+ add_generation_prompt=True,
56
+ enable_thinking=False,
57
+ )
58
+ inputs = tokenizer(rendered, return_tensors="pt")
59
  inputs = {k: v.to(model.device) for k, v in inputs.items()}
60
  do_sample = temperature > 0
61
  with torch.no_grad():
 
68
  pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
69
  )
70
  text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
71
+ if "</think>" in text:
72
+ text = text.split("</think>", 1)[1].strip()
73
+ elif rendered in text:
74
+ text = text.split(rendered, 1)[1].strip()
75
  new_tokens = max(0, output_ids.shape[-1] - inputs["input_ids"].shape[-1])
76
  elapsed = max(time.time() - start, 1e-6)
77
  metrics = f"{new_tokens} new tokens | {new_tokens / elapsed:.2f} tok/s | {elapsed:.2f}s | model: {MODEL_ID}"
78
  return text, metrics
79
 
80
 
81
+ css = """
82
+ .status-box {
83
+ border: 1px solid #d8dee8;
84
+ border-radius: 8px;
85
+ padding: 12px 14px;
86
+ background: #f8fafc;
87
+ color: #263244;
88
+ }
89
+ .status-box strong {
90
+ color: #101827;
91
+ }
92
+ """
93
+
94
+
95
+ with gr.Blocks(title="MiniCPM5-1B Chat", theme=gr.themes.Soft(), css=css) as demo:
96
+ gr.Markdown("# MiniCPM5-1B Chat")
97
+ gr.HTML(f"<div class='status-box'><strong>Validation status:</strong> {SYSTEM_NOTE}<br><strong>Runtime model:</strong> {MODEL_ID}</div>")
98
  with gr.Row():
99
  with gr.Column(scale=3):
100
  prompt = gr.Textbox(label="Prompt", lines=8, value=EXAMPLES[0][0])