Reza2kn committed on
Commit
328e8d9
·
verified ·
1 Parent(s): aa937f3

Add MiniCPM5-1B browser Space scaffold

Browse files
Files changed (4) hide show
  1. README.md +7 -6
  2. __pycache__/app.cpython-311.pyc +0 -0
  3. app.py +83 -0
  4. requirements.txt +6 -0
README.md CHANGED
@@ -1,17 +1,18 @@
1
  ---
2
- title: MiniCPM5 1B WebGPU
3
  emoji: ⚡
4
  colorFrom: blue
5
  colorTo: green
6
- sdk: static
 
7
  pinned: false
8
  license: apache-2.0
9
  models:
 
10
  - Reza2kn/MiniCPM5-1B-ONNX-Web
 
11
  ---
12
 
13
- # MiniCPM5-1B WebGPU
14
 
15
- Static browser demo scaffold for `Reza2kn/MiniCPM5-1B-ONNX-Web`.
16
-
17
- The browser runtime loads tokenizer/config/model assets from Hugging Face and runs generation with `onnxruntime-web` when the ONNX artifact is available.
 
1
  ---
2
+ title: MiniCPM5 1B Chat
3
  emoji: ⚡
4
  colorFrom: blue
5
  colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
  pinned: false
9
  license: apache-2.0
10
  models:
11
+ - Reza2kn/MiniCPM5-1B-MLX-DWQ-4bit
12
  - Reza2kn/MiniCPM5-1B-ONNX-Web
13
+ - openbmb/MiniCPM5-1B
14
  ---
15
 
16
+ # MiniCPM5-1B Chat
17
 
18
+ Gradio demo for MiniCPM5-1B with visible generation settings and sample prompts.
 
 
__pycache__/app.cpython-311.pyc ADDED
Binary file (6.45 kB). View file
 
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+
4
+ import gradio as gr
5
+ import torch
6
+ from transformers import AutoModelForCausalLM, AutoTokenizer
7
+
8
+
9
# Model repository to load; the MODEL_ID environment variable overrides the default.
MODEL_ID = os.environ.get("MODEL_ID", "openbmb/MiniCPM5-1B")

# Description rendered under the page heading.
SYSTEM_NOTE = " ".join(
    (
        "MiniCPM5-1B is a text-only language model.",
        "This demo validates chat, multilingual text, code, math, and tool-planning prompts; it does not accept image/audio/video inputs.",
    )
)

# Each row is [prompt, max_new_tokens, temperature, top_p], matching the
# order of the input components the examples are bound to.
EXAMPLES = [
    [
        "Briefly introduce yourself as a local AI assistant in two sentences.",
        96,
        0.2,
        0.9,
    ],
    [
        "请用中文用三点总结:为什么本地小模型对隐私有帮助?",
        128,
        0.3,
        0.9,
    ],
    [
        "به فارسی، خیلی کوتاه توضیح بده چطور یک مدل محلی می‌تواند به برنامه‌نویس کمک کند.",
        128,
        0.3,
        0.9,
    ],
    [
        "Write a small Python function that reads a JSONL file and returns the number of rows.",
        160,
        0.2,
        0.9,
    ],
    [
        "You need to inspect a local README and then summarize it. Give a safe two-step tool-use plan.",
        128,
        0.2,
        0.9,
    ],
]


# Placeholders filled in by load_model() the first time it runs.
tokenizer = model = None
27
+
28
+
29
def load_model():
    """Load the tokenizer and model into the module globals on first use.

    Does nothing once ``model`` has been set. With CUDA available the
    weights are requested in float16 with ``device_map="auto"``; otherwise
    float32 with no device map. The model is switched to eval mode.
    """
    global tokenizer, model
    if model is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        if torch.cuda.is_available():
            dtype, device_map = torch.float16, "auto"
        else:
            dtype, device_map = torch.float32, None
        loaded = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=dtype,
            device_map=device_map,
        )
        model = loaded.eval()
40
+
41
+
42
def generate(prompt, max_new_tokens, temperature, top_p):
    """Generate a completion for ``prompt`` and report run metrics.

    Args:
        prompt: Prompt text. It is tokenized as-is; no chat template is
            applied. ``None``, empty, or whitespace-only input short-circuits.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        temperature: Sampling temperature. A value of 0 (or below) turns
            sampling off, in which case ``temperature`` and ``top_p`` are
            passed to ``model.generate`` as ``None``.
        top_p: Nucleus-sampling threshold, forwarded only when sampling.

    Returns:
        A ``(text, metrics)`` tuple. ``text`` is the decode of the whole
        returned sequence with special tokens skipped; ``metrics`` is a
        one-line summary of new-token count, tokens per second, elapsed
        seconds, and the model id. For a missing or blank prompt the tuple
        is ``("Enter a prompt first.", "")`` and the model is not loaded.
    """
    # `not prompt` also covers None, which would otherwise raise
    # AttributeError on .strip() (e.g. a cleared field or a raw API call).
    if not prompt or not prompt.strip():
        return "Enter a prompt first.", ""
    load_model()
    # The timer starts after load_model() so a first-call model load is not
    # counted. perf_counter() is monotonic, unlike wall-clock time.time().
    start = time.perf_counter()
    inputs = tokenizer(prompt, return_tensors="pt")
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    do_sample = temperature > 0
    # Use EOS as the padding id when the tokenizer defines one; otherwise
    # fall back to whatever pad id the tokenizer has.
    if tokenizer.eos_token_id is not None:
        pad_token_id = tokenizer.eos_token_id
    else:
        pad_token_id = tokenizer.pad_token_id
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=int(max_new_tokens),
            temperature=float(temperature) if do_sample else None,
            top_p=float(top_p) if do_sample else None,
            do_sample=do_sample,
            pad_token_id=pad_token_id,
        )
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # New-token count is the returned length minus the prompt length.
    new_tokens = max(0, output_ids.shape[-1] - inputs["input_ids"].shape[-1])
    # Floor the duration so the tok/s division can never hit zero.
    elapsed = max(time.perf_counter() - start, 1e-6)
    metrics = f"{new_tokens} new tokens | {new_tokens / elapsed:.2f} tok/s | {elapsed:.2f}s | model: {MODEL_ID}"
    return text, metrics
64
+
65
+
66
# NOTE(review): the nesting below is reconstructed from a flattened diff
# view; confirm the Row/Column layout against the real app.py.
with gr.Blocks(title="MiniCPM5-1B Chat", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# MiniCPM5-1B Chat\n" + SYSTEM_NOTE)

    with gr.Row():
        # Wide column: the prompt box and the button that starts generation.
        with gr.Column(scale=3):
            prompt = gr.Textbox(
                label="Prompt",
                lines=8,
                value=EXAMPLES[0][0],
            )
            run = gr.Button("Generate", variant="primary")
        # Narrow column: generation settings.
        with gr.Column(scale=1):
            max_new_tokens = gr.Slider(
                minimum=16,
                maximum=512,
                value=128,
                step=1,
                label="Max new tokens",
            )
            temperature = gr.Slider(
                minimum=0,
                maximum=1.5,
                value=0.2,
                step=0.05,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.9,
                step=0.05,
                label="Top-p",
            )

    output = gr.Textbox(label="Output", lines=14)
    metrics = gr.Textbox(label="Run metrics", interactive=False)

    # Clicking an example fills the same four inputs that generate() reads.
    gr.Examples(
        examples=EXAMPLES,
        inputs=[prompt, max_new_tokens, temperature, top_p],
    )
    run.click(
        fn=generate,
        inputs=[prompt, max_new_tokens, temperature, top_p],
        outputs=[output, metrics],
    )


if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==5.49.1
2
+ transformers>=5.6
3
+ torch
4
+ accelerate
5
+ safetensors
6
+ huggingface_hub