CGQN committed
Commit 547dee4 · verified · 1 Parent(s): 8b5eee1

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +150 -0
app.py ADDED
@@ -0,0 +1,150 @@
+ """
+ Gradio app to run MiniCPM-V-4_5-int4 on CPU for image+text chat.
+ - Requires: pip install transformers accelerate gradio pillow
+ - Model: openbmb/MiniCPM-V-4_5-int4 (quantized, CPU-friendly)
+ - This script is self-contained and uses a simple multi-turn chat interface.
+ """
+ import os
+ import torch
+ import gradio as gr
+ from PIL import Image
+ from typing import List, Dict, Any, Optional
+ from transformers import AutoModel, AutoTokenizer
+
+ MODEL_ID = os.environ.get("MINICPM_MODEL_ID", "openbmb/MiniCPM-V-4_5-int4")
+
+ # Global model/tokenizer, loaded once
+ model = None
+ tokenizer = None
+
+ def load_model():
+     global model, tokenizer
+     if model is not None and tokenizer is not None:
+         return
+
+     # For CPU inference, keep it simple and avoid .cuda() / bfloat16.
+     # trust_remote_code is required because MiniCPM implements a custom .chat().
+     tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+     model = AutoModel.from_pretrained(
+         MODEL_ID,
+         trust_remote_code=True,
+         attn_implementation="sdpa",  # SDPA is fine on CPU; avoid flash-attn on CPU
+         torch_dtype=torch.float32,   # Safer default for CPU
+         device_map="cpu",            # Ensure CPU execution
+     )
+     model.eval()
+
+
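+ # Note: load_model() is called lazily from the request handler (respond), so
+ # the first chat request also pays the model download/load cost; later
+ # requests reuse the module-level globals.
+
+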
+ def build_messages(history: List[List[str]], image: Optional[Image.Image], user_input: str) -> List[Dict[str, Any]]:
+     """
+     Convert Gradio chat history + current inputs into the message format expected by MiniCPM's .chat().
+     history: list of [user_text, assistant_text] pairs from gr.Chatbot (text-only transcript).
+     image: PIL.Image or None for the current turn.
+     user_input: current user text.
+     Returns a msgs list with roles and content lists [image?, text].
+     """
+     msgs = []
+     # Reconstruct the multi-turn context by interleaving user/assistant turns.
+     # Each message in history is assumed text-only; only the current turn may
+     # carry an image.
+     for user_text, assistant_text in history:
+         if user_text is not None:
+             msgs.append({"role": "user", "content": [user_text]})
+         if assistant_text is not None:
+             msgs.append({"role": "assistant", "content": [assistant_text]})
+
+     # Append the current user turn (with optional image)
+     content = []
+     if image is not None:
+         # Ensure RGB
+         if image.mode != "RGB":
+             image = image.convert("RGB")
+         content.append(image)
+     if user_input and user_input.strip():
+         content.append(user_input.strip())
+     else:
+         # Ensure there is at least something in the content
+         content.append("")
+
+     msgs.append({"role": "user", "content": content})
+     return msgs
+
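+
+ # For reference, a first turn with an image and a question yields:
+ #   [{"role": "user", "content": [<PIL.Image.Image>, "What is in this picture?"]}]
+ # while a follow-up turn replays the earlier exchange as text only (prior
+ # images are not re-sent) before the new user message:
+ #   [{"role": "user", "content": ["What is in this picture?"]},
+ #    {"role": "assistant", "content": ["A tabby cat on a sofa."]},
+ #    {"role": "user", "content": ["What breed is it?"]}]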
+
+ def respond(user_text: str, image: Optional[Image.Image], chat_history: List[List[str]], enable_thinking: bool):
+     """
+     Inference handler for Gradio. Returns the updated chat history and clears the user textbox.
+     """
+     load_model()
+
+     # Build MiniCPM messages
+     msgs = build_messages(chat_history or [], image, user_text)
+
+     # Run model.chat without gradient tracking
+     with torch.inference_mode():
+         answer = model.chat(
+             msgs=msgs,
+             tokenizer=tokenizer,
+             enable_thinking=enable_thinking,
+         )
+
+     # Update the history shown in the Chatbot: append (user_text, answer).
+     # If user_text is empty but an image was provided, show a placeholder.
+     shown_user_msg = user_text.strip() if (user_text and user_text.strip()) else "[Image]"
+     chat_history = (chat_history or []) + [[shown_user_msg, answer]]
+     return chat_history, ""
+
+
+ def clear_history():
+     # Reset chatbot history, image input, and user textbox
+     return [], None, ""
+
+
+ def demo_app():
+     with gr.Blocks(title="MiniCPM-V-4_5-int4 (CPU) - Gradio", theme="soft") as demo:
+         gr.Markdown("## MiniCPM-V-4_5-int4 (CPU) Demo\nUpload an image (optional) and ask a question.")
+         with gr.Row():
+             with gr.Column(scale=3):
+                 # Use the default (pairs) history format: a list of
+                 # [user_text, assistant_text] pairs, which is what
+                 # respond()/build_messages() expect.
+                 chatbot = gr.Chatbot(height=420)
+                 with gr.Row():
+                     img = gr.Image(type="pil", label="Image (optional)", height=240)
+                     user_in = gr.Textbox(
+                         label="Your message",
+                         placeholder="Ask something about the image or chat without an image...",
+                         lines=3,
+                     )
+                 with gr.Row():
+                     enable_thinking = gr.Checkbox(value=False, label="Enable thinking mode")
+                     send_btn = gr.Button("Send", variant="primary")
+                     clear_btn = gr.Button("Clear")
+
+             with gr.Column(scale=1):
+                 gr.Markdown("### Model")
+                 gr.Markdown(f"- ID: `{MODEL_ID}`\n- Device: CPU\n- Quant: int4")
+
+         # Events
+         send_btn.click(
+             fn=respond,
+             inputs=[user_in, img, chatbot, enable_thinking],
+             outputs=[chatbot, user_in],
+         )
+         user_in.submit(
+             fn=respond,
+             inputs=[user_in, img, chatbot, enable_thinking],
+             outputs=[chatbot, user_in],
+         )
+         clear_btn.click(
+             fn=clear_history,
+             inputs=[],
+             outputs=[chatbot, img, user_in],
+         )
+
+     return demo
+
+
+ if __name__ == "__main__":
+     # Make sure we don't accidentally spawn a CUDA context
+     os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
+     demo = demo_app()
+     demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
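
For a quick smoke test of the same pipeline without the UI, the helpers above can be driven directly (a minimal sketch: it imports this app.py as a module, relies on the same custom .chat() signature used above, and "example.jpg" is a hypothetical local file):

    from PIL import Image

    import app  # this file, importable from the working directory

    app.load_model()  # loads tokenizer + int4 model on CPU (slow on first call)
    img = Image.open("example.jpg")  # hypothetical test image
    msgs = app.build_messages([], img, "Describe this image.")
    print(app.model.chat(msgs=msgs, tokenizer=app.tokenizer, enable_thinking=False))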