Derr11 committed on
Commit 3b87138 · verified · 1 Parent(s): d421e34

Update app.py

Files changed (1):
  1. app.py +238 -139
app.py CHANGED
@@ -3,7 +3,7 @@ import torch
 import gradio as gr
 import spaces
 from PIL import Image
-from transformers import AutoModel, AutoTokenizer
+from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
 import warnings
 warnings.filterwarnings("ignore")
 
@@ -27,25 +27,25 @@ def load_model():
 
     print(f"Loading {MODEL_ID}...")
 
-    # Use float16 instead of bfloat16 for ZeroGPU compatibility
+    # Use float16 for ZeroGPU compatibility
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
    try:
-        # Load the tokenizer
+        # Load the tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            use_fast=False
        )
 
-        # Load the model with ZeroGPU-safe settings
+        # Load the model with trust_remote_code=True
        model = AutoModel.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
-            attn_implementation="eager",  # use eager instead of flash_attention
+            attn_implementation="eager",
        ).eval()
 
        if torch.cuda.is_available():
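
Taken together, the load path in this hunk reduces to the following standalone snippet (a minimal sketch; the checkpoint id is an assumption, since MODEL_ID is defined in a part of app.py outside this diff):

    import torch
    from transformers import AutoModel, AutoTokenizer

    MODEL_ID = "openbmb/MiniCPM-o-2_6"  # assumption: the Space's actual checkpoint id

    # float16 on GPU for ZeroGPU compatibility, float32 on CPU
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    # The checkpoint ships custom modeling code, hence trust_remote_code=True
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True, use_fast=False)
    model = AutoModel.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
        attn_implementation="eager",  # avoids a flash-attention dependency
    ).eval()

    if torch.cuda.is_available():
        model = model.cuda()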
@@ -54,23 +54,41 @@ def load_model():
        print("Model loaded successfully!")
 
    except Exception as e:
-        print(f"Error loading model: {e}")
-        # Fallback load attempt without trust_remote_code
+        print(f"Error with AutoModel, trying AutoModelForCausalLM: {e}")
+
+        # Fallback attempt with AutoModelForCausalLM
        try:
-            from transformers import AutoModelForCausalLM
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
+                trust_remote_code=True,  # very important!
                torch_dtype=dtype,
                low_cpu_mem_usage=True,
+                attn_implementation="eager"
            ).eval()
 
-            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-
            if torch.cuda.is_available():
                model = model.cuda()
+
+            print("Model loaded successfully with AutoModelForCausalLM!")
 
        except Exception as e2:
-            raise RuntimeError(f"Failed to load model: {e2}")
+            print(f"Failed to load model: {e2}")
+            raise RuntimeError(f"Could not load model: {e2}")
+
+
+# =========================================================
+# Image processing function
+# =========================================================
+
+def process_image(image_input):
+    """Prepare the image for the model"""
+    if image_input is None:
+        return None
+
+    if isinstance(image_input, str):
+        return Image.open(image_input).convert('RGB')
+    else:
+        return image_input.convert('RGB')
 
 
 # =========================================================
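
The new process_image helper normalizes both input shapes the UI can hand over: a file path (str) or an in-memory PIL image from gr.Image(type="pil"). A quick usage sketch (the file name is hypothetical):

    from PIL import Image

    img_from_path = process_image("photo.jpg")                # hypothetical path -> RGB PIL image
    img_from_pil = process_image(Image.new("RGB", (64, 64)))  # already-loaded image is converted to RGB
    assert process_image(None) is None                        # "no upload" passes through unchanged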
@@ -96,75 +114,78 @@ def generate_response(
        load_model()
        global model, tokenizer
 
-        # Prepare the messages
+        # Prepare the inputs
        if image_input is not None:
            # Handle image + text
+            image = process_image(image_input)
+
            if not text_input:
                text_input = "What is shown in this image? Please describe in detail."
 
-            # Prepare the model input
-            msgs = [{"role": "user", "content": [image_input, text_input]}]
-
-            # Use the model's own chat method
-            with torch.no_grad():
-                if hasattr(model, 'chat'):
-                    response = model.chat(
-                        image=image_input,
-                        msgs=msgs,
-                        tokenizer=tokenizer,
-                        sampling=True,
-                        temperature=temperature,
-                        top_p=top_p,
-                        max_new_tokens=max_new_tokens
-                    )
-                else:
-                    # Fallback for models that do not support chat
-                    inputs = tokenizer(text_input, return_tensors="pt")
-                    if torch.cuda.is_available():
-                        inputs = inputs.to("cuda")
-
-                    outputs = model.generate(
-                        **inputs,
-                        max_new_tokens=max_new_tokens,
-                        temperature=temperature,
-                        top_p=top_p,
-                        do_sample=True
-                    )
-
-                    response = tokenizer.decode(
-                        outputs[0][inputs['input_ids'].shape[1]:],
-                        skip_special_tokens=True
-                    )
-        else:
-            # Text only
-            inputs = tokenizer(
-                text_input,
-                return_tensors="pt",
-                padding=True,
-                truncation=True,
-                max_length=2048
-            )
-
-            if torch.cuda.is_available():
-                inputs = inputs.to("cuda")
-
-            with torch.no_grad():
-                outputs = model.generate(
-                    **inputs,
-                    max_new_tokens=max_new_tokens,
-                    temperature=temperature,
-                    top_p=top_p,
-                    do_sample=True,
-                    pad_token_id=tokenizer.pad_token_id,
-                    eos_token_id=tokenizer.eos_token_id
-                )
-
-            response = tokenizer.decode(
-                outputs[0][inputs['input_ids'].shape[1]:],
-                skip_special_tokens=True
-            )
-
-        return response
+            # Check whether the model exposes a chat method
+            if hasattr(model, 'chat'):
+                try:
+                    # Use the model's custom chat method
+                    msgs = [{"role": "user", "content": [image, text_input]}]
+
+                    with torch.no_grad():
+                        response = model.chat(
+                            image=image,
+                            msgs=msgs,
+                            tokenizer=tokenizer,
+                            sampling=True,
+                            temperature=temperature,
+                            top_p=top_p,
+                            max_new_tokens=max_new_tokens
+                        )
+
+                    return response
+
+                except Exception as e:
+                    print(f"Chat method failed: {e}")
+                    # Fall back to the standard path
+
+            # Fallback path for images:
+            # merge the text with the image description
+            prompt = f"Image: [Image will be processed]\n\nQuestion: {text_input}\n\nAnswer:"
+
+        else:
+            # Text only
+            prompt = text_input
+
+        # Standard text processing
+        inputs = tokenizer(
+            prompt,
+            return_tensors="pt",
+            padding=True,
+            truncation=True,
+            max_length=2048
+        )
+
+        if torch.cuda.is_available():
+            inputs = {k: v.cuda() for k, v in inputs.items() if v is not None}
+
+        # Generation settings
+        gen_kwargs = {
+            "max_new_tokens": max_new_tokens,
+            "temperature": temperature if temperature > 0 else 1e-7,
+            "top_p": top_p,
+            "do_sample": temperature > 0,
+            "pad_token_id": tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+        }
+
+        # Generate
+        with torch.no_grad():
+            outputs = model.generate(**inputs, **gen_kwargs)
+
+        # Decode
+        response = tokenizer.decode(
+            outputs[0][inputs['input_ids'].shape[1]:],
+            skip_special_tokens=True
+        )
+
+        return response.strip()
 
    except Exception as e:
        import traceback
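
Two details of the rewritten generation path deserve a note: do_sample flips to greedy decoding when the temperature slider sits at 0 (the 1e-7 temperature only keeps generate from rejecting a zero value), and the decode step slices outputs at input_ids.shape[1] so the prompt is not echoed back. The same pattern in isolation, using the small public gpt2 checkpoint purely for illustration:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")
    lm = AutoModelForCausalLM.from_pretrained("gpt2").eval()

    inputs = tok("The capital of France is", return_tensors="pt")

    temperature = 0.0  # slider at 0 -> deterministic, greedy decoding
    gen_kwargs = {
        "max_new_tokens": 8,
        "temperature": temperature if temperature > 0 else 1e-7,
        "top_p": 0.9,
        "do_sample": temperature > 0,
        "pad_token_id": tok.pad_token_id if tok.pad_token_id is not None else tok.eos_token_id,
    }

    with torch.no_grad():
        out = lm.generate(**inputs, **gen_kwargs)

    # Slice off the prompt: only tokens generated after input_ids are decoded
    new_tokens = out[0][inputs["input_ids"].shape[1]:]
    print(tok.decode(new_tokens, skip_special_tokens=True).strip())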
@@ -172,6 +193,20 @@ def generate_response(
        return f"Error: {str(e)}"
 
 
+# =========================================================
+# UI helper functions
+# =========================================================
+
+def clear_all():
+    """Clear all inputs and outputs"""
+    return "", None, ""
+
+
+def update_examples_visibility(show_examples):
+    """Toggle the examples' visibility"""
+    return gr.update(visible=show_examples)
+
+
 # =========================================================
 # Gradio interface
 # =========================================================
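
clear_all works because Gradio maps a returned tuple positionally onto the outputs list of the event that calls it; update_examples_visibility uses gr.update to change one property of a component without touching its value. A minimal sketch of the same mechanism (component names are hypothetical):

    import gradio as gr

    def toggle(show):
        # Only the 'visible' property changes; the textbox keeps its contents
        return gr.update(visible=show)

    with gr.Blocks() as demo:
        details = gr.Textbox(label="Details", visible=False)
        show_chk = gr.Checkbox(label="Show details")
        show_chk.change(fn=toggle, inputs=show_chk, outputs=details)

    demo.launch()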
@@ -179,83 +214,142 @@ def generate_response(
 def create_demo():
     """Create the simple Gradio interface"""
 
-    with gr.Blocks(title="MiniCPM-o-2.6") as demo:
+    with gr.Blocks(title="MiniCPM-o-2.6", css="""
+        .gradio-container {
+            max-width: 1200px;
+            margin: auto;
+        }
+        h1 {
+            text-align: center;
+        }
+        .contain {
+            background: white;
+            border-radius: 10px;
+            padding: 20px;
+        }
+    """) as demo:
+
        gr.Markdown(
            """
-            # 🤖 MiniCPM-o-2.6 - Multimodal AI
+            # 🤖 MiniCPM-o-2.6 - Multimodal AI Assistant
 
-            **Capabilities:**
-            - 🖼️ Image Understanding (OCR, description, analysis)
-            - 💬 Text Generation
-            - 🧠 8B parameters with GPT-4 level performance
-
-            Enter your text or upload an image to start!
+            <div style="text-align: center;">
+                <p>
+                    <b>8B parameters model</b> with GPT-4 level performance<br>
+                    Supports: Text Generation, Image Understanding, OCR, and Multi-lingual conversations
+                </p>
+            </div>
            """
        )
 
        with gr.Row():
+            # Main column
            with gr.Column(scale=2):
-                text_input = gr.Textbox(
-                    label="Text Input",
-                    placeholder="Enter your question or prompt...",
-                    lines=3
-                )
-
-                image_input = gr.Image(
-                    label="Image Input (Optional)",
-                    type="pil"
-                )
+                with gr.Group():
+                    text_input = gr.Textbox(
+                        label="💭 Text Input",
+                        placeholder="Enter your question or prompt here...\nYou can ask about images, request text generation, or have a conversation.",
+                        lines=4,
+                        elem_id="text_input"
+                    )
+
+                    image_input = gr.Image(
+                        label="📷 Image Input (Optional)",
+                        type="pil",
+                        elem_id="image_input"
+                    )
 
                with gr.Row():
-                    submit_btn = gr.Button("🚀 Generate", variant="primary")
-                    clear_btn = gr.Button("🗑️ Clear")
+                    submit_btn = gr.Button(
+                        "🚀 Generate Response",
+                        variant="primary",
+                        scale=2
+                    )
+                    clear_btn = gr.Button(
+                        "🗑️ Clear All",
+                        variant="secondary",
+                        scale=1
+                    )
 
                output = gr.Textbox(
-                    label="Response",
-                    lines=8,
-                    interactive=False
+                    label="🤖 AI Response",
+                    lines=10,
+                    interactive=False,
+                    elem_id="output"
                )
 
+            # Settings column
            with gr.Column(scale=1):
-                gr.Markdown("### ⚙️ Settings")
-
-                temperature = gr.Slider(
-                    label="Temperature",
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.7,
-                    step=0.1,
-                    info="Higher = more creative"
-                )
-
-                top_p = gr.Slider(
-                    label="Top-p",
-                    minimum=0.1,
-                    maximum=1.0,
-                    value=0.9,
-                    step=0.05,
-                    info="Nucleus sampling"
-                )
-
-                max_new_tokens = gr.Slider(
-                    label="Max Tokens",
-                    minimum=50,
-                    maximum=1024,
-                    value=512,
-                    step=50,
-                    info="Maximum response length"
-                )
+                with gr.Group():
+                    gr.Markdown("### ⚙️ Generation Settings")
+
+                    temperature = gr.Slider(
+                        label="Temperature",
+                        minimum=0.0,
+                        maximum=1.5,
+                        value=0.7,
+                        step=0.1,
+                        info="Controls randomness (0=deterministic, 1.5=very creative)"
+                    )
+
+                    top_p = gr.Slider(
+                        label="Top-p (Nucleus Sampling)",
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.9,
+                        step=0.05,
+                        info="Controls diversity of output"
+                    )
+
+                    max_new_tokens = gr.Slider(
+                        label="Max New Tokens",
+                        minimum=50,
+                        maximum=2048,
+                        value=512,
+                        step=50,
+                        info="Maximum length of generated response"
+                    )
 
                gr.Markdown(
                    """
-                    ### 📝 Tips:
-                    - For images: Upload and ask questions
-                    - Supports OCR and image analysis
-                    - Can handle multiple languages
+                    ### 📚 Quick Tips:
+
+                    **Text Generation:**
+                    - Ask questions
+                    - Request explanations
+                    - Generate creative content
+
+                    **Image Understanding:**
+                    - Upload an image
+                    - Ask about contents
+                    - Request OCR/text extraction
+                    - Get detailed descriptions
+
+                    **Languages:**
+                    - English, Chinese, Arabic
+                    - And many more!
                    """
                )
 
-        # Event handlers
+        # Examples
+        with gr.Group():
+            gr.Markdown("### 💡 Example Prompts")
+            gr.Examples(
+                examples=[
+                    ["Explain quantum computing in simple terms for a beginner.", None],
+                    ["Write a short story about a robot learning to paint.", None],
+                    ["What are the main differences between Python and JavaScript?", None],
+                    ["Create a healthy meal plan for one week.", None],
+                    ["Translate 'Hello, how are you?' to French, Spanish, and Arabic.", None],
+                ],
+                inputs=[text_input, image_input],
+                outputs=output,
+                fn=lambda t, i: generate_response(t, i, 0.7, 0.9, 512),
+                cache_examples=False,
+                label="Click any example to try it"
+            )
+
+        # Wire up the events
        submit_btn.click(
            fn=generate_response,
            inputs=[text_input, image_input, temperature, top_p, max_new_tokens],
@@ -263,31 +357,36 @@ def create_demo():
            api_name="generate"
        )
 
+        text_input.submit(
+            fn=generate_response,
+            inputs=[text_input, image_input, temperature, top_p, max_new_tokens],
+            outputs=output
+        )
+
        clear_btn.click(
-            fn=lambda: (None, None, ""),
+            fn=clear_all,
            inputs=[],
            outputs=[text_input, image_input, output]
        )
 
-        # Examples
-        gr.Examples(
-            examples=[
-                ["What is artificial intelligence?", None],
-                ["Explain quantum computing in simple terms", None],
-                ["Write a poem about nature", None],
-            ],
-            inputs=[text_input, image_input],
-            outputs=output,
-            fn=lambda t, i: generate_response(t, i, 0.7, 0.9, 512),
-            cache_examples=False
+        # Welcome message on load
+        demo.load(
+            lambda: gr.Info("Model is loading... This may take a moment on first use."),
+            inputs=None,
+            outputs=None
        )
 
    return demo
 
 
+# =========================================================
+# Run the app
+# =========================================================
+
 if __name__ == "__main__":
    demo = create_demo()
    demo.launch(
        ssr_mode=False,
-        show_error=True
+        show_error=True,
+        share=False
    )
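
Because submit_btn.click registers the handler with api_name="generate", the deployed Space is also callable programmatically. A sketch with gradio_client (the Space id is hypothetical; the argument order mirrors the inputs list above):

    from gradio_client import Client

    client = Client("Derr11/minicpm-demo")  # hypothetical Space id

    result = client.predict(
        "What is artificial intelligence?",  # text_input
        None,                                # image_input: text-only request
        0.7,                                 # temperature
        0.9,                                 # top_p
        512,                                 # max_new_tokens
        api_name="/generate",
    )
    print(result)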
 