Derr11 committed on
Commit 5f4403f · verified · 1 Parent(s): de94a22

Update app.py

Files changed (1):
  1. app.py +42 -21
app.py CHANGED
@@ -23,7 +23,10 @@ processor = None
 
 
 def load_model():
-    """Load Qwen3-Omni and the processor only on the first call (on ZeroGPU)."""
+    """
+    Load Qwen3-Omni and the processor only on the first call (on ZeroGPU).
+    flash_attention_2 and device_map='auto' were removed to avoid invocation problems.
+    """
     global model, processor
 
     if model is not None and processor is not None:
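For context on the hunk above: the module keeps `model` and `processor` in globals so repeated Gradio callbacks pay the loading cost only once. A minimal self-contained sketch of that lazy-load pattern (names hypothetical):

    model = None

    def expensive_load():
        # stand-in for the real from_pretrained call
        return object()

    def load_model():
        global model
        if model is None:  # only the first call actually loads
            model = expensive_load()
        return model

    assert load_model() is load_model()  # later calls reuse the cached instance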
@@ -31,27 +34,28 @@ def load_model():
 
     print(f"[ZeroGPU] Loading model from: {MODEL_PATH}")
 
-    try:
-        local_model = Qwen3OmniMoeForConditionalGeneration.from_pretrained(
-            MODEL_PATH,
-            dtype="auto",
-            device_map="auto",
-            attn_implementation="flash_attention_2",
-        )
-        print("[ZeroGPU] Model loaded with flash_attention_2.")
-    except Exception as e:
-        print(f"[ZeroGPU] flash_attention_2 failed, falling back. Error: {e}")
-        local_model = Qwen3OmniMoeForConditionalGeneration.from_pretrained(
-            MODEL_PATH,
-            dtype="auto",
-            device_map="auto",
-        )
-        print("[ZeroGPU] Model loaded with default attention.")
+    # Pick the dtype and the device explicitly
+    if torch.cuda.is_available():
+        torch_dtype = torch.bfloat16
+        device = "cuda"
+    else:
+        torch_dtype = torch.float32
+        device = "cpu"
+
+    # Load the model without flash_attention_2 and without device_map="auto"
+    local_model = Qwen3OmniMoeForConditionalGeneration.from_pretrained(
+        MODEL_PATH,
+        torch_dtype=torch_dtype,
+        attn_implementation="eager",  # the safest option in this environment
+    )
+
+    local_model.to(device)
 
     local_processor = Qwen3OmniMoeProcessor.from_pretrained(MODEL_PATH)
 
     model = local_model
     processor = local_processor
+    print(f"[ZeroGPU] Model loaded on {device} with dtype {torch_dtype}.")
 
 
 def build_messages_from_history(history, system_prompt, user_text, image, audio_path, video_path):
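Condensing the new loading path into a standalone sketch (the model id below is an assumption; the Space defines its own MODEL_PATH): an explicit dtype plus a plain `.to(device)` replaces `device_map="auto"`, and `attn_implementation="eager"` drops the flash-attn dependency altogether.

    import torch
    from transformers import (
        Qwen3OmniMoeForConditionalGeneration,
        Qwen3OmniMoeProcessor,
    )

    MODEL_PATH = "Qwen/Qwen3-Omni-30B-A3B-Instruct"  # assumed id; the Space sets its own

    use_cuda = torch.cuda.is_available()
    torch_dtype = torch.bfloat16 if use_cuda else torch.float32
    device = "cuda" if use_cuda else "cpu"

    model = Qwen3OmniMoeForConditionalGeneration.from_pretrained(
        MODEL_PATH,
        torch_dtype=torch_dtype,
        attn_implementation="eager",  # no flash-attn wheel required
    ).to(device)
    processor = Qwen3OmniMoeProcessor.from_pretrained(MODEL_PATH)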
@@ -122,10 +126,14 @@ def qwen3_omni_inference(
     top_p,
     max_tokens,
 ):
-    """Run the actual inference on ZeroGPU."""
+    """
+    Run the actual inference on ZeroGPU:
+    - text + image + audio + video inputs
+    - always a text output, plus audio when requested
+    """
 
     if not (user_text or image is not None or audio_path or video_path):
-        # no input
+        # no input from the user
         return history, None, "", None, None, None
 
     load_model()
@@ -140,17 +148,20 @@ def qwen3_omni_inference(
         video_path=video_path,
     )
 
+    # Build the prompt text from the conversation (chat template)
     text_prompt = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
         tokenize=False,
     )
 
+    # Prepare the multimodal media (audio/image/video)
     audios, images, videos = process_mm_info(
         messages,
         use_audio_in_video=USE_AUDIO_IN_VIDEO,
     )
 
+    # Convert everything to tensors
     inputs = processor(
         text=text_prompt,
         audio=audios,
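The `messages` consumed by `apply_chat_template` and `process_mm_info` are presumably shaped like the Qwen omni cookbook examples, i.e. each turn carries a list of typed content parts; a sketch with hypothetical file paths:

    messages = [
        {"role": "system",
         "content": [{"type": "text", "text": "You are a helpful assistant."}]},
        {"role": "user",
         "content": [
             {"type": "image", "image": "example.jpg"},  # hypothetical local file
             {"type": "audio", "audio": "example.wav"},  # hypothetical local file
             {"type": "text", "text": "Describe what you see and hear."},
         ]},
    ]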
@@ -161,8 +172,13 @@ def qwen3_omni_inference(
         use_audio_in_video=USE_AUDIO_IN_VIDEO,
     )
 
-    inputs = inputs.to(model.device).to(model.dtype)
+    # Move the inputs to the model's device and dtype
+    first_param = next(model.parameters())
+    device = first_param.device
+    dtype = first_param.dtype
+    inputs = inputs.to(device=device, dtype=dtype)
 
+    # Generation parameters
     gen_kwargs = dict(
         temperature=float(temperature),
         top_p=float(top_p),
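One subtlety behind the new `inputs.to(device=device, dtype=dtype)` line: transformers' `BatchFeature.to` applies the dtype only to floating-point tensors, so integer `input_ids` keep their integer type. A hand-rolled equivalent that makes the rule explicit (a sketch, not the library code):

    import torch

    def move_inputs(batch, device, dtype):
        # Move every tensor to `device`; cast only floating-point tensors to
        # `dtype` so token ids and attention masks stay integral.
        out = {}
        for name, value in batch.items():
            if torch.is_tensor(value):
                value = (value.to(device=device, dtype=dtype)
                         if value.is_floating_point() else value.to(device))
            out[name] = value
        return out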
@@ -171,6 +187,7 @@ def qwen3_omni_inference(
         use_audio_in_video=USE_AUDIO_IN_VIDEO,
     )
 
+    # Generate text only, or text + audio
     if not return_audio:
         gen_kwargs["return_audio"] = False
         text_ids, _ = model.generate(**inputs, **gen_kwargs)
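A hedged aside on the generation parameters: in stock transformers, `temperature` and `top_p` only take effect when sampling is enabled, so if the elided `gen_kwargs` lines don't already enable it, something like the following is needed (values hypothetical):

    gen_kwargs = dict(
        temperature=0.7,
        top_p=0.9,
        do_sample=True,       # without this, generate() runs greedy search and
                              # ignores temperature/top_p with a warning
        max_new_tokens=512,   # hypothetical cap, mirroring the UI's max_tokens
    )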
@@ -179,15 +196,19 @@ def qwen3_omni_inference(
         gen_kwargs["speaker"] = speaker
         text_ids, audio_out = model.generate(**inputs, **gen_kwargs)
 
+    # Decode only the newly generated tokens
+    input_len = inputs["input_ids"].shape[1]
     generated_text = processor.batch_decode(
-        text_ids.sequences[:, inputs["input_ids"].shape[1]:],
+        text_ids.sequences[:, input_len:],
         skip_special_tokens=True,
         clean_up_tokenization_spaces=False,
     )[0]
 
+    # Update the chat history
     user_display = user_text if (user_text and user_text.strip()) else "[Multimodal message]"
     history = history + [[user_display, generated_text]]
 
+    # Prepare the audio for the Gradio output (if any)
     gr_audio = None
     if audio_out is not None:
         audio_np = audio_out.reshape(-1).detach().cpu().numpy()
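Two small illustrations of the decoding tail above. First, the prompt-stripping slice on a toy array: with a prompt of length 4 and three generated tokens, keeping columns `[input_len:]` leaves only the new tokens. Second, Gradio's `gr.Audio` accepts a `(sample_rate, np.ndarray)` tuple; Qwen's omni models emit speech at 24 kHz in the published examples (an assumption worth re-checking against the model card):

    import numpy as np

    sequences = np.array([[11, 12, 13, 14, 101, 102, 103]])
    input_len = 4
    print(sequences[:, input_len:])  # -> [[101 102 103]]

    audio_np = np.zeros(24_000, dtype=np.float32)  # stand-in for the model's waveform
    gr_audio = (24_000, audio_np)  # one second of silence, ready for gr.Audio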
 