Hug0endob committed on
Commit
b9f10dd
·
verified ·
1 Parent(s): 62ffc11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -144
app.py CHANGED
@@ -11,15 +11,16 @@ from io import BytesIO
11
  from typing import List, Tuple
12
  from PIL import Image, ImageFile, UnidentifiedImageError
13
  import gradio as gr
 
 
14
 
15
- # Import Mistral client in the same way original code did.
16
- # If you have a different client interface, adjust get_client/upload_file_to_mistral accordingly.
17
  try:
18
  from mistralai import Mistral
19
  except Exception:
20
- Mistral = None # Fallback; upload will use raw HTTP if needed
21
 
22
- # --- Configuration / constants ---
23
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
24
  PIXTRAL_MODEL = "pixtral-12b-2409"
25
  VIDEO_MODEL = "voxtral-mini-latest"
@@ -35,16 +36,15 @@ SYSTEM_INSTRUCTION = (
35
  "Do not invent sensory information not present in the media."
36
  )
37
 
38
- # Pillow config
39
  ImageFile.LOAD_TRUNCATED_IMAGES = True
40
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
41
 
42
- # --- Utilities ---
43
  def get_client(key: str | None = None):
44
  api_key = (key or "").strip() or DEFAULT_KEY
45
  if Mistral is None:
46
- # If mistralai package is not available, return a thin object with api_key attr for upload fallback.
47
- class Dummy:
48
  def __init__(self, k): self.api_key = k
49
  return Dummy(api_key)
50
  return Mistral(api_key=api_key)
@@ -58,7 +58,7 @@ def ext_from_src(src: str) -> str:
58
  _, ext = os.path.splitext((src or "").split("?")[0])
59
  return ext.lower()
60
 
61
- def safe_head(url: str, timeout: int = 6) -> requests.Response | None:
62
  try:
63
  r = requests.head(url, timeout=timeout, allow_redirects=True)
64
  if r.status_code >= 400:
@@ -67,20 +67,18 @@ def safe_head(url: str, timeout: int = 6) -> requests.Response | None:
67
  except Exception:
68
  return None
69
 
70
- def safe_get(url: str, timeout: int = 15) -> requests.Response:
71
  r = requests.get(url, timeout=timeout)
72
  r.raise_for_status()
73
  return r
74
 
75
  def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60) -> bytes:
76
  if is_remote(src):
77
- # try HEAD to learn content-length
78
  head = safe_head(src)
79
  if head is not None:
80
  cl = head.headers.get("content-length")
81
  try:
82
  if cl and int(cl) > stream_threshold:
83
- # stream download to temp file to avoid memory spike
84
  with requests.get(src, timeout=timeout, stream=True) as r:
85
  r.raise_for_status()
86
  fd, p = tempfile.mkstemp()
@@ -96,9 +94,7 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
96
  try: os.remove(p)
97
  except Exception: pass
98
  except Exception:
99
- # fallthrough to simple get
100
  pass
101
- # regular GET
102
  r = safe_get(src, timeout=timeout)
103
  return r.content
104
  else:
@@ -122,15 +118,14 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
122
  if img.mode != "RGB":
123
  img = img.convert("RGB")
124
  h = base_h
125
- # maintain aspect
126
  w = max(1, int(img.width * (h / img.height)))
127
  img = img.resize((w, h), Image.LANCZOS)
128
  buf = BytesIO()
129
  img.save(buf, format="JPEG", quality=85)
130
  return buf.getvalue()
131
 
132
- def b64_jpeg(img_bytes: bytes) -> str:
133
- return base64.b64encode(img_bytes).decode("utf-8")
134
 
135
  def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
136
  frames = []
@@ -153,59 +148,37 @@ def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_ex
153
  except Exception: pass
154
  return frames
155
 
156
- # --- Mistral interaction helpers ---
157
- def build_messages_for_image(prompt: str, b64_jpg: str):
158
- content = f"{prompt}\n\nImage (base64): data:image/jpeg;base64,{b64_jpg}"
159
- return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
160
-
161
- def build_messages_for_text(prompt: str, extra: str):
162
- return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": f"{prompt}\n\n{extra}"}]
163
-
164
- def extract_text_from_response(res, parts: list):
165
- try:
166
- choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
167
- except Exception:
168
- choices = []
169
- if not choices:
170
- parts.append(str(res))
171
- return
172
- try:
173
- first = choices[0]
174
- msg = first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first)
175
- if isinstance(msg, dict):
176
- content = msg.get("content")
177
- else:
178
- content = getattr(msg, "content", None)
179
- if isinstance(content, str):
180
- parts.append(content)
181
- else:
182
- parts.append(str(content))
183
- except Exception:
184
- parts.append(str(res))
185
-
186
- def chat_complete(client, model: str, messages: list) -> str:
187
- # Prefer client.chat.complete if available; otherwise attempt REST call
188
  parts = []
189
  try:
190
  if hasattr(client, "chat") and hasattr(client.chat, "complete"):
191
  res = client.chat.complete(model=model, messages=messages, stream=False)
 
192
  else:
193
- # Try basic HTTP request (Mistral REST)
194
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
195
- url = f"https://api.mistral.ai/v1/chat/completions"
196
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
197
- payload = {"model": model, "messages": messages}
198
- r = requests.post(url, json=payload, headers=headers, timeout=120)
199
  r.raise_for_status()
200
  res = r.json()
201
- extract_text_from_response(res, parts)
 
 
 
 
 
 
 
 
 
 
 
202
  except Exception as e:
203
- parts.append(f"Error during model call: {e}")
204
- return "".join(parts).strip()
205
 
206
  def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch") -> str:
207
  fname = filename or os.path.basename(path)
208
- # Prefer SDK upload if available
209
  try:
210
  if hasattr(client, "files") and hasattr(client.files, "upload"):
211
  with open(path, "rb") as fh:
@@ -216,7 +189,6 @@ def upload_file_to_mistral(client, path: str, filename: str | None = None, purpo
216
  return fid
217
  except Exception:
218
  pass
219
- # Fallback to HTTP upload
220
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
221
  url = "https://api.mistral.ai/v1/files"
222
  headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
@@ -228,106 +200,112 @@ def upload_file_to_mistral(client, path: str, filename: str | None = None, purpo
228
  jr = r.json()
229
  return jr.get("id") or jr.get("data", [{}])[0].get("id")
230
 
231
- def analyze_image(client, img_bytes: bytes, prompt: str) -> str:
232
- jpeg = convert_to_jpeg_bytes(img_bytes, base_h=640)
233
- b64 = b64_jpeg(jpeg)
234
- msgs = build_messages_for_image(prompt, b64)
235
- return chat_complete(client, PIXTRAL_MODEL, msgs)
236
-
237
- def analyze_frames_and_consolidate(client, frames: List[bytes], prompt: str) -> str:
238
- per_frame = []
239
- for i, fb in enumerate(frames):
240
- txt = analyze_image(client, fb, f"{prompt}\n\nFrame index: {i + 1}")
241
- per_frame.append(f"Frame {i + 1} analysis:\n{txt}")
242
- consolidation = (
243
- f"{prompt}\n\n"
244
- "Consolidate the key consistent observations across the provided frame analyses below. "
245
- "List consistent findings first, then note any differences between frames.\n\n"
246
- + "\n\n".join(per_frame)
247
- )
248
- msgs = build_messages_for_text(consolidation, "")
249
- summary = chat_complete(client, PIXTRAL_MODEL, msgs)
250
- return "\n\n".join(per_frame + [f"Consolidated summary:\n{summary}"])
251
-
252
- # --- Core processing ---
253
- def determine_media_type_from_remote(url: str) -> Tuple[bool, bool]:
254
- """
255
- Returns (is_image, is_video) based on HEAD content-type or URL extension
256
- """
257
- is_image = False
258
- is_video = False
259
- if not url:
260
- return is_image, is_video
261
- ext = ext_from_src(url)
262
- if ext in IMAGE_EXTS:
263
- is_image = True
264
- if ext in VIDEO_EXTS:
265
- is_video = True
266
- head = safe_head(url)
267
- if head is not None:
268
- ctype = (head.headers.get("content-type") or "").lower()
269
- if ctype.startswith("video/"):
270
- is_video = True; is_image = False
271
- elif ctype.startswith("image/"):
272
- is_image = True; is_video = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  return is_image, is_video
274
 
275
  def process_media(src: str, custom_prompt: str, api_key: str) -> str:
276
  client = get_client(api_key)
277
- prompt = custom_prompt.strip() or "Please provide a detailed visual review."
278
- ext = ext_from_src(src)
279
- is_image = ext in IMAGE_EXTS
280
- is_video = ext in VIDEO_EXTS
281
- if is_remote(src):
282
- ri, rv = determine_media_type_from_remote(src)
283
- if ri or rv:
284
- is_image, is_video = ri, rv
285
  if is_image:
286
  try:
287
  raw = fetch_bytes(src)
288
  except Exception as e:
289
  return f"Error fetching image: {e}"
290
  try:
291
- return analyze_image(client, raw, prompt)
292
  except UnidentifiedImageError:
293
  return "Error: provided file is not a valid image."
294
  except Exception as e:
295
- return f"Error processing image: {e}"
296
  if is_video:
297
  try:
298
  raw = fetch_bytes(src, timeout=120)
299
  except Exception as e:
300
  return f"Error fetching video: {e}"
301
- tmp_suffix = ext or ".mp4"
302
- tmp_path = save_bytes_to_temp(raw, suffix=tmp_suffix)
303
  try:
304
- # Try uploading file to Mistral first
305
- try:
306
- file_id = upload_file_to_mistral(client, tmp_path, filename=os.path.basename(src.split("?")[0]))
307
- extra = f"Uploaded video to Mistral Files with id: {file_id}\n\nInstruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
308
- msgs = build_messages_for_text(prompt, extra)
309
- return chat_complete(client, VIDEO_MODEL, msgs)
310
- except Exception:
311
- # fallback to extracting frames
312
- frames = extract_best_frames_bytes(tmp_path, sample_count=5)
313
- if not frames:
314
- return "Error: could not upload remote video and no frames extracted (ffmpeg missing or extraction failed)."
315
- return analyze_frames_and_consolidate(client, frames, prompt)
316
  finally:
317
- try: os.remove(tmp_path)
318
  except Exception: pass
319
- return "Unable to determine media type from the provided URL or file extension."
 
 
 
 
 
320
 
321
- # --- Gradio app UI helpers ---
322
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
323
 
324
  def load_preview(url: str):
325
- # Returns (preview_image, preview_video) where only one is visible at a time
326
  empty_img = gr.update(value=None, visible=False)
327
  empty_vid = gr.update(value=None, visible=False)
328
  if not url:
329
  return empty_img, empty_vid
330
- # Local file
331
  if not is_remote(url) and os.path.exists(url):
332
  ext = ext_from_src(url)
333
  if ext in VIDEO_EXTS:
@@ -335,30 +313,25 @@ def load_preview(url: str):
335
  if ext in IMAGE_EXTS:
336
  try:
337
  img = Image.open(url)
338
- if getattr(img, "is_animated", False):
339
- img.seek(0)
340
  return gr.update(value=img.convert("RGB"), visible=True), empty_vid
341
  except Exception:
342
  return empty_img, empty_vid
343
- # Remote: first try HEAD
344
  head = safe_head(url)
345
  if head:
346
  ctype = (head.headers.get("content-type") or "").lower()
347
  if ctype.startswith("video/") or any(url.lower().split("?")[0].endswith(ext) for ext in VIDEO_EXTS):
348
  return empty_img, gr.update(value=url, visible=True)
349
- # Finally try GET and attempt to open as image
350
  try:
351
  r = safe_get(url, timeout=15)
352
  img = Image.open(BytesIO(r.content))
353
- if getattr(img, "is_animated", False):
354
- img.seek(0)
355
  return gr.update(value=img.convert("RGB"), visible=True), empty_vid
356
  except Exception:
357
  return empty_img, empty_vid
358
 
359
- # --- Gradio app layout ---
360
- def create_app():
361
- with gr.Blocks(title="Flux Multimodal (fixed)", css=css) as demo:
362
  with gr.Row():
363
  with gr.Column(scale=1):
364
  url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
@@ -371,19 +344,30 @@ def create_app():
371
  with gr.Column(scale=2):
372
  final_md = gr.Markdown(value="")
373
 
374
- # Update preview on change
375
  url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
376
 
377
- def submit_wrapper(url, prompt, key):
 
 
378
  try:
379
- return process_media(url or "", prompt or "", key or "")
380
  except Exception as e:
381
- return f"Unhandled error: {e}"
 
 
 
 
 
 
 
 
 
382
 
383
- submit_btn.click(fn=submit_wrapper, inputs=[url_input, custom_prompt, api_key], outputs=[final_md])
 
384
 
385
  return demo
386
 
387
  if __name__ == "__main__":
388
- demo = create_app()
389
  demo.queue().launch()
 
11
  from typing import List, Tuple
12
  from PIL import Image, ImageFile, UnidentifiedImageError
13
  import gradio as gr
14
+ import threading
15
+ import time
16
 
17
+ # Optional SDK client (works if installed)
 
18
  try:
19
  from mistralai import Mistral
20
  except Exception:
21
+ Mistral = None
22
 
23
+ # Config
24
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
25
  PIXTRAL_MODEL = "pixtral-12b-2409"
26
  VIDEO_MODEL = "voxtral-mini-latest"
 
36
  "Do not invent sensory information not present in the media."
37
  )
38
 
39
+ # Pillow
40
  ImageFile.LOAD_TRUNCATED_IMAGES = True
41
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
42
 
43
+ # --- Helpers ---
44
  def get_client(key: str | None = None):
45
  api_key = (key or "").strip() or DEFAULT_KEY
46
  if Mistral is None:
47
+ class Dummy:
 
48
  def __init__(self, k): self.api_key = k
49
  return Dummy(api_key)
50
  return Mistral(api_key=api_key)
 
58
  _, ext = os.path.splitext((src or "").split("?")[0])
59
  return ext.lower()
60
 
61
+ def safe_head(url: str, timeout: int = 6):
62
  try:
63
  r = requests.head(url, timeout=timeout, allow_redirects=True)
64
  if r.status_code >= 400:
 
67
  except Exception:
68
  return None
69
 
70
+ def safe_get(url: str, timeout: int = 15):
71
  r = requests.get(url, timeout=timeout)
72
  r.raise_for_status()
73
  return r
74
 
75
  def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60) -> bytes:
76
  if is_remote(src):
 
77
  head = safe_head(src)
78
  if head is not None:
79
  cl = head.headers.get("content-length")
80
  try:
81
  if cl and int(cl) > stream_threshold:
 
82
  with requests.get(src, timeout=timeout, stream=True) as r:
83
  r.raise_for_status()
84
  fd, p = tempfile.mkstemp()
 
94
  try: os.remove(p)
95
  except Exception: pass
96
  except Exception:
 
97
  pass
 
98
  r = safe_get(src, timeout=timeout)
99
  return r.content
100
  else:
 
118
  if img.mode != "RGB":
119
  img = img.convert("RGB")
120
  h = base_h
 
121
  w = max(1, int(img.width * (h / img.height)))
122
  img = img.resize((w, h), Image.LANCZOS)
123
  buf = BytesIO()
124
  img.save(buf, format="JPEG", quality=85)
125
  return buf.getvalue()
126
 
127
+ def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
128
+ return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
129
 
130
  def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
131
  frames = []
 
148
  except Exception: pass
149
  return frames
150
 
151
+ # --- Mistral interaction (structured multimodal messages) ---
152
+ def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  parts = []
154
  try:
155
  if hasattr(client, "chat") and hasattr(client.chat, "complete"):
156
  res = client.chat.complete(model=model, messages=messages, stream=False)
157
+ # SDK response shape expected; extract later
158
  else:
 
159
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
160
+ url = "https://api.mistral.ai/v1/chat/completions"
161
  headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
162
+ r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
 
163
  r.raise_for_status()
164
  res = r.json()
165
+ # extract text
166
+ try:
167
+ choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
168
+ first = choices[0]
169
+ msg = first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first)
170
+ content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
171
+ if isinstance(content, str):
172
+ return content.strip()
173
+ # sometimes content is list or dict; stringify neatly
174
+ return str(content)
175
+ except Exception:
176
+ return str(res)
177
  except Exception as e:
178
+ return f"Error during model call: {e}"
 
179
 
180
  def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch") -> str:
181
  fname = filename or os.path.basename(path)
 
182
  try:
183
  if hasattr(client, "files") and hasattr(client.files, "upload"):
184
  with open(path, "rb") as fh:
 
189
  return fid
190
  except Exception:
191
  pass
 
192
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
193
  url = "https://api.mistral.ai/v1/files"
194
  headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
 
200
  jr = r.json()
201
  return jr.get("id") or jr.get("data", [{}])[0].get("id")
202
 
203
+ # --- Higher-level analysis functions ---
204
+ def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
205
+ # Convert to JPEG for consistent mime and reasonable size
206
+ jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
207
+ data_url = b64_bytes(jpeg, mime="image/jpeg")
208
+ # Build structured multimodal message expected by Pixtral
209
+ messages = [
210
+ {"role": "system", "content": SYSTEM_INSTRUCTION},
211
+ {"role": "user", "content": [
212
+ {"type": "text", "text": prompt},
213
+ {"type": "image_url", "image_url": data_url}
214
+ ]}
215
+ ]
216
+ return chat_complete(client, PIXTRAL_MODEL, messages)
217
+
218
+ def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
219
+ # Try upload + single unified instruction to video model
220
+ try:
221
+ file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
222
+ extra_msg = (
223
+ f"Uploaded video file id: {file_id}\n\n"
224
+ "Instruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations, "
225
+ "noting timestamps or notable segments only where necessary. Do NOT produce separate isolated per-frame reports; produce one integrated analysis."
226
+ )
227
+ messages = [
228
+ {"role": "system", "content": SYSTEM_INSTRUCTION},
229
+ {"role": "user", "content": extra_msg + "\n\n" + prompt}
230
+ ]
231
+ return chat_complete(client, VIDEO_MODEL, messages)
232
+ except Exception:
233
+ # Fallback: extract frames and send ONE consolidation request to PIXTRAL
234
+ frames = extract_best_frames_bytes(video_path, sample_count=6)
235
+ if not frames:
236
+ return "Error: could not upload video and no frames extracted (ffmpeg missing or failed)."
237
+ # Prepare a single message that includes all frames as image_url entries in one user content list
238
+ images_entries = []
239
+ for i, fb in enumerate(frames, start=1):
240
+ try:
241
+ j = convert_to_jpeg_bytes(fb, base_h=720)
242
+ images_entries.append({"type": "image_url", "image_url": b64_bytes(j, mime="image/jpeg"), "meta": {"frame_index": i}})
243
+ except Exception:
244
+ continue
245
+ content_list = [{"type":"text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + images_entries
246
+ messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content_list}]
247
+ return chat_complete(client, PIXTRAL_MODEL, messages)
248
+
249
+ # Determine type heuristics
250
+ def determine_media_type(src: str) -> Tuple[bool, bool]:
251
+ is_image = False; is_video = False
252
+ ext = ext_from_src(src)
253
+ if ext in IMAGE_EXTS: is_image = True
254
+ if ext in VIDEO_EXTS: is_video = True
255
+ if is_remote(src):
256
+ head = safe_head(src)
257
+ if head:
258
+ ctype = (head.headers.get("content-type") or "").lower()
259
+ if ctype.startswith("image/"):
260
+ is_image, is_video = True, False
261
+ elif ctype.startswith("video/"):
262
+ is_video, is_image = True, False
263
  return is_image, is_video
264
 
265
  def process_media(src: str, custom_prompt: str, api_key: str) -> str:
266
  client = get_client(api_key)
267
+ prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
268
+ if not src:
269
+ return "No URL or path provided."
270
+ is_image, is_video = determine_media_type(src)
271
+ # If extension ambiguous but remote, prefer HEAD detection above.
 
 
 
272
  if is_image:
273
  try:
274
  raw = fetch_bytes(src)
275
  except Exception as e:
276
  return f"Error fetching image: {e}"
277
  try:
278
+ return analyze_image_structured(client, raw, prompt)
279
  except UnidentifiedImageError:
280
  return "Error: provided file is not a valid image."
281
  except Exception as e:
282
+ return f"Error analyzing image: {e}"
283
  if is_video:
284
  try:
285
  raw = fetch_bytes(src, timeout=120)
286
  except Exception as e:
287
  return f"Error fetching video: {e}"
288
+ tmp = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
 
289
  try:
290
+ return analyze_video_cohesive(client, tmp, prompt)
 
 
 
 
 
 
 
 
 
 
 
291
  finally:
292
+ try: os.remove(tmp)
293
  except Exception: pass
294
+ # As last resort, try to fetch and treat as image
295
+ try:
296
+ raw = fetch_bytes(src)
297
+ return analyze_image_structured(client, raw, prompt)
298
+ except Exception as e:
299
+ return f"Unable to determine media type or fetch file: {e}"
300
 
301
+ # --- Gradio UI ---
302
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
303
 
304
  def load_preview(url: str):
 
305
  empty_img = gr.update(value=None, visible=False)
306
  empty_vid = gr.update(value=None, visible=False)
307
  if not url:
308
  return empty_img, empty_vid
 
309
  if not is_remote(url) and os.path.exists(url):
310
  ext = ext_from_src(url)
311
  if ext in VIDEO_EXTS:
 
313
  if ext in IMAGE_EXTS:
314
  try:
315
  img = Image.open(url)
316
+ if getattr(img, "is_animated", False): img.seek(0)
 
317
  return gr.update(value=img.convert("RGB"), visible=True), empty_vid
318
  except Exception:
319
  return empty_img, empty_vid
 
320
  head = safe_head(url)
321
  if head:
322
  ctype = (head.headers.get("content-type") or "").lower()
323
  if ctype.startswith("video/") or any(url.lower().split("?")[0].endswith(ext) for ext in VIDEO_EXTS):
324
  return empty_img, gr.update(value=url, visible=True)
 
325
  try:
326
  r = safe_get(url, timeout=15)
327
  img = Image.open(BytesIO(r.content))
328
+ if getattr(img, "is_animated", False): img.seek(0)
 
329
  return gr.update(value=img.convert("RGB"), visible=True), empty_vid
330
  except Exception:
331
  return empty_img, empty_vid
332
 
333
+ def create_demo():
334
+ with gr.Blocks(title="Flux Multimodal (Pixtral fixed)", css=css) as demo:
 
335
  with gr.Row():
336
  with gr.Column(scale=1):
337
  url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
 
344
  with gr.Column(scale=2):
345
  final_md = gr.Markdown(value="")
346
 
 
347
  url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
348
 
349
+ # Disable button while processing to avoid race conditions where nothing appears
350
+ def submit_wrapper(url, prompt, key, btn):
351
+ btn.update(interactive=False)
352
  try:
353
+ out = process_media(url or "", prompt or "", key or "")
354
  except Exception as e:
355
+ out = f"Unhandled error: {e}"
356
+ finally:
357
+ # re-enable after short pause to ensure UI refresh
358
+ time.sleep(0.2)
359
+ btn.update(interactive=True)
360
+ return out
361
+
362
+ # Plain handler that only calls process_media, with no button toggling (not wired to any event below)
363
+ def on_click(url, prompt, key):
364
+ return process_media(url or "", prompt or "", key or "")
365
 
366
+ # Wire the click to submit_wrapper, passing the button itself as an extra input
367
+ submit_btn.click(fn=submit_wrapper, inputs=[url_input, custom_prompt, api_key, submit_btn], outputs=[final_md])
368
 
369
  return demo
370
 
371
  if __name__ == "__main__":
372
+ demo = create_demo()
373
  demo.queue().launch()