Hug0endob committed on
Commit
fa51555
·
verified ·
1 Parent(s): b140dcf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -233
app.py CHANGED
@@ -10,10 +10,9 @@ from PIL import Image, ImageFile, UnidentifiedImageError
10
  import gradio as gr
11
  from mistralai import Mistral
12
 
13
- # Config
14
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
15
- PIXTRAL_MODEL = "pixtral-12b-2409" # image-capable multimodal model
16
- VIDEO_MODEL = "voxtral-mini-latest" # replace with your preferred video model
17
  STREAM_THRESHOLD = 20 * 1024 * 1024
18
  FFMPEG_BIN = shutil.which("ffmpeg")
19
 
@@ -30,7 +29,7 @@ Image.MAX_IMAGE_PIXELS = 10000 * 10000
30
  IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
31
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
32
 
33
- def get_client(key: str = None):
34
  api_key = (key or "").strip() or DEFAULT_KEY
35
  return Mistral(api_key=api_key)
36
 
@@ -63,14 +62,14 @@ def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> byte
63
  with open(src, "rb") as f:
64
  return f.read()
65
 
66
- def save_bytes_to_temp(b: bytes, suffix: str):
67
  fd, path = tempfile.mkstemp(suffix=suffix)
68
  os.close(fd)
69
  with open(path, "wb") as f:
70
  f.write(b)
71
  return path
72
 
73
- def convert_to_jpeg_bytes(img_bytes: bytes, base_h=480) -> bytes:
74
  img = Image.open(BytesIO(img_bytes))
75
  try:
76
  if getattr(img, "is_animated", False):
@@ -89,21 +88,14 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h=480) -> bytes:
89
  def b64_jpeg(img_bytes: bytes) -> str:
90
  return base64.b64encode(img_bytes).decode("utf-8")
91
 
92
- def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_probe: int = 10, timeout_extract: int = 15):
93
  if not FFMPEG_BIN or not os.path.exists(media_path):
94
  return []
95
- frames = []
96
  probe_cmd = [FFMPEG_BIN, "-v", "error", "-show_entries", "format=duration",
97
  "-of", "default=noprint_wrappers=1:nokey=1", media_path]
98
- proc = subprocess.Popen(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
99
- try:
100
- out, _ = proc.communicate(timeout=timeout_probe)
101
- except subprocess.TimeoutExpired:
102
- proc.kill()
103
- out, _ = proc.communicate()
104
- duration = None
105
  try:
106
- duration = float(out.strip().split(b"\n")[0]) if out else None
 
107
  except Exception:
108
  duration = None
109
 
@@ -112,42 +104,29 @@ def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_pr
112
  else:
113
  timestamps = [0.5, 1.0, 2.0][:sample_count]
114
 
 
115
  for i, t in enumerate(timestamps):
116
- fd, tmp_frame = tempfile.mkstemp(suffix=f"_{i}.jpg")
117
  os.close(fd)
118
- cmd = [
119
- FFMPEG_BIN, "-nostdin", "-y", "-i", media_path,
120
- "-ss", str(t), "-frames:v", "1", "-q:v", "2", tmp_frame
121
- ]
122
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
123
  try:
124
- proc.communicate(timeout=timeout_extract)
125
- except subprocess.TimeoutExpired:
126
- try: proc.kill()
 
 
 
127
  except Exception: pass
128
- proc.communicate()
129
- if proc.returncode == 0 and os.path.exists(tmp_frame) and os.path.getsize(tmp_frame) > 0:
130
- with open(tmp_frame, "rb") as f:
131
- frames.append(f.read())
132
- try:
133
- if os.path.exists(tmp_frame): os.remove(tmp_frame)
134
- except Exception:
135
- pass
136
  return frames
137
 
138
- def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
139
  fname = filename or os.path.basename(path)
140
  try:
141
  with open(path, "rb") as fh:
142
  res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
143
  fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
144
  if not fid:
145
- try:
146
- fid = res["data"][0]["id"]
147
- except Exception:
148
- pass
149
- if not fid:
150
- raise RuntimeError(f"No file id returned: {res}")
151
  return fid
152
  except Exception:
153
  api_key = client.api_key if hasattr(client, "api_key") else os.getenv("MISTRAL_API_KEY", "")
@@ -159,269 +138,163 @@ def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
159
  r = requests.post(url, headers=headers, files=files, data=data, timeout=120)
160
  r.raise_for_status()
161
  jr = r.json()
162
- fid = jr.get("id") or jr.get("data", [{}])[0].get("id")
163
- if not fid:
164
- raise RuntimeError(f"Upload failed to return id: {jr}")
165
- return fid
166
 
167
- def build_messages_for_image(prompt: str, b64_jpg: str = None, image_url: str = None):
168
- if image_url:
169
- content = f"{prompt}\n\nImage: {image_url}"
170
- elif b64_jpg:
171
- content = f"{prompt}\n\nImage (base64): data:image/jpeg;base64,{b64_jpg}"
172
- else:
173
- raise ValueError("Either image_url or b64_jpg required")
174
  return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
175
 
176
- def build_messages_for_text(prompt: str, extra_text: str):
177
- return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": f"{prompt}\n\n{extra_text}"}]
178
 
179
- def extract_delta(chunk):
180
- if not chunk:
181
- return None
182
- data = getattr(chunk, "data", None) or getattr(chunk, "response", None) or getattr(chunk, "delta", None)
183
- if not data:
184
- return None
185
  try:
186
- content = data.choices[0].delta.content
187
- if content is None:
188
- return None
189
- return str(content)
190
  except Exception:
191
- pass
 
 
 
192
  try:
193
- msg = data.choices[0].message
194
  if isinstance(msg, dict):
195
  content = msg.get("content")
196
  else:
197
  content = getattr(msg, "content", None)
198
- if content is None:
199
- return None
200
- return str(content)
201
- except Exception:
202
- pass
203
- try:
204
- return str(data)
205
- except Exception:
206
- return None
207
-
208
- def extract_text_from_response(res, parts: list):
209
- try:
210
- choices = getattr(res, "choices", None) or res.get("choices", [])
211
- except Exception:
212
- choices = []
213
- if choices:
214
- try:
215
- msg = choices[0].message
216
- if isinstance(msg, dict):
217
- content = msg.get("content")
218
- else:
219
- content = getattr(msg, "content", None)
220
- if content:
221
- if isinstance(content, str):
222
- parts.append(content)
223
- else:
224
- if isinstance(content, list):
225
- for c in content:
226
- if isinstance(c, dict) and c.get("type") == "text":
227
- parts.append(c.get("text", ""))
228
- elif isinstance(content, dict):
229
- text = content.get("text") or content.get("content")
230
- if text:
231
- parts.append(text)
232
- except Exception:
233
- parts.append(str(res))
234
- else:
235
- parts.append(str(res))
236
-
237
- def stream_and_collect(client, model, messages, parts: list):
238
- norm_msgs = []
239
- for m in messages:
240
- if not isinstance(m, dict):
241
- norm_msgs.append(m)
242
- continue
243
- c = m.get("content")
244
- if isinstance(c, list):
245
- picked = []
246
- for item in c:
247
- if isinstance(item, dict):
248
- if item.get("type") == "image_url" and item.get("image_url"):
249
- picked.append(item["image_url"])
250
- elif item.get("type") == "image_base64" and item.get("image_base64"):
251
- picked.append("data:image/jpeg;base64," + item["image_base64"])
252
- elif item.get("type") == "text" and item.get("text"):
253
- picked.append(item["text"])
254
- elif isinstance(item, str):
255
- picked.append(item)
256
- newc = "\n\n".join(p for p in picked if p).strip()
257
- nm = m.copy()
258
- nm["content"] = newc
259
- norm_msgs.append(nm)
260
  else:
261
- if not isinstance(c, str):
262
- nm = m.copy()
263
- nm["content"] = str(c or "")
264
- norm_msgs.append(nm)
265
- else:
266
- norm_msgs.append(m)
267
-
268
- stream_gen = None
269
- try:
270
- stream_gen = client.chat.stream(model=model, messages=norm_msgs)
271
  except Exception:
272
- stream_gen = None
273
- if stream_gen:
274
- for chunk in stream_gen:
275
- d = extract_delta(chunk)
276
- if d is None:
277
- continue
278
- if d.strip() == "" and parts:
279
- continue
280
- parts.append(d)
281
- return
282
-
283
- res = client.chat.complete(model=model, messages=norm_msgs, stream=False)
284
- extract_text_from_response(res, parts)
285
 
286
- def analyze_image_bytes(client, img_bytes: bytes, prompt: str, model=PIXTRAL_MODEL):
287
- jpg = convert_to_jpeg_bytes(img_bytes, base_h=480)
288
- b64 = b64_jpeg(jpg)
289
- msgs = build_messages_for_image(prompt, b64_jpg=b64)
290
  parts = []
291
- stream_and_collect(client, model, msgs, parts)
 
292
  return "".join(parts).strip()
293
 
294
- def analyze_multiple_frames(client, frames_bytes_list, prompt: str, model=PIXTRAL_MODEL):
295
- results = []
296
- for i, fb in enumerate(frames_bytes_list):
297
- res = analyze_image_bytes(client, fb, f"{prompt}\n\nFrame index: {i+1}", model=model)
298
- results.append((i, res))
299
- merged = []
300
- for i, text in results:
301
- merged.append(f"Frame {i+1} analysis:\n{text}")
302
- consolidation_prompt = (
303
- prompt
304
- + "\n\nConsolidate the key consistent observations across the provided frame analyses below. "
305
- "List consistent findings first, then note any differences between frames."
306
- + "\n\n" + "\n\n".join(f"Frame {i+1}:\n{text}" for i, text in results)
 
 
 
307
  )
308
- parts = []
309
- msgs = build_messages_for_text(consolidation_prompt, "")
310
- stream_and_collect(client, PIXTRAL_MODEL, msgs, parts)
311
- consolidated = "".join(parts).strip()
312
- if consolidated:
313
- merged.append("Consolidated summary:\n" + consolidated)
314
- return "\n\n".join(merged)
315
 
316
- def generate_final_text(src: str, custom_prompt: str, api_key: str):
317
  client = get_client(api_key)
318
- prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
319
- parts = []
320
  ext = ext_from_src(src)
321
- is_image = ext in IMAGE_EXTS or (not is_remote(src) and os.path.isfile(src) and ext in IMAGE_EXTS)
322
- is_video = ext in VIDEO_EXTS or (not is_remote(src) and os.path.isfile(src) and ext in VIDEO_EXTS)
323
-
324
- # If remote and content-type suggests video, treat as video
325
  if is_remote(src):
326
  try:
327
- r = requests.head(src, timeout=10, allow_redirects=True)
328
- ctype = (r.headers.get("content-type") or "").lower()
329
  if ctype.startswith("video/"):
330
- is_video = True
331
  elif ctype.startswith("image/"):
332
- is_image = True
333
  except Exception:
334
  pass
335
-
336
  if is_image:
337
  try:
338
  raw = fetch_bytes(src)
339
  except Exception as e:
340
  return f"Error fetching image: {e}"
341
  try:
342
- return analyze_image_bytes(client, raw, prompt, model=PIXTRAL_MODEL)
343
  except UnidentifiedImageError:
344
  return "Error: provided file is not a valid image."
345
  except Exception as e:
346
  return f"Error processing image: {e}"
347
-
348
  if is_video:
349
- tmp_media = None
 
 
 
 
350
  try:
351
  try:
352
- media_bytes = fetch_bytes(src, timeout=120)
353
- except Exception as e:
354
- return f"Error fetching video: {e}"
355
- ext = ext_from_src(src) or ".mp4"
356
- tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
357
- try:
358
- file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src.split("?")[0]))
359
  extra = (
360
  f"Uploaded video to Mistral Files with id: {file_id}\n\n"
361
  "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
362
  )
363
  msgs = build_messages_for_text(prompt, extra)
364
- stream_and_collect(client, VIDEO_MODEL, msgs, parts)
365
- return "".join(parts).strip()
366
  except Exception:
367
- frames = extract_best_frames_bytes(tmp_media, sample_count=5)
368
  if not frames:
369
  return "Error: could not upload remote video and no frames extracted."
370
- return analyze_multiple_frames(client, frames, prompt, model=PIXTRAL_MODEL)
371
  finally:
372
- try:
373
- if tmp_media and os.path.exists(tmp_media):
374
- os.remove(tmp_media)
375
- except Exception:
376
- pass
377
-
378
  return "Unable to determine media type from the provided URL or file extension."
379
 
380
- # UI helpers
381
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
 
382
  def load_preview(url: str):
383
  if not url:
384
  return None, None, ""
 
 
 
 
 
 
 
 
 
 
385
  try:
386
- r = requests.get(url, timeout=30, stream=True)
387
- r.raise_for_status()
388
- ctype = (r.headers.get("content-type") or "").lower()
389
- if (ctype and ctype.startswith("video/")) or any(url.lower().split("?")[0].endswith(ext) for ext in VIDEO_EXTS):
390
  return None, url, "Video"
391
- data = r.content
392
- try:
393
- img = Image.open(BytesIO(data))
394
- if getattr(img, "is_animated", False):
395
- img.seek(0)
396
- img = img.convert("RGB")
397
- except UnidentifiedImageError:
398
- return None, None, "Preview failed"
399
- return img, None, "Image"
400
  except Exception:
401
  return None, None, "Preview failed"
402
 
403
- # Gradio app
404
- with gr.Blocks(title="Flux Multimodal", css=css) as demo:
405
  with gr.Row():
406
  with gr.Column(scale=1):
407
- url_input = gr.Textbox(label="Image or Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
408
  custom_prompt = gr.Textbox(label="Prompt (optional)", lines=2, value="")
409
  with gr.Accordion("Mistral API Key (optional)", open=False):
410
  api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
411
- submit = gr.Button("Submit")
412
  preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
413
  preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
414
-
415
  with gr.Column(scale=2):
416
- final_text = gr.Markdown(value="")
417
 
418
- def _preview_wrapper(url):
419
  img, vid, label = load_preview(url)
420
- return img, vid, label
421
 
422
- url_input.change(fn=_preview_wrapper, inputs=[url_input], outputs=[preview_image, preview_video, gr.Textbox(visible=False)])
423
- submit.click(fn=generate_final_text, inputs=[url_input, custom_prompt, api_key], outputs=[final_text])
424
- demo.queue()
425
 
426
  if __name__ == "__main__":
427
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
10
  import gradio as gr
11
  from mistralai import Mistral
12
 
 
13
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
14
+ PIXTRAL_MODEL = "pixtral-12b-2409"
15
+ VIDEO_MODEL = "voxtral-mini-latest"
16
  STREAM_THRESHOLD = 20 * 1024 * 1024
17
  FFMPEG_BIN = shutil.which("ffmpeg")
18
 
 
29
  IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
30
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
31
 
32
def get_client(key: str | None = None) -> Mistral:
    """Build a Mistral client from an explicit key or the configured default.

    Args:
        key: API key supplied by the caller. ``None``, an empty string, or a
            whitespace-only string falls back to ``DEFAULT_KEY``.

    Returns:
        A ``Mistral`` client constructed with the resolved key.
    """
    api_key = (key or "").strip() or DEFAULT_KEY
    return Mistral(api_key=api_key)
35
 
 
62
  with open(src, "rb") as f:
63
  return f.read()
64
 
65
def save_bytes_to_temp(b: bytes, suffix: str) -> str:
    """Write *b* to a freshly created temporary file and return its path.

    The caller owns the returned file and is responsible for deleting it.

    Args:
        b: Payload to write.
        suffix: File name suffix (for example ``".mp4"``) passed to
            ``tempfile.mkstemp``.

    Returns:
        Path of the temporary file containing *b*.

    Raises:
        Exception: Whatever the write raised; the temporary file is removed
            before the error is re-raised so no partial file is left behind.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    try:
        # Write through the descriptor mkstemp already opened rather than
        # closing it and reopening the file by name.
        with os.fdopen(fd, "wb") as f:
            f.write(b)
    except Exception:
        try:
            os.remove(path)
        except OSError:
            pass  # nothing more we can do; surface the original error
        raise
    return path
71
 
72
+ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
73
  img = Image.open(BytesIO(img_bytes))
74
  try:
75
  if getattr(img, "is_animated", False):
 
88
def b64_jpeg(img_bytes: bytes) -> str:
    """Return *img_bytes* encoded as a base64 text string.

    Despite the name, the function does not inspect the payload; any bytes
    are encoded as-is.
    """
    return base64.b64encode(img_bytes).decode("utf-8")
90
 
91
+ def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_probe: int = 10, timeout_extract: int = 15) -> list:
92
  if not FFMPEG_BIN or not os.path.exists(media_path):
93
  return []
 
94
  probe_cmd = [FFMPEG_BIN, "-v", "error", "-show_entries", "format=duration",
95
  "-of", "default=noprint_wrappers=1:nokey=1", media_path]
 
 
 
 
 
 
 
96
  try:
97
+ out = subprocess.check_output(probe_cmd, timeout=timeout_probe).strip()
98
+ duration = float(out) if out else None
99
  except Exception:
100
  duration = None
101
 
 
104
  else:
105
  timestamps = [0.5, 1.0, 2.0][:sample_count]
106
 
107
+ frames = []
108
  for i, t in enumerate(timestamps):
109
+ fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
110
  os.close(fd)
111
+ cmd = [FFMPEG_BIN, "-nostdin", "-y", "-i", media_path, "-ss", str(t), "-frames:v", "1", "-q:v", "2", tmp]
 
 
 
 
112
  try:
113
+ subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
114
+ if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
115
+ with open(tmp, "rb") as f:
116
+ frames.append(f.read())
117
+ finally:
118
+ try: os.remove(tmp)
119
  except Exception: pass
 
 
 
 
 
 
 
 
120
  return frames
121
 
122
+ def upload_file_to_mistral(client: Mistral, path: str, filename: str | None = None, purpose: str = "batch") -> str:
123
  fname = filename or os.path.basename(path)
124
  try:
125
  with open(path, "rb") as fh:
126
  res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
127
  fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
128
  if not fid:
129
+ fid = res["data"][0]["id"]
 
 
 
 
 
130
  return fid
131
  except Exception:
132
  api_key = client.api_key if hasattr(client, "api_key") else os.getenv("MISTRAL_API_KEY", "")
 
138
  r = requests.post(url, headers=headers, files=files, data=data, timeout=120)
139
  r.raise_for_status()
140
  jr = r.json()
141
+ return jr.get("id") or jr.get("data", [{}])[0].get("id")
 
 
 
142
 
143
def build_messages_for_image(prompt: str, b64_jpg: str) -> list:
    """Build the chat messages for a single-image request.

    The image is sent as a structured ``image_url`` content chunk carrying a
    JPEG data URI. Pasting the base64 payload into the prompt text would make
    the model read it as a long string instead of receiving an image.

    Args:
        prompt: Instruction text for the user turn.
        b64_jpg: Base64-encoded JPEG bytes (no ``data:`` prefix).

    Returns:
        A two-message list: the system instruction followed by a user message
        whose content is a text chunk plus an image chunk.
    """
    user_content = [
        {"type": "text", "text": prompt},
        {"type": "image_url", "image_url": f"data:image/jpeg;base64,{b64_jpg}"},
    ]
    return [
        {"role": "system", "content": SYSTEM_INSTRUCTION},
        {"role": "user", "content": user_content},
    ]
146
 
147
def build_messages_for_text(prompt: str, extra: str) -> list:
    """Build the chat messages for a text-only request.

    Args:
        prompt: Main instruction text.
        extra: Additional context appended after a blank line. When empty,
            the prompt is sent alone so the user message has no dangling
            trailing newlines.

    Returns:
        A two-message list: the system instruction followed by the user text.
    """
    user_text = f"{prompt}\n\n{extra}" if extra else prompt
    return [
        {"role": "system", "content": SYSTEM_INSTRUCTION},
        {"role": "user", "content": user_text},
    ]
149
 
150
def _field(obj, name):
    """Read *name* from a mapping or an attribute-style object (None if absent)."""
    if isinstance(obj, dict):
        return obj.get(name)
    return getattr(obj, name, None)


def extract_text_from_response(res, parts: list) -> None:
    """Append the text of the first choice in *res* to *parts*.

    Both attribute-style responses and plain dicts are accepted. Message
    content may be a string, a list of chunks (strings, dicts, or objects
    exposing ``text``), or ``None``.

    Args:
        res: Chat completion response.
        parts: Output list that receives the extracted text fragments.

    Behaviour:
        * No choices, or a first choice that cannot be read: ``str(res)`` is
          appended so the caller still sees something diagnosable.
        * ``None`` content: nothing is appended (rather than the literal
          string ``"None"``).
        * List content: each textual chunk is appended in order; non-text
          chunks are skipped.
        * Any other content type: its ``str()`` is appended.
    """
    choices = _field(res, "choices")
    if not choices:
        parts.append(str(res))
        return

    try:
        message = _field(choices[0], "message")
    except (TypeError, KeyError, IndexError):
        message = None
    if message is None:
        parts.append(str(res))
        return

    content = _field(message, "content")
    if content is None:
        return
    if isinstance(content, str):
        parts.append(content)
        return
    if isinstance(content, (list, tuple)):
        for chunk in content:
            text = chunk if isinstance(chunk, str) else _field(chunk, "text")
            if isinstance(text, str):
                parts.append(text)
        return
    parts.append(str(content))
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
def chat_complete(client: Mistral, model: str, messages: list) -> str:
    """Run one non-streaming chat completion and return its text.

    Args:
        client: Mistral client used for the request.
        model: Model identifier passed to ``client.chat.complete``.
        messages: Chat messages to send.

    Returns:
        The text collected by ``extract_text_from_response``, joined and
        stripped of surrounding whitespace.
    """
    parts: list = []
    res = client.chat.complete(model=model, messages=messages, stream=False)
    extract_text_from_response(res, parts)
    return "".join(parts).strip()
176
 
177
def analyze_image(client: Mistral, img_bytes: bytes, prompt: str) -> str:
    """Describe a single image with the image-capable model.

    The raw bytes are first passed through ``convert_to_jpeg_bytes`` with
    ``base_h=480``, then base64-encoded and sent with *prompt* to
    ``PIXTRAL_MODEL``.

    Args:
        client: Mistral client used for the request.
        img_bytes: Raw image bytes in any format the converter accepts.
        prompt: Instruction text for the model.

    Returns:
        The model's response text.
    """
    jpeg = convert_to_jpeg_bytes(img_bytes, base_h=480)
    msgs = build_messages_for_image(prompt, b64_jpeg(jpeg))
    return chat_complete(client, PIXTRAL_MODEL, msgs)
182
+
183
def analyze_frames_and_consolidate(client: Mistral, frames: list, prompt: str) -> str:
    """Analyze each frame separately, then ask the model for a combined summary.

    Args:
        client: Mistral client used for every request.
        frames: Image bytes, one entry per extracted frame.
        prompt: Instruction text applied to each frame and to the summary.

    Returns:
        The per-frame analyses separated by blank lines, followed by a
        "Consolidated summary" section when the model returned one. An empty
        string when *frames* is empty (no request is made in that case).
    """
    per_frame = []
    for number, frame in enumerate(frames, start=1):
        text = analyze_image(client, frame, f"{prompt}\n\nFrame index: {number}")
        per_frame.append(f"Frame {number} analysis:\n{text}")
    if not per_frame:
        # Nothing to consolidate; avoid a pointless model call.
        return ""

    consolidation = (
        f"{prompt}\n\n"
        "Consolidate the key consistent observations across the provided frame analyses below. "
        "List consistent findings first, then note any differences between frames.\n\n"
        + "\n\n".join(per_frame)
    )
    summary = chat_complete(client, PIXTRAL_MODEL, build_messages_for_text(consolidation, ""))

    sections = list(per_frame)
    if summary:
        # Only add the heading when there is something to put under it.
        sections.append(f"Consolidated summary:\n{summary}")
    return "\n\n".join(sections)
 
 
 
 
197
 
198
def process_media(src: str, custom_prompt: str, api_key: str) -> str:
    """Analyze an image or video given by URL or local path and return text.

    The media type is guessed from the file extension; for remote sources a
    HEAD request's content type overrides that guess when it is conclusive.
    Images are analyzed directly. Videos are downloaded to a temporary file,
    uploaded to Mistral and analyzed by ``VIDEO_MODEL``; if that path fails,
    frames are extracted locally and analyzed one by one instead.

    Args:
        src: URL or local file path of the media.
        custom_prompt: Optional user prompt; a default is used when it is
            empty or ``None``.
        api_key: Optional API key forwarded to ``get_client``.

    Returns:
        The analysis text, or a human-readable error message. Errors are
        returned as strings rather than raised so the UI can display them.
    """
    src = (src or "").strip()
    if not src:
        return "Error: no URL or file path provided."

    client = get_client(api_key)
    prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."

    ext = ext_from_src(src)
    is_image = ext in IMAGE_EXTS
    is_video = ext in VIDEO_EXTS

    if is_remote(src):
        try:
            h = requests.head(src, timeout=8, allow_redirects=True)
            ctype = (h.headers.get("content-type") or "").lower()
            if ctype.startswith("video/"):
                is_video, is_image = True, False
            elif ctype.startswith("image/"):
                is_image, is_video = True, False
        except Exception:
            # Best-effort probe: keep the extension-based guess if HEAD fails.
            pass

    if is_image:
        try:
            raw = fetch_bytes(src)
        except Exception as e:
            return f"Error fetching image: {e}"
        try:
            return analyze_image(client, raw, prompt)
        except UnidentifiedImageError:
            return "Error: provided file is not a valid image."
        except Exception as e:
            return f"Error processing image: {e}"

    if is_video:
        try:
            raw = fetch_bytes(src, timeout=120)
        except Exception as e:
            return f"Error fetching video: {e}"
        tmp_path = save_bytes_to_temp(raw, suffix=ext or ".mp4")
        try:
            try:
                file_id = upload_file_to_mistral(
                    client, tmp_path, filename=os.path.basename(src.split("?")[0])
                )
                extra = (
                    f"Uploaded video to Mistral Files with id: {file_id}\n\n"
                    "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
                )
                msgs = build_messages_for_text(prompt, extra)
                return chat_complete(client, VIDEO_MODEL, msgs)
            except Exception:
                # Upload/analysis failed: fall back to local frame sampling.
                frames = extract_best_frames_bytes(tmp_path, sample_count=5)
                if not frames:
                    return "Error: could not upload remote video and no frames extracted."
                try:
                    return analyze_frames_and_consolidate(client, frames, prompt)
                except Exception as e:
                    # Mirror the image branch: report, don't raise into the UI.
                    return f"Error processing video: {e}"
        finally:
            try:
                os.remove(tmp_path)
            except Exception:
                pass  # temp-file cleanup is best-effort

    return "Unable to determine media type from the provided URL or file extension."
249
 
 
250
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
251
+
252
def load_preview(url: str):
    """Resolve a preview for *url*, which may be a remote URL or a local path.

    Args:
        url: Remote URL or local file path typed by the user.

    Returns:
        A ``(image, video, label)`` tuple:

        * ``(PIL image, None, "Image")`` for images (first frame of animations),
        * ``(None, path_or_url, "Video")`` for videos,
        * ``(None, None, "Preview failed")`` when nothing could be loaded,
        * ``(None, None, "")`` when *url* is empty.
    """
    if not url:
        return None, None, ""

    if not is_remote(url) and os.path.exists(url):
        ext = ext_from_src(url)
        if ext in VIDEO_EXTS:
            return None, os.path.abspath(url), "Video"
        if ext in IMAGE_EXTS:
            try:
                # convert() returns an independent image, so the file handle
                # can be released as soon as the conversion is done.
                with Image.open(url) as img:
                    return img.convert("RGB"), None, "Image"
            except Exception:
                return None, None, "Preview failed"
        # An existing local file of unknown type cannot be fetched over HTTP
        # either, so report the failure without attempting a request.
        return None, None, "Preview failed"

    looks_like_video = url.lower().split("?")[0].endswith(VIDEO_EXTS)
    try:
        ctype = ""
        try:
            h = requests.head(url, timeout=8, allow_redirects=True)
            ctype = (h.headers.get("content-type") or "").lower()
        except Exception:
            # Some servers reject HEAD; fall back to the extension and to GET.
            pass
        if ctype.startswith("video/") or looks_like_video:
            return None, url, "Video"
        r = requests.get(url, timeout=20)
        r.raise_for_status()
        with Image.open(BytesIO(r.content)) as img:
            if getattr(img, "is_animated", False):
                img.seek(0)
            return img.convert("RGB"), None, "Image"
    except Exception:
        return None, None, "Preview failed"
278
 
279
# Gradio UI: inputs and previews on the left, model output on the right.
with gr.Blocks(title="Flux Multimodal (fixed)", css=css) as demo:
    with gr.Row():
        with gr.Column(scale=1):
            url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
            custom_prompt = gr.Textbox(label="Prompt (optional)", lines=2, value="")
            with gr.Accordion("Mistral API Key (optional)", open=False):
                api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
            submit_btn = gr.Button("Submit")
            # Both previews start hidden; preview_update reveals the one that applies.
            preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
            preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
        with gr.Column(scale=2):
            final_md = gr.Markdown(value="")

    def preview_update(url):
        """Return value/visibility updates for the image and video previews.

        Each component appears once in the outputs and receives a single
        update carrying both its value and its visibility. Returning bare
        booleans into the components would set their *value* to True/False
        and never change visibility.
        """
        img, vid, label = load_preview(url)
        show_image = label == "Image"
        show_video = label == "Video"
        return (
            gr.update(value=img if show_image else None, visible=show_image),
            gr.update(value=vid if show_video else None, visible=show_video),
        )

    url_input.change(fn=preview_update, inputs=[url_input], outputs=[preview_image, preview_video])
    submit_btn.click(fn=process_media, inputs=[url_input, custom_prompt, api_key], outputs=[final_md])


if __name__ == "__main__":
    demo.queue().launch()