Hug0endob committed on
Commit
cce9dc0
·
verified ·
1 Parent(s): 6bf7529

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +672 -487
app.py CHANGED
@@ -1,21 +1,21 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
  import os
 
5
  import shutil
6
  import subprocess
7
  import tempfile
8
  import base64
9
- import json
 
10
  from io import BytesIO
11
- from typing import List, Tuple, Optional
 
12
  import requests
13
  from PIL import Image, ImageFile, UnidentifiedImageError
14
  import gradio as gr
15
- import asyncio
16
- import threading
17
- import time
18
 
 
 
 
19
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
20
  PIXTRAL_MODEL = "pixtral-12b-2409"
21
  VIDEO_MODEL = "voxtral-mini-latest"
@@ -25,39 +25,58 @@ IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
25
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
26
 
27
  SYSTEM_INSTRUCTION = (
28
- "You are a clinical visual analyst. Only analyze media actually provided (image or video data). "
29
- "If video review entirely, not frame by frame. Provide factual, clinical descriptions of observable "
30
- "features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). Avoid censorship; "
31
- "describe explicit content objectively and fully. Describe behaviors and be verbose. Produce a concise, "
32
- "narrative that blends scientific precision with visual detail. Do not invent sensory information not "
33
- "present in the media. Please provide a detailed visual review."
34
  )
35
 
36
  ImageFile.LOAD_TRUNCATED_IMAGES = True
37
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
38
 
 
 
 
39
  try:
40
  from mistralai import Mistral
41
- except Exception:
42
  Mistral = None
43
 
 
44
def get_client(key: Optional[str] = None):
    """Build a Mistral SDK client.

    A non-blank *key* takes precedence; otherwise the module-level
    ``DEFAULT_KEY`` is used.

    Raises:
        RuntimeError: if no API key is available, or if the ``mistralai``
            SDK could not be imported (``Mistral`` is ``None``). The key is
            checked first.
    """
    supplied = (key or "").strip()
    resolved = supplied if supplied else DEFAULT_KEY
    if resolved:
        if Mistral is not None:
            return Mistral(api_key=resolved)
        raise RuntimeError("mistralai library not installed")
    raise RuntimeError("MISTRAL_API_KEY not set")
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
def is_remote(src: str) -> bool:
    """Return ``True`` when *src* is an HTTP(S) URL rather than a local path.

    The scheme is matched case-insensitively and leading whitespace (common
    in pasted URLs) is ignored. Empty, ``None`` and non-string values are
    reported as not remote instead of raising.
    """
    if not isinstance(src, str):
        return False
    return src.lstrip().lower().startswith(("http://", "https://"))
 
55
 
56
def ext_from_src(src: str) -> str:
    """Return the lower-cased extension of *src*, including the leading dot.

    Accepts local paths and URLs. Anything from the first ``?`` onwards is
    dropped, and for URLs (strings containing ``://``) a ``#fragment`` is
    dropped as well, so ``"https://host/clip.MP4#t=10"`` yields ``".mp4"``.
    Local paths keep ``#`` because it is a legal file-name character.

    Returns ``""`` for empty/``None`` input or when there is no extension.
    """
    if not src:
        return ""
    path = src.split("?", 1)[0]
    if "://" in src:
        path = path.split("#", 1)[0]
    return os.path.splitext(path)[1].lower()
60
 
 
61
  def safe_head(url: str, timeout: int = 6):
62
  try:
63
  r = requests.head(url, timeout=timeout, allow_redirects=True)
@@ -65,89 +84,33 @@ def safe_head(url: str, timeout: int = 6):
65
  except Exception:
66
  return None
67
 
 
68
def safe_get(url: str, timeout: int = 15):
    """GET *url* and hand back the response object.

    An HTTP error status is turned into an exception by
    ``raise_for_status()``; nothing is caught here.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response
72
 
 
 
 
 
73
def _temp_file(data: bytes, suffix: str) -> str:
    """Write *data* to a new temporary file and return its path.

    Args:
        data: Binary content to store.
        suffix: File extension including the leading dot (".jpg", ".mp4", ...).

    Returns:
        Path of the created file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        Whatever the write raises (e.g. ``OSError``, or ``TypeError`` for
        non-bytes *data*); the partially written file is removed first.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    try:
        # Reuse the descriptor mkstemp already opened rather than closing it
        # and reopening the same path by name.
        with os.fdopen(fd, "wb") as fh:
            fh.write(data)
    except BaseException:
        # Do not leave an orphaned temp file behind when the write fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
94
 
95
def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60, progress=None) -> bytes:
    """Return the full content of *src*, which is an HTTP(S) URL or a local path.

    Remote sources are probed with a HEAD request first. When the declared
    ``Content-Length`` exceeds *stream_threshold* the body is downloaded in
    chunks; if that streamed download fails, or for smaller or unsized
    bodies, a plain GET via ``safe_get`` is used.

    Args:
        src: URL or filesystem path.
        stream_threshold: Size in bytes above which the chunked download is used.
        timeout: Timeout in seconds passed to the GET requests.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Raises:
        FileNotFoundError: *src* is a local path that does not exist.
        requests.exceptions.RequestException: the final plain GET failed.
    """

    def _report(fraction, desc):
        if progress is not None:
            progress(fraction, desc=desc)

    _report(0.05, "Checking remote/local source...")

    if not is_remote(src):
        if not os.path.exists(src):
            raise FileNotFoundError(f"Local path does not exist: {src}")
        _report(0.05, "Reading local file...")
        with open(src, "rb") as fh:
            data = fh.read()
        _report(0.15, "Read local file")
        return data

    head = safe_head(src)
    if head is not None:
        try:
            declared = int(head.headers.get("content-length") or 0)
        except ValueError:
            declared = 0  # malformed header: treat the size as unknown
        if declared > stream_threshold:
            _report(0.1, "Streaming large remote file...")
            try:
                with requests.get(src, timeout=timeout, stream=True) as resp:
                    resp.raise_for_status()
                    # The result is returned as one bytes object anyway, so
                    # the chunks are joined in memory; a temp-file round
                    # trip would only add disk I/O.
                    data = b"".join(resp.iter_content(8192))
            except requests.exceptions.RequestException:
                # Best effort: fall through to the plain download below.
                pass
            else:
                _report(0.25, "Downloaded remote content")
                return data

    resp = safe_get(src, timeout=timeout)
    _report(0.25, "Downloaded remote content")
    return resp.content
136
 
137
def save_bytes_to_temp(b: bytes, suffix: str) -> str:
    """Persist *b* in a fresh temporary file ending in *suffix*; return its path.

    The caller owns the returned file and must delete it. If writing fails,
    the partially written file is removed and the error is re-raised.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    try:
        # Write through the descriptor mkstemp returned instead of closing
        # it and reopening the path.
        with os.fdopen(fd, "wb") as fh:
            fh.write(b)
    except BaseException:
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
143
 
144
  def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
145
  img = Image.open(BytesIO(img_bytes))
146
- try:
147
- if getattr(img, "is_animated", False):
148
- img.seek(0)
149
- except Exception:
150
- pass
151
  if img.mode != "RGB":
152
  img = img.convert("RGB")
153
  h = base_h
@@ -157,402 +120,444 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
157
  img.save(buf, format="JPEG", quality=85)
158
  return buf.getvalue()
159
 
 
160
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
    """Encode *b* as a base64 ``data:`` URI labelled with *mime*."""
    payload = base64.b64encode(b).decode("utf-8")
    return f"data:{mime};base64,{payload}"
162
-
163
def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15, progress=None) -> List[bytes]:
    """Grab up to *sample_count* still frames from a video using ffmpeg.

    Frames are requested at 0.5 s and then at every whole second (1 s, 2 s,
    ...), so any *sample_count* is honoured. Each frame is produced by its
    own ffmpeg run writing a JPEG to a temp file, which is read back and
    deleted.

    Args:
        media_path: Path of the local video file.
        sample_count: Number of timestamps to sample; values <= 0 yield no frames.
        timeout_extract: Per-frame ffmpeg timeout in seconds.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        JPEG-encoded frames in timestamp order. Timestamps for which ffmpeg
        fails, times out or writes nothing are skipped. The list is empty
        when ``FFMPEG_BIN`` is falsy or *media_path* does not exist.
    """
    frames: List[bytes] = []
    if not FFMPEG_BIN or not os.path.exists(media_path):
        return frames
    if progress is not None:
        progress(0.05, desc="Preparing frame extraction...")

    count = max(0, sample_count)
    timestamps = [0.5] + [float(sec) for sec in range(1, count)] if count else []
    total = len(timestamps)

    for i, t in enumerate(timestamps):
        if progress is not None:
            progress(0.1 + (i / total) * 0.2, desc=f"Extracting frame {i+1}/{total}...")
        fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
        os.close(fd)
        cmd = [
            FFMPEG_BIN,
            "-nostdin",
            "-y",
            "-ss",
            str(t),
            "-i",
            media_path,
            "-frames:v",
            "1",
            "-q:v",
            "2",
            tmp,
        ]
        try:
            subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
            if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                with open(tmp, "rb") as f:
                    frames.append(f.read())
        except (subprocess.SubprocessError, OSError):
            # Best effort: a frame that cannot be extracted is skipped.
            pass
        finally:
            try:
                os.remove(tmp)
            except OSError:
                pass

    if progress is not None:
        progress(0.45, desc=f"Extracted {len(frames)} frames")
    return frames
202
-
203
def chat_complete(client, model: str, messages, timeout: int = 120, progress=None) -> str:
    """Send *messages* to *model* and return the reply text.

    ``client.chat.complete`` is used when the client exposes it; otherwise
    the request is posted to the REST chat-completions endpoint using the
    client's ``api_key`` attribute (falling back to ``DEFAULT_KEY``).

    The response may be a plain dict or an attribute-style object; in both
    cases the text is read from ``choices[0].message.content``.

    Args:
        client: SDK client, or any object carrying an ``api_key`` attribute.
        model: Model identifier.
        messages: Chat messages payload, passed through unchanged.
        timeout: Request timeout in seconds.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        The stripped reply text. Failures are returned, not raised, as a
        string starting with "Error"; a reply without choices is returned
        as "Empty response from model: ...".
    """
    try:
        if progress is not None:
            progress(0.6, desc="Sending request to model...")

        if hasattr(client, "chat") and hasattr(client.chat, "complete"):
            # The timeout keyword is not known up front: try ``timeout``,
            # then ``request_timeout``, then no timeout at all.
            try:
                res = client.chat.complete(model=model, messages=messages, timeout=timeout, stream=False)
            except TypeError:
                try:
                    res = client.chat.complete(model=model, messages=messages, request_timeout=timeout, stream=False)
                except TypeError:
                    res = client.chat.complete(model=model, messages=messages, stream=False)
            # The response object is deliberately NOT rewrapped here: the
            # parsing below reads ``.choices`` from objects directly, while
            # wrapping ``str(res)`` would return the object's repr as text.
        else:
            api_key = getattr(client, "api_key", "") or DEFAULT_KEY
            if not api_key:
                raise RuntimeError("MISTRAL_API_KEY missing or empty")
            url = "https://api.mistral.ai/v1/chat/completions"
            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
            r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
            r.raise_for_status()
            res = r.json()

        if progress is not None:
            progress(0.8, desc="Model responded, parsing...")

        choices = res.get("choices") if isinstance(res, dict) else getattr(res, "choices", None)
        if not choices:
            return f"Empty response from model: {res}"

        first = choices[0]
        if hasattr(first, "message"):
            msg = first.message
        elif isinstance(first, dict):
            msg = first.get("message")
        else:
            msg = first
        content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
        return content.strip() if isinstance(content, str) else str(content)
    except requests.exceptions.RequestException as e:
        return f"Error: network/API request failed: {e}"
    except Exception as e:
        return f"Error during model call: {e}"
248
 
 
 
 
249
def upload_file_to_mistral(
    client,
    path: str,
    filename: str | None = None,
    purpose: str = "batch",
    timeout: int = 120,
    progress=None,
) -> str:
    """Upload a local file to Mistral over REST and return its file id.

    The file is sent as multipart/form-data (field ``file``) together with a
    ``purpose`` form field. When the API answers 422 for a purpose, the next
    candidate is tried. Candidates, in order and without duplicates: the
    requested *purpose*; "image" if the file name has an image extension;
    then "batch", "fine-tune" and "image".

    Args:
        client: Object whose ``api_key`` attribute is used for the bearer
            token (``DEFAULT_KEY`` is the fallback).
        path: Local file to upload.
        filename: Name reported to the API; defaults to the base name of *path*.
        purpose: First ``purpose`` value to try.
        timeout: Request timeout in seconds.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.

    Raises:
        RuntimeError: no API key; a non-422 HTTP error or any other request
            failure; a successful response carrying no file id; or every
            candidate purpose rejected with 422.
    """

    def _find_id(obj):
        # Depth-first search for the first string-valued "id" key.
        if isinstance(obj, dict):
            value = obj.get("id")
            if isinstance(value, str):
                return value
            children = obj.values()
        elif isinstance(obj, list):
            children = obj
        else:
            return None
        for child in children:
            found = _find_id(child)
            if found:
                return found
        return None

    def _extract_id(payload):
        # Known shapes first ({"id": ...} and {"data": [{"id": ...}]}),
        # then a generic search; never raises on an unexpected shape.
        if isinstance(payload, dict):
            direct = payload.get("id")
            if direct:
                return direct
            items = payload.get("data")
            if isinstance(items, list) and items and isinstance(items[0], dict):
                nested = items[0].get("id")
                if nested:
                    return nested
        return _find_id(payload)

    fname = filename or os.path.basename(path)

    candidates = [purpose]
    ext = os.path.splitext(fname)[1].lower()
    if ext in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff"}:
        candidates.append("image")
    candidates.extend(("batch", "fine-tune", "image"))
    # dict.fromkeys de-duplicates while keeping first-seen order, so a
    # rejected purpose is never retried.
    purposes_to_try = list(dict.fromkeys(candidates))

    api_key = getattr(client, "api_key", "") or DEFAULT_KEY
    if not api_key:
        raise RuntimeError("MISTRAL_API_KEY missing or empty")

    url = "https://api.mistral.ai/v1/files"
    headers = {"Authorization": f"Bearer {api_key}"}

    last_error = None
    for cur_purpose in purposes_to_try:
        try:
            with open(path, "rb") as fh:
                files = {"file": (fname, fh)}
                data = {"purpose": cur_purpose}
                resp = requests.post(
                    url,
                    headers=headers,
                    files=files,
                    data=data,
                    timeout=timeout,
                )
            resp.raise_for_status()
            fid = _extract_id(resp.json())
        except requests.exceptions.HTTPError as http_err:
            # 422 means this purpose was rejected: try the next one.
            # Any other status aborts the upload.
            last_error = http_err
            if getattr(http_err.response, "status_code", None) == 422:
                continue
            raise RuntimeError(f"File upload failed. REST error: {http_err}") from http_err
        except requests.exceptions.RequestException as req_err:
            raise RuntimeError(f"File upload failed. REST error: {req_err}") from req_err

        if not fid:
            raise RuntimeError(f"REST upload succeeded but no file id returned (purpose={cur_purpose})")
        if progress is not None:
            progress(0.65, desc=f"Upload complete (REST, purpose={cur_purpose})")
        return fid

    # Every candidate purpose was rejected with 422.
    err_msg = "File upload failed. REST attempts exhausted."
    if last_error:
        err_msg += f" Last REST error: {last_error}"
    raise RuntimeError(err_msg)
360
-
361
def determine_media_type(src: str, progress=None) -> Tuple[bool, bool]:
    """Classify *src* as image and/or video.

    The file extension gives the first guess. For remote sources a HEAD
    request is made, and an ``image/*`` or ``video/*`` Content-Type
    overrides the extension-based guess.

    Returns:
        ``(is_image, is_video)``; both are ``False`` when nothing matched.
    """
    suffix = ext_from_src(src)
    looks_image = suffix in IMAGE_EXTS
    looks_video = suffix in VIDEO_EXTS

    if is_remote(src):
        head = safe_head(src)
        if head:
            content_type = (head.headers.get("content-type") or "").lower()
            if content_type.startswith("image/"):
                looks_image, looks_video = True, False
            elif content_type.startswith("video/"):
                looks_image, looks_video = False, True

    if progress is not None:
        progress(0.02, desc="Determined media type")
    return looks_image, looks_video
380
-
381
def analyze_image_structured(client, img_bytes: bytes, prompt: str, progress=None) -> str:
    """Upload an image and ask the Pixtral model to describe it.

    The bytes are converted to JPEG (``base_h=1024``), written to a temp
    file, uploaded with purpose "image", and the returned file id is
    referenced from the user message. The temp file is deleted whether or
    not the upload succeeds.

    Returns:
        The model reply, or an error string: every exception is caught and
        reported as text.
    """
    try:
        if progress is not None:
            progress(0.3, desc="Preparing image for analysis...")

        jpeg_path = save_bytes_to_temp(convert_to_jpeg_bytes(img_bytes, base_h=1024), suffix=".jpg")
        try:
            uploaded_id = upload_file_to_mistral(
                client, jpeg_path, filename="image.jpg", purpose="image", progress=progress
            )
        finally:
            try:
                os.remove(jpeg_path)
            except Exception:
                pass

        # The message points at the uploaded file id rather than embedding
        # base64 image data.
        user_parts = [
            {"type": "text", "text": prompt},
            {"type": "file", "file_id": uploaded_id},
        ]
        conversation = [
            {"role": "system", "content": SYSTEM_INSTRUCTION},
            {"role": "user", "content": user_parts},
        ]
        return chat_complete(client, PIXTRAL_MODEL, conversation, progress=progress)
    except UnidentifiedImageError:
        return "Error: provided file is not a valid image."
    except Exception as exc:
        return f"Error analyzing image: {exc}"
406
 
407
def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None) -> str:
    """Produce one narrative description of a video.

    Primary path: upload the whole file and send its file id, together with
    *prompt*, to ``VIDEO_MODEL``.

    Fallback path (taken when the primary path raises): extract up to six
    frames with ffmpeg, convert each to JPEG (``base_h=720``) and send them
    as data-URI images to ``PIXTRAL_MODEL``.

    Returns:
        The model reply, or a string starting with "Error" when the upload
        failed and no usable frame could be produced.
    """
    try:
        if progress is not None:
            progress(0.3, desc="Uploading video for full analysis...")
        file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path), progress=progress)
        extra_msg = (
            f"Uploaded video file id: {file_id}\n\n"
            "Instruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
        )
        messages = [
            {"role": "system", "content": SYSTEM_INSTRUCTION},
            {"role": "user", "content": extra_msg + "\n\n" + prompt},
        ]
        return chat_complete(client, VIDEO_MODEL, messages, progress=progress)
    except Exception as e:
        if progress is not None:
            progress(0.35, desc="Upload failed, extracting frames as fallback...")
        frames = extract_best_frames_bytes(video_path, sample_count=6, progress=progress)
        if not frames:
            return f"Error: could not upload video and no frames could be extracted. ({e})"

        image_entries = []
        for i, fb in enumerate(frames, start=1):
            try:
                if progress is not None:
                    progress(0.4 + (i / len(frames)) * 0.2, desc=f"Preparing frame {i}/{len(frames)}...")
                j = convert_to_jpeg_bytes(fb, base_h=720)
                image_entries.append(
                    {
                        "type": "image_url",
                        "image_url": b64_bytes(j, mime="image/jpeg"),
                        "meta": {"frame_index": i},
                    }
                )
            except Exception:
                # A frame that cannot be converted is skipped.
                continue

        if not image_entries:
            # Without this guard the model would be asked to describe
            # frames that were never attached.
            return f"Error: could not upload video and none of the extracted frames could be converted. ({e})"

        content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
        messages = [
            {"role": "system", "content": SYSTEM_INSTRUCTION},
            {"role": "user", "content": content},
        ]
        return chat_complete(client, PIXTRAL_MODEL, messages, progress=progress)
448
 
449
def process_media(src: str, custom_prompt: str, api_key: str, progress=None) -> str:
    """Analyse the image or video at *src* and return the model's text.

    The client is created first, so a missing key or SDK raises from
    ``get_client``. After that, failures are returned as strings. The media
    type from ``determine_media_type`` selects the image or video path; if
    it is neither, the source is treated as an image as a last resort.

    Args:
        src: URL or local path of the media.
        custom_prompt: User prompt; a default is used when blank.
        api_key: Mistral API key handed to ``get_client``.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``.
    """

    def _notify(fraction, desc):
        if progress is not None:
            progress(fraction, desc=desc)

    client = get_client(api_key)
    prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."

    if not src:
        return "Error: No URL or path provided."

    _notify(0.01, "Starting media processing")

    try:
        is_image, is_video = determine_media_type(src, progress=progress)
    except Exception as exc:
        return f"Error determining media type: {exc}"

    if is_image:
        try:
            _notify(0.05, "Fetching image bytes...")
            payload = fetch_bytes(src, progress=progress)
        except FileNotFoundError as exc:
            return f"Error: {exc}"
        except Exception as exc:
            return f"Error fetching image: {exc}"

        _notify(0.2, "Analyzing image")
        try:
            return analyze_image_structured(client, payload, prompt, progress=progress)
        except UnidentifiedImageError:
            return "Error: provided file is not a valid image."
        except Exception as exc:
            return f"Error analyzing image: {exc}"

    if is_video:
        try:
            _notify(0.05, "Fetching video bytes...")
            payload = fetch_bytes(src, timeout=120, progress=progress)
        except FileNotFoundError as exc:
            return f"Error: {exc}"
        except Exception as exc:
            return f"Error fetching video: {exc}"

        video_file = save_bytes_to_temp(payload, suffix=ext_from_src(src) or ".mp4")
        try:
            _notify(0.2, "Analyzing video")
            return analyze_video_cohesive(client, video_file, prompt, progress=progress)
        finally:
            # The downloaded copy is removed whatever the analysis outcome.
            try:
                os.remove(video_file)
            except Exception:
                pass

    # Neither extension nor Content-Type identified the media.
    try:
        _notify(0.05, "Treating input as image fallback...")
        payload = fetch_bytes(src, progress=progress)
        _notify(0.2, "Analyzing fallback image")
        return analyze_image_structured(client, payload, prompt, progress=progress)
    except Exception as exc:
        return f"Unable to determine media type or fetch file: {exc}"
514
 
515
def _ensure_event_loop_for_thread():
    """Give the calling thread an asyncio event loop if it has none.

    If ``asyncio.get_event_loop()`` raises ``RuntimeError``, a new loop is
    created and installed as the current one. Intended for worker threads
    running blocking code that looks up the current loop.
    """
    try:
        asyncio.get_event_loop()
        return
    except RuntimeError:
        pass
    asyncio.set_event_loop(asyncio.new_event_loop())
525
-
526
def run_blocking_in_thread(fn, *args, **kwargs):
    """Run ``fn(*args, **kwargs)`` on a worker thread that has an event loop.

    Returns:
        A ``concurrent.futures.Future``; call ``.result()`` to wait for the
        return value or to re-raise the exception raised by *fn*.
    """
    import concurrent.futures

    def target():
        _ensure_event_loop_for_thread()
        return fn(*args, **kwargs)

    # One task per call, so one worker is enough.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    future = executor.submit(target)
    # shutdown(wait=False) returns at once; the submitted task still runs to
    # completion, after which the pool's resources are released instead of
    # piling up with every call.
    executor.shutdown(wait=False)
    return future
537
-
538
- css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
539
-
540
def _btn_label_for_status(status: str) -> str:
    """Return the submit-button caption for a UI status code.

    Unknown status values get the default caption "Submit".
    """
    captions = dict(idle="Submit", busy="Processing…", done="Submit", error="Retry")
    try:
        return captions[status]
    except KeyError:
        return "Submit"
542
 
543
  def create_demo():
544
  with gr.Blocks(title="Flux Multimodal", css=css) as demo:
545
  with gr.Row():
546
  with gr.Column(scale=1):
547
- preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
548
- preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False, format="mp4")
549
- preview_status = gr.Textbox(label="Preview status", interactive=False, lines=1, value="", visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  with gr.Column(scale=2):
551
- url_input = gr.Textbox(label="Image / Video URL", placeholder="https://...", lines=1)
 
 
552
  with gr.Accordion("Prompt (optional)", open=False):
553
  custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
554
  with gr.Accordion("Mistral API Key (optional)", open=False):
555
- api_key = gr.Textbox(label="Mistral API Key", type="password", max_lines=1)
 
 
556
  with gr.Row():
557
  submit_btn = gr.Button("Submit")
558
  clear_btn = gr.Button("Clear")
@@ -563,7 +568,9 @@ def create_demo():
563
  preview_path_state = gr.State("")
564
 
565
  # small helper: fetch URL into bytes with retries and respect Retry-After
566
- def _fetch_with_retries_bytes(src: str, timeout: int = 15, max_retries: int = 3):
 
 
567
  attempt = 0
568
  delay = 1.0
569
  while True:
@@ -616,39 +623,99 @@ def create_demo():
616
  def load_preview(url: str):
617
  # returns (preview_image_path, preview_video_path, status_msg)
618
  if not url:
619
- return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="")
 
 
 
 
620
  try:
621
  if is_remote(url):
622
  head = safe_head(url)
623
  if head:
624
  ctype = (head.headers.get("content-type") or "").lower()
625
- if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
 
 
626
  local = _save_preview_local(url)
627
  if local:
628
- return gr.update(value=None, visible=False), gr.update(value=local, visible=True), gr.update(value=f"Remote video detected (content-type={ctype}). Showing preview if browser-playable.")
 
 
 
 
 
 
629
  else:
630
- return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=f"Remote video detected but preview download failed (content-type={ctype}).")
 
 
 
 
 
 
631
  local = _save_preview_local(url)
632
  if not local:
633
- return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Preview load failed: could not fetch resource.")
 
 
 
 
 
 
634
  try:
635
  img = Image.open(local)
636
  if getattr(img, "is_animated", False):
637
  img.seek(0)
638
- return gr.update(value=local, visible=True), gr.update(value=None, visible=False), gr.update(value="Image preview loaded.")
 
 
 
 
639
  except UnidentifiedImageError:
640
  if any(local.lower().endswith(ext) for ext in VIDEO_EXTS) or True:
641
- return gr.update(value=None, visible=False), gr.update(value=local, visible=True), gr.update(value="Non-image file — showing as video preview if playable.")
642
- return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Preview load failed: file is not a valid image.")
 
 
 
 
 
 
 
 
 
 
 
 
643
  except Exception as e:
644
- return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=f"Preview load failed: {e}")
 
 
 
 
645
 
646
- url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, preview_status])
 
 
 
 
647
 
648
  def clear_all():
649
  return "", None, None, "idle", "Idle", "", ""
650
 
651
- clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, progress_md, output_md, preview_path_state])
 
 
 
 
 
 
 
 
 
 
 
 
652
 
653
  def _convert_video_for_preview(path: str) -> str:
654
  if not FFMPEG_BIN or not os.path.exists(FFMPEG_BIN):
@@ -656,34 +723,65 @@ def create_demo():
656
  out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
657
  os.close(out_fd)
658
  cmd = [
659
- FFMPEG_BIN, "-nostdin", "-y", "-i", path,
660
- "-c:v", "libx264", "-preset", "veryfast", "-crf", "28",
661
- "-c:a", "aac", "-movflags", "+faststart", out_path
 
 
 
 
 
 
 
 
 
 
 
 
 
662
  ]
663
  try:
664
- subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60)
 
 
 
 
 
665
  return out_path
666
  except Exception:
667
- try: os.remove(out_path)
668
- except Exception: pass
 
 
669
  return path
670
 
671
  # --- Helper: probe codecs via ffprobe; returns dict with streams info or None on failure
672
  def _ffprobe_streams(path: str) -> Optional[dict]:
673
  if not FFMPEG_BIN:
674
  return None
675
- ffprobe = FFMPEG_BIN.replace("ffmpeg", "ffprobe") if "ffmpeg" in FFMPEG_BIN else "ffprobe"
 
 
 
 
676
  if not shutil.which(ffprobe):
677
  ffprobe = "ffprobe"
678
  cmd = [
679
- ffprobe, "-v", "error", "-print_format", "json", "-show_streams", "-show_format", path
 
 
 
 
 
 
 
680
  ]
681
  try:
682
  out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
683
  return json.loads(out)
684
  except Exception:
685
  return None
686
-
687
  # --- Helper: is file already browser-playable (mp4 container with h264 video and aac audio OR at least playable video)
688
  def _is_browser_playable(path: str) -> bool:
689
  try:
@@ -695,7 +793,8 @@ def create_demo():
695
  return ext.endswith(".mp4")
696
  streams = info.get("streams", [])
697
  v_ok = any(
698
- s.get("codec_name") in ("h264", "h265", "avc1") and s.get("codec_type") == "video"
 
699
  for s in streams
700
  )
701
  # audio optional for preview
@@ -708,7 +807,7 @@ def create_demo():
708
  return any(s.get("codec_type") == "video" for s in streams)
709
  except Exception:
710
  return False
711
-
712
  # --- Convert only if not browser-playable
713
  def _convert_video_for_preview_if_needed(path: str) -> str:
714
  """
@@ -719,96 +818,145 @@ def create_demo():
719
  """
720
  if not FFMPEG_BIN or not os.path.exists(path):
721
  return path
722
-
723
  # Quick check: extension + ffprobe for codecs
724
  if path.lower().endswith((".mp4", ".m4v", ".mov")):
725
  info = _ffprobe_streams(path)
726
  if info:
727
- codecs = {s.get("codec_name") for s in info.get("streams", []) if s.get("codec_type") == "video"}
 
 
 
 
728
  if "h264" in codecs or "h265" in codecs:
729
- return path # already playable
730
-
731
  # Need conversion → write to a new temp MP4
732
  out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
733
  os.close(out_fd)
734
  cmd = [
735
- FFMPEG_BIN, "-y", "-i", path,
736
- "-c:v", "libx264", "-preset", "veryfast", "-crf", "28",
737
- "-c:a", "aac", "-movflags", "+faststart", out_path,
 
 
 
 
 
 
 
 
 
 
 
 
738
  ]
739
  try:
740
- subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60)
 
 
 
 
 
741
  return out_path
742
  except Exception:
743
  # If conversion fails, fall back to the original (Gradio will show its own warning)
744
- try: os.remove(out_path)
745
- except Exception: pass
 
 
746
  return path
747
-
748
  def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
749
  """Return (status, markdown_output, preview_path)."""
750
  try:
751
  if not url:
752
  return "error", "**Error:** No URL provided.", ""
753
-
754
  progress(0.02, desc="Checking URL / content‑type")
755
  is_img, is_vid = determine_media_type(url, progress=progress)
756
-
757
  client = get_client(key)
758
  preview_path = ""
759
-
760
  if is_vid:
761
  progress(0.05, desc="Downloading video")
762
  raw = fetch_bytes(url, timeout=120, progress=progress)
763
  if not raw:
764
  return "error", "Failed to download video bytes.", ""
765
-
766
  # write with a proper video extension
767
- tmp_video = _temp_file(raw, suffix="." + (ext_from_src(url) or "mp4"))
 
 
768
  progress(0.15, desc="Preparing preview")
769
  preview_path = _make_preview(url, raw)
770
-
771
  progress(0.25, desc="Running full‑video analysis")
772
- result = analyze_video_cohesive(client, tmp_video, prompt, progress=progress)
773
-
 
 
774
  # clean‑up the *raw* temp file (preview may be a different file)
775
- try: os.remove(tmp_video)
776
- except Exception: pass
777
-
 
 
778
  elif is_img:
779
  progress(0.05, desc="Downloading image")
780
  raw = fetch_bytes(url, progress=progress)
781
-
782
  # preview image (always JPEG for consistency)
783
  preview_path = _make_preview(url, raw)
784
-
785
  progress(0.20, desc="Running image analysis")
786
- result = analyze_image_structured(client, raw, prompt, progress=progress)
787
-
 
 
788
  else:
789
  progress(0.07, desc="Downloading unknown media")
790
  raw = fetch_bytes(url, timeout=120, progress=progress)
791
-
792
  # try to open as image
793
  try:
794
  Image.open(BytesIO(raw)).verify()
795
  is_img = True
796
  except Exception:
797
  is_img = False
798
-
799
  if is_img:
800
- preview_path = _temp_file(convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg")
801
- result = analyze_image_structured(client, raw, prompt, progress=progress)
 
 
 
 
802
  else:
803
  tmp_vid = _temp_file(raw, suffix=ext_from_src(url) or ".mp4")
804
  preview_path = _convert_video_for_preview_if_needed(tmp_vid)
805
- result = analyze_video_cohesive(client, tmp_vid, prompt, progress=progress)
806
- try: os.remove(tmp_vid)
807
- except Exception: pass
808
-
809
- status = "done" if not (isinstance(result, str) and result.lower().startswith("error")) else "error"
810
- return status, result if isinstance(result, str) else str(result), preview_path or ""
811
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
812
  except Exception as exc:
813
  return "error", f"Unexpected worker error: {exc}", ""
814
 
@@ -832,11 +980,23 @@ def create_demo():
832
  }
833
  return labels.get(s, "Submit")
834
 
835
- status_state.change(fn=lambda s: _btn_label_for_status(s), inputs=[status_state], outputs=[submit_btn])
 
 
 
 
836
 
837
  def status_to_progress_text(s):
838
- return {"idle": "Idle", "busy": "Processing…", "done": "Completed", "error": "Error — see output"}.get(s, s)
839
- status_state.change(fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md])
 
 
 
 
 
 
 
 
840
 
841
  preview_cache = {}
842
 
@@ -849,7 +1009,9 @@ def create_demo():
849
  preview = _convert_video_for_preview_if_needed(tmp)
850
  preview_cache[url] = (preview, True)
851
  else:
852
- preview = _temp_file(convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg")
 
 
853
  preview_cache[url] = (preview, False)
854
  return preview
855
 
@@ -865,22 +1027,45 @@ def create_demo():
865
 
866
  try:
867
  if any(path.lower().endswith(ext) for ext in IMAGE_EXTS):
868
- return gr.update(value=path, visible=True), gr.update(value=None, visible=False), "Preview updated."
869
-
 
 
 
 
870
  if any(path.lower().endswith(ext) for ext in VIDEO_EXTS):
871
- return gr.update(value=None, visible=False), gr.update(value=path, visible=True), "Preview updated."
872
-
 
 
 
 
873
  img = Image.open(path)
874
  img.verify()
875
- return gr.update(value=path, visible=True), gr.update(value=None, visible=False), "Preview updated."
876
-
 
 
 
 
877
  except Exception as e:
878
  print(f"Failed to update preview: {e}")
879
- return gr.update(value=None, visible=False), gr.update(value=None, visible=False), ""
 
 
 
 
880
 
881
- preview_path_state.change(fn=apply_preview, inputs=[preview_path_state, prev_preview_state], outputs=[preview_image, preview_video, preview_status])
 
 
 
 
882
 
883
  return demo
884
 
 
885
  if __name__ == "__main__":
886
- create_demo().launch(share=False, server_name="0.0.0.0", server_port=7860, max_threads=8)
 
 
 
 
 
 
1
  import os
2
+ import json
3
  import shutil
4
  import subprocess
5
  import tempfile
6
  import base64
7
+ import asyncio
8
+ import concurrent.futures
9
  from io import BytesIO
10
+ from typing import List, Tuple, Optional, Callable
11
+
12
  import requests
13
  from PIL import Image, ImageFile, UnidentifiedImageError
14
  import gradio as gr
 
 
 
15
 
16
+ # --------------------------------------------------------------------------- #
17
+ # Constants & basic helpers
18
+ # --------------------------------------------------------------------------- #
19
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
20
  PIXTRAL_MODEL = "pixtral-12b-2409"
21
  VIDEO_MODEL = "voxtral-mini-latest"
 
25
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
26
 
27
  SYSTEM_INSTRUCTION = (
28
+ "You are a clinical visual analyst. Only analyse media actually provided (image or video). "
29
+ "If analysing a video, do it as a whole, not framebyframe. Produce a concise, factual narrative "
30
+ "describing observable features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). "
31
+ "Do not hallucinate sensory details."
 
 
32
  )
33
 
34
  ImageFile.LOAD_TRUNCATED_IMAGES = True
35
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
36
 
37
+ # --------------------------------------------------------------------------- #
38
+ # Mistral client utilities
39
+ # --------------------------------------------------------------------------- #
40
  try:
41
  from mistralai import Mistral
42
+ except Exception: # pragma: no cover
43
  Mistral = None
44
 
45
+
46
  def get_client(key: Optional[str] = None):
47
  api_key = (key or "").strip() or DEFAULT_KEY
48
  if not api_key:
49
  raise RuntimeError("MISTRAL_API_KEY not set")
50
  if Mistral is None:
 
51
  raise RuntimeError("mistralai library not installed")
52
  return Mistral(api_key=api_key)
53
 
54
+
55
+ def _progress(p: float, desc: str = None, fn: Callable = None):
56
+ """Helper to call a Gradio progress function if supplied."""
57
+ if fn is None:
58
+ return
59
+ try:
60
+ if desc is None:
61
+ fn(p)
62
+ else:
63
+ fn(p, desc)
64
+ except Exception:
65
+ pass
66
+
67
+
68
+ # --------------------------------------------------------------------------- #
69
+ # HTTP helpers (safe HEAD / GET)
70
+ # --------------------------------------------------------------------------- #
71
  def is_remote(src: str) -> bool:
72
+ return src.startswith(("http://", "https://"))
73
+
74
 
75
  def ext_from_src(src: str) -> str:
76
+ _, ext = os.path.splitext(src.split("?")[0])
 
77
  return ext.lower()
78
 
79
+
80
  def safe_head(url: str, timeout: int = 6):
81
  try:
82
  r = requests.head(url, timeout=timeout, allow_redirects=True)
 
84
  except Exception:
85
  return None
86
 
87
+
88
  def safe_get(url: str, timeout: int = 15):
89
  r = requests.get(url, timeout=timeout)
90
  r.raise_for_status()
91
  return r
92
 
93
+
94
+ # --------------------------------------------------------------------------- #
95
+ # Temp‑file helpers
96
+ # --------------------------------------------------------------------------- #
97
  def _temp_file(data: bytes, suffix: str) -> str:
98
+ """Write *data* to a temporary file and return its path."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  fd, path = tempfile.mkstemp(suffix=suffix)
100
+ os.close(fd)
101
  with open(path, "wb") as f:
102
  f.write(data)
103
  return path
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  def save_bytes_to_temp(b: bytes, suffix: str) -> str:
107
+ return _temp_file(b, suffix)
108
+
 
 
 
109
 
110
  def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
111
  img = Image.open(BytesIO(img_bytes))
112
+ if getattr(img, "is_animated", False):
113
+ img.seek(0)
 
 
 
114
  if img.mode != "RGB":
115
  img = img.convert("RGB")
116
  h = base_h
 
120
  img.save(buf, format="JPEG", quality=85)
121
  return buf.getvalue()
122
 
123
+
124
  def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
125
+ return f"data:{mime};base64," + base64.b64encode(b).decode()
126
+
127
+
128
+ # --------------------------------------------------------------------------- #
129
+ # Media‑type detection
130
+ # --------------------------------------------------------------------------- #
131
+ def determine_media_type(src: str, progress: Callable = None) -> Tuple[bool, bool]:
132
+ is_img = ext_from_src(src) in IMAGE_EXTS
133
+ is_vid = ext_from_src(src) in VIDEO_EXTS
134
+ if is_remote(src):
135
+ head = safe_head(src)
136
+ if head:
137
+ ct = (head.headers.get("content-type") or "").lower()
138
+ if ct.startswith("image/"):
139
+ is_img, is_vid = True, False
140
+ elif ct.startswith("video/"):
141
+ is_vid, is_img = True, False
142
+ _progress(0.02, "Determined media type", progress)
143
+ return is_img, is_vid
144
+
145
+
146
+ # --------------------------------------------------------------------------- #
147
+ # Download helpers – stream large files directly to a temp file
148
+ # --------------------------------------------------------------------------- #
149
+ def download_to_temp(src: str, progress: Callable = None) -> str:
150
+ """Return a temporary file path containing the downloaded content."""
151
+ _progress(0.05, "Downloading...", progress)
152
+ if is_remote(src):
153
+ # stream to avoid loading huge files into RAM
154
+ r = requests.get(src, stream=True, timeout=60)
155
+ r.raise_for_status()
156
+ suffix = ext_from_src(src) or ".bin"
157
+ fd, path = tempfile.mkstemp(suffix=suffix)
158
  os.close(fd)
159
+ with open(path, "wb") as f:
160
+ for chunk in r.iter_content(8192):
161
+ if chunk:
162
+ f.write(chunk)
163
+ return path
164
+ else:
165
+ if not os.path.exists(src):
166
+ raise FileNotFoundError(f"Local path does not exist: {src}")
167
+ return src
168
+
169
+
170
+ # --------------------------------------------------------------------------- #
171
+ # ffprobe / conversion helpers (browser‑playable video)
172
+ # --------------------------------------------------------------------------- #
173
+ def _ffprobe_streams(path: str) -> Optional[dict]:
174
+ if not FFMPEG_BIN:
175
+ return None
176
+ ffprobe = FFMPEG_BIN.replace("ffmpeg", "ffprobe")
177
+ if not shutil.which(ffprobe):
178
+ ffprobe = "ffprobe"
179
+ cmd = [
180
+ ffprobe,
181
+ "-v",
182
+ "error",
183
+ "-print_format",
184
+ "json",
185
+ "-show_streams",
186
+ "-show_format",
187
+ path,
188
+ ]
189
+ try:
190
+ out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
191
+ return json.loads(out)
192
+ except Exception:
193
+ return None
194
+
195
+
196
+ def _is_browser_playable(path: str) -> bool:
197
+ """True if the file is MP4 (or MOV) with an H.264/H.265 video stream."""
198
+ if not path:
199
+ return False
200
+ ext = path.lower()
201
+ if any(ext.endswith(e) for e in (".mp4", ".m4v", ".mov")):
202
+ info = _ffprobe_streams(path)
203
+ if not info:
204
+ return ext.endswith(".mp4")
205
+ for s in info.get("streams", []):
206
+ if s.get("codec_type") == "video" and s.get("codec_name") in (
207
+ "h264",
208
+ "h265",
209
+ "avc1",
210
+ ):
211
+ return True
212
+ # fallback: any video stream is acceptable
213
+ info = _ffprobe_streams(path)
214
+ if not info:
215
+ return False
216
+ return any(s.get("codec_type") == "video" for s in info.get("streams", []))
217
+
218
+
219
+ def _convert_video_for_preview_if_needed(path: str, progress: Callable = None) -> str:
220
+ """Return a path that Gradio can play (MP4 + H.264/AAC)."""
221
+ if not FFMPEG_BIN or not os.path.exists(path):
222
+ return path
223
+ if _is_browser_playable(path):
224
+ return path
225
+
226
+ _progress(0.70, "Re‑encoding video for preview", progress)
227
+ out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
228
+ os.close(out_fd)
229
+ cmd = [
230
+ FFMPEG_BIN,
231
+ "-y",
232
+ "-i",
233
+ path,
234
+ "-c:v",
235
+ "libx264",
236
+ "-preset",
237
+ "veryfast",
238
+ "-crf",
239
+ "28",
240
+ "-c:a",
241
+ "aac",
242
+ "-movflags",
243
+ "+faststart",
244
+ out_path,
245
+ ]
246
+ try:
247
+ subprocess.run(
248
+ cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60
249
+ )
250
+ return out_path
251
+ except Exception:
252
  try:
253
+ os.remove(out_path)
 
 
 
 
 
254
  except Exception:
255
  pass
256
+ return path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
+ # --------------------------------------------------------------------------- #
260
+ # Mistral interaction (upload + chat)
261
+ # --------------------------------------------------------------------------- #
262
  def upload_file_to_mistral(
263
  client,
264
  path: str,
265
  filename: str | None = None,
266
  purpose: str = "batch",
267
+ progress: Callable = None,
 
268
  ) -> str:
269
+ """Upload a file via the REST endpoint and return its file‑id."""
 
 
 
 
 
 
270
  fname = filename or os.path.basename(path)
271
 
272
+ # Build a short list of plausible purposes
273
+ purposes = [purpose]
 
 
 
 
274
  ext = os.path.splitext(fname)[1].lower()
275
  if ext in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tiff"}:
276
+ purposes.append("image")
 
 
277
  for p in ("batch", "fine-tune", "image"):
278
+ if p not in purposes:
279
+ purposes.append(p)
280
 
 
 
 
281
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
282
  if not api_key:
283
+ raise RuntimeError("MISTRAL_API_KEY missing")
284
 
285
  url = "https://api.mistral.ai/v1/files"
286
  headers = {"Authorization": f"Bearer {api_key}"}
287
 
288
+ last_err = None
289
+ for cur_purpose in purposes:
 
 
 
290
  try:
291
  with open(path, "rb") as fh:
292
  files = {"file": (fname, fh)}
293
  data = {"purpose": cur_purpose}
294
  resp = requests.post(
295
+ url, headers=headers, files=files, data=data, timeout=120
 
 
 
 
296
  )
297
  resp.raise_for_status()
298
+ payload = resp.json()
299
+ fid = payload.get("id") or payload.get("data", [{}])[0].get("id")
300
+ if fid:
301
+ _progress(0.65, f"Uploaded (purpose={cur_purpose})", progress)
302
+ return fid
303
+ raise RuntimeError("Upload succeeded but no file id returned")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  except requests.exceptions.HTTPError as he:
305
+ if he.response.status_code == 422:
306
+ last_err = he
 
 
307
  continue
308
+ raise RuntimeError(f"Upload failed: {he}") from he
309
+ except Exception as e:
310
+ last_err = e
311
+ raise RuntimeError(f"Upload failed: {e}") from e
312
+
313
+ raise RuntimeError(f"All upload attempts failed. Last error: {last_err}")
314
+
315
+
316
+ def chat_complete(client, model: str, messages, progress: Callable = None) -> str:
317
+ """Send a chat request (non‑streaming) and return the model’s text response."""
318
+ _progress(0.60, "Calling model", progress)
319
+ try:
320
+ if hasattr(client, "chat") and hasattr(client.chat, "complete"):
321
+ try:
322
+ res = client.chat.complete(
323
+ model=model, messages=messages, timeout=120, stream=False
324
+ )
325
+ except TypeError:
326
+ # SDK variations
327
+ res = client.chat.complete(
328
+ model=model, messages=messages, request_timeout=120, stream=False
329
+ )
330
+ else:
331
+ # raw REST fallback
332
+ api_key = getattr(client, "api_key", "") or DEFAULT_KEY
333
+ url = "https://api.mistral.ai/v1/chat/completions"
334
+ headers = {
335
+ "Authorization": f"Bearer {api_key}",
336
+ "Content-Type": "application/json",
337
+ }
338
+ r = requests.post(
339
+ url,
340
+ json={"model": model, "messages": messages},
341
+ headers=headers,
342
+ timeout=120,
343
+ )
344
+ r.raise_for_status()
345
+ res = r.json()
346
+ except Exception as e:
347
+ return f"Error during model call: {e}"
348
+
349
+ _progress(0.80, "Parsing response", progress)
350
+ choices = getattr(res, "choices", None) or (
351
+ res.get("choices") if isinstance(res, dict) else []
352
+ )
353
+ if not choices:
354
+ return f"Empty response: {res}"
355
+ first = choices[0]
356
+ msg = (
357
+ first.get("message")
358
+ if isinstance(first, dict)
359
+ else getattr(first, "message", {})
360
+ )
361
+ content = (
362
+ msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", "")
363
+ )
364
+ return content.strip() if isinstance(content, str) else str(content)
365
+
366
+
367
+ # --------------------------------------------------------------------------- #
368
+ # Analysis functions (image & video)
369
+ # --------------------------------------------------------------------------- #
370
+ def analyze_image_structured(
371
+ client, img_bytes: bytes, prompt: str, progress: Callable = None
372
+ ) -> str:
373
+ _progress(0.30, "Preparing image", progress)
374
+ jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
375
+ tmp = save_bytes_to_temp(jpeg, ".jpg")
376
  try:
377
+ fid = upload_file_to_mistral(
378
+ client, tmp, filename="image.jpg", purpose="image", progress=progress
379
+ )
380
+ finally:
381
  try:
382
+ os.remove(tmp)
383
+ except Exception:
384
+ pass
 
385
 
386
+ messages = [
387
+ {"role": "system", "content": SYSTEM_INSTRUCTION},
388
+ {
389
+ "role": "user",
390
+ "content": [
391
  {"type": "text", "text": prompt},
392
+ {"type": "file", "file_id": fid},
393
+ ],
394
+ },
395
+ ]
396
+ return chat_complete(client, PIXTRAL_MODEL, messages, progress=progress)
397
+
 
 
398
 
399
+ def analyze_video_cohesive(
400
+ client,
401
+ video_path: str,
402
+ prompt: str,
403
+ progress: Callable = None,
404
+ model: str = VIDEO_MODEL,
405
+ ) -> str:
406
+ """Full‑video analysis; falls back to frame‑based analysis on upload failure."""
407
  try:
408
+ _progress(0.30, "Uploading video", progress)
409
+ fid = upload_file_to_mistral(
410
+ client,
411
+ video_path,
412
+ filename=os.path.basename(video_path),
413
+ purpose="batch",
414
+ progress=progress,
415
  )
416
  messages = [
417
  {"role": "system", "content": SYSTEM_INSTRUCTION},
418
+ {
419
+ "role": "user",
420
+ "content": [
421
+ {
422
+ "type": "text",
423
+ "text": f"{prompt}\n\nAnalyze the whole video and produce a single cohesive narrative.",
424
+ },
425
+ {"type": "file", "file_id": fid},
426
+ ],
427
+ },
428
  ]
429
+ return chat_complete(client, model, messages, progress=progress)
430
+
431
+ except Exception as exc:
432
+ # ---- fallback: extract a few representative frames --------------------
433
+ _progress(0.35, "Upload failed – extracting frames", progress)
434
+ frames = extract_best_frames_bytes(
435
+ video_path, sample_count=6, progress=progress
436
+ )
437
  if not frames:
438
+ return f"Error: upload failed and no frames could be extracted ({exc})"
439
+
440
+ # upload each frame and build the message payload
441
+ frame_files = []
442
+ for i, raw in enumerate(frames, 1):
443
+ _progress(
444
+ 0.40 + i / len(frames) * 0.15,
445
+ f"Uploading frame {i}/{len(frames)}",
446
+ progress,
447
+ )
448
+ tmp = save_bytes_to_temp(convert_to_jpeg_bytes(raw, base_h=720), ".jpg")
449
  try:
450
+ fid = upload_file_to_mistral(
451
+ client, tmp, f"frame_{i}.jpg", purpose="image", progress=progress
 
 
 
 
 
 
 
452
  )
453
+ frame_files.append({"type": "file", "file_id": fid})
454
+ finally:
455
+ os.remove(tmp)
456
+
457
  messages = [
458
  {"role": "system", "content": SYSTEM_INSTRUCTION},
459
+ {
460
+ "role": "user",
461
+ "content": [
462
+ {
463
+ "type": "text",
464
+ "text": f"{prompt}\n\nConsolidate observations across the provided frames into a single narrative.",
465
+ },
466
+ *frame_files,
467
+ ],
468
+ },
469
  ]
470
  return chat_complete(client, PIXTRAL_MODEL, messages, progress=progress)
471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
+ # --------------------------------------------------------------------------- #
474
+ # Gradio UI helpers
475
+ # --------------------------------------------------------------------------- #
476
+ css = ".preview_media img, .preview_media video {max-width:100%;height:auto;border-radius:6px;}"
 
 
 
 
 
477
 
 
 
 
 
 
 
 
 
478
 
479
+ def _make_preview(url: str, raw: bytes, cache: dict) -> str:
480
+ """Create (or reuse) a preview file – image → JPEG, video → MP4 (browser‑playable)."""
481
+ if url in cache:
482
+ return cache[url]
 
 
 
 
 
 
 
483
 
484
+ if determine_media_type(url)[1]: # video
485
+ tmp = _temp_file(raw, suffix=ext_from_src(url) or ".mp4")
486
+ preview = _convert_video_for_preview_if_needed(tmp)
487
+ os.remove(tmp) # the converted file is a separate temp file
488
+ else: # image
489
+ preview = _temp_file(convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg")
490
+ cache[url] = preview
491
+ return preview
 
492
 
 
 
 
 
 
 
 
 
 
493
 
494
+ def apply_preview(path: str, last_path: str):
495
+ """Return Gradio updates for image/video components and a status string."""
496
+ if not path or path == last_path:
497
+ return gr.update(), gr.update(), ""
498
+
499
+ if any(path.lower().endswith(ext) for ext in IMAGE_EXTS):
500
+ return (
501
+ gr.update(value=path, visible=True),
502
+ gr.update(value=None, visible=False),
503
+ "Preview updated.",
504
+ )
505
+ if any(path.lower().endswith(ext) for ext in VIDEO_EXTS):
506
+ return (
507
+ gr.update(value=None, visible=False),
508
+ gr.update(value=path, visible=True),
509
+ "Preview updated.",
510
+ )
511
+
512
+ # fallback – try to open as image
513
  try:
514
+ Image.open(path).verify()
515
+ return (
516
+ gr.update(value=path, visible=True),
517
+ gr.update(value=None, visible=False),
518
+ "Preview updated.",
519
+ )
520
+ except Exception:
521
+ return (
522
+ gr.update(value=None, visible=False),
523
+ gr.update(value=None, visible=False),
524
+ "",
525
+ )
526
+
 
 
 
 
 
 
 
 
527
 
528
  def create_demo():
529
  with gr.Blocks(title="Flux Multimodal", css=css) as demo:
530
  with gr.Row():
531
  with gr.Column(scale=1):
532
+ preview_image = gr.Image(
533
+ label="Preview Image",
534
+ type="filepath",
535
+ elem_classes="preview_media",
536
+ visible=False,
537
+ )
538
+ preview_video = gr.Video(
539
+ label="Preview Video",
540
+ elem_classes="preview_media",
541
+ visible=False,
542
+ format="mp4",
543
+ )
544
+ preview_status = gr.Textbox(
545
+ label="Preview status",
546
+ interactive=False,
547
+ lines=1,
548
+ value="",
549
+ visible=True,
550
+ )
551
  with gr.Column(scale=2):
552
+ url_input = gr.Textbox(
553
+ label="Image / Video URL", placeholder="https://...", lines=1
554
+ )
555
  with gr.Accordion("Prompt (optional)", open=False):
556
  custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
557
  with gr.Accordion("Mistral API Key (optional)", open=False):
558
+ api_key = gr.Textbox(
559
+ label="Mistral API Key", type="password", max_lines=1
560
+ )
561
  with gr.Row():
562
  submit_btn = gr.Button("Submit")
563
  clear_btn = gr.Button("Clear")
 
568
  preview_path_state = gr.State("")
569
 
570
  # small helper: fetch URL into bytes with retries and respect Retry-After
571
+ def _fetch_with_retries_bytes(
572
+ src: str, timeout: int = 15, max_retries: int = 3
573
+ ):
574
  attempt = 0
575
  delay = 1.0
576
  while True:
 
623
  def load_preview(url: str):
624
  # returns (preview_image_path, preview_video_path, status_msg)
625
  if not url:
626
+ return (
627
+ gr.update(value=None, visible=False),
628
+ gr.update(value=None, visible=False),
629
+ gr.update(value=""),
630
+ )
631
  try:
632
  if is_remote(url):
633
  head = safe_head(url)
634
  if head:
635
  ctype = (head.headers.get("content-type") or "").lower()
636
+ if ctype.startswith("video/") or any(
637
+ url.lower().endswith(ext) for ext in VIDEO_EXTS
638
+ ):
639
  local = _save_preview_local(url)
640
  if local:
641
+ return (
642
+ gr.update(value=None, visible=False),
643
+ gr.update(value=local, visible=True),
644
+ gr.update(
645
+ value=f"Remote video detected (content-type={ctype}). Showing preview if browser-playable."
646
+ ),
647
+ )
648
  else:
649
+ return (
650
+ gr.update(value=None, visible=False),
651
+ gr.update(value=None, visible=False),
652
+ gr.update(
653
+ value=f"Remote video detected but preview download failed (content-type={ctype})."
654
+ ),
655
+ )
656
  local = _save_preview_local(url)
657
  if not local:
658
+ return (
659
+ gr.update(value=None, visible=False),
660
+ gr.update(value=None, visible=False),
661
+ gr.update(
662
+ value="Preview load failed: could not fetch resource."
663
+ ),
664
+ )
665
  try:
666
  img = Image.open(local)
667
  if getattr(img, "is_animated", False):
668
  img.seek(0)
669
+ return (
670
+ gr.update(value=local, visible=True),
671
+ gr.update(value=None, visible=False),
672
+ gr.update(value="Image preview loaded."),
673
+ )
674
  except UnidentifiedImageError:
675
  if any(local.lower().endswith(ext) for ext in VIDEO_EXTS) or True:
676
+ return (
677
+ gr.update(value=None, visible=False),
678
+ gr.update(value=local, visible=True),
679
+ gr.update(
680
+ value="Non-image file — showing as video preview if playable."
681
+ ),
682
+ )
683
+ return (
684
+ gr.update(value=None, visible=False),
685
+ gr.update(value=None, visible=False),
686
+ gr.update(
687
+ value="Preview load failed: file is not a valid image."
688
+ ),
689
+ )
690
  except Exception as e:
691
+ return (
692
+ gr.update(value=None, visible=False),
693
+ gr.update(value=None, visible=False),
694
+ gr.update(value=f"Preview load failed: {e}"),
695
+ )
696
 
697
+ url_input.change(
698
+ fn=load_preview,
699
+ inputs=[url_input],
700
+ outputs=[preview_image, preview_video, preview_status],
701
+ )
702
 
703
  def clear_all():
704
  return "", None, None, "idle", "Idle", "", ""
705
 
706
+ clear_btn.click(
707
+ fn=clear_all,
708
+ inputs=[],
709
+ outputs=[
710
+ url_input,
711
+ preview_image,
712
+ preview_video,
713
+ status_state,
714
+ progress_md,
715
+ output_md,
716
+ preview_path_state,
717
+ ],
718
+ )
719
 
720
  def _convert_video_for_preview(path: str) -> str:
721
  if not FFMPEG_BIN or not os.path.exists(FFMPEG_BIN):
 
723
  out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
724
  os.close(out_fd)
725
  cmd = [
726
+ FFMPEG_BIN,
727
+ "-nostdin",
728
+ "-y",
729
+ "-i",
730
+ path,
731
+ "-c:v",
732
+ "libx264",
733
+ "-preset",
734
+ "veryfast",
735
+ "-crf",
736
+ "28",
737
+ "-c:a",
738
+ "aac",
739
+ "-movflags",
740
+ "+faststart",
741
+ out_path,
742
  ]
743
  try:
744
+ subprocess.run(
745
+ cmd,
746
+ stdout=subprocess.DEVNULL,
747
+ stderr=subprocess.DEVNULL,
748
+ timeout=60,
749
+ )
750
  return out_path
751
  except Exception:
752
+ try:
753
+ os.remove(out_path)
754
+ except Exception:
755
+ pass
756
  return path
757
 
758
  # --- Helper: probe codecs via ffprobe; returns dict with streams info or None on failure
759
  def _ffprobe_streams(path: str) -> Optional[dict]:
760
  if not FFMPEG_BIN:
761
  return None
762
+ ffprobe = (
763
+ FFMPEG_BIN.replace("ffmpeg", "ffprobe")
764
+ if "ffmpeg" in FFMPEG_BIN
765
+ else "ffprobe"
766
+ )
767
  if not shutil.which(ffprobe):
768
  ffprobe = "ffprobe"
769
  cmd = [
770
+ ffprobe,
771
+ "-v",
772
+ "error",
773
+ "-print_format",
774
+ "json",
775
+ "-show_streams",
776
+ "-show_format",
777
+ path,
778
  ]
779
  try:
780
  out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
781
  return json.loads(out)
782
  except Exception:
783
  return None
784
+
785
  # --- Helper: is file already browser-playable (mp4 container with h264 video and aac audio OR at least playable video)
786
  def _is_browser_playable(path: str) -> bool:
787
  try:
 
793
  return ext.endswith(".mp4")
794
  streams = info.get("streams", [])
795
  v_ok = any(
796
+ s.get("codec_name") in ("h264", "h265", "avc1")
797
+ and s.get("codec_type") == "video"
798
  for s in streams
799
  )
800
  # audio optional for preview
 
807
  return any(s.get("codec_type") == "video" for s in streams)
808
  except Exception:
809
  return False
810
+
811
  # --- Convert only if not browser-playable
812
  def _convert_video_for_preview_if_needed(path: str) -> str:
813
  """
 
818
  """
819
  if not FFMPEG_BIN or not os.path.exists(path):
820
  return path
821
+
822
  # Quick check: extension + ffprobe for codecs
823
  if path.lower().endswith((".mp4", ".m4v", ".mov")):
824
  info = _ffprobe_streams(path)
825
  if info:
826
+ codecs = {
827
+ s.get("codec_name")
828
+ for s in info.get("streams", [])
829
+ if s.get("codec_type") == "video"
830
+ }
831
  if "h264" in codecs or "h265" in codecs:
832
+ return path # already playable
833
+
834
  # Need conversion → write to a new temp MP4
835
  out_fd, out_path = tempfile.mkstemp(suffix=".mp4")
836
  os.close(out_fd)
837
  cmd = [
838
+ FFMPEG_BIN,
839
+ "-y",
840
+ "-i",
841
+ path,
842
+ "-c:v",
843
+ "libx264",
844
+ "-preset",
845
+ "veryfast",
846
+ "-crf",
847
+ "28",
848
+ "-c:a",
849
+ "aac",
850
+ "-movflags",
851
+ "+faststart",
852
+ out_path,
853
  ]
854
  try:
855
+ subprocess.run(
856
+ cmd,
857
+ stdout=subprocess.DEVNULL,
858
+ stderr=subprocess.DEVNULL,
859
+ timeout=60,
860
+ )
861
  return out_path
862
  except Exception:
863
  # If conversion fails, fall back to the original (Gradio will show its own warning)
864
+ try:
865
+ os.remove(out_path)
866
+ except Exception:
867
+ pass
868
  return path
869
+
870
  def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
871
  """Return (status, markdown_output, preview_path)."""
872
  try:
873
  if not url:
874
  return "error", "**Error:** No URL provided.", ""
875
+
876
  progress(0.02, desc="Checking URL / content‑type")
877
  is_img, is_vid = determine_media_type(url, progress=progress)
878
+
879
  client = get_client(key)
880
  preview_path = ""
881
+
882
  if is_vid:
883
  progress(0.05, desc="Downloading video")
884
  raw = fetch_bytes(url, timeout=120, progress=progress)
885
  if not raw:
886
  return "error", "Failed to download video bytes.", ""
887
+
888
  # write with a proper video extension
889
+ tmp_video = _temp_file(
890
+ raw, suffix="." + (ext_from_src(url) or "mp4")
891
+ )
892
  progress(0.15, desc="Preparing preview")
893
  preview_path = _make_preview(url, raw)
894
+
895
  progress(0.25, desc="Running full‑video analysis")
896
+ result = analyze_video_cohesive(
897
+ client, tmp_video, prompt, progress=progress
898
+ )
899
+
900
  # clean‑up the *raw* temp file (preview may be a different file)
901
+ try:
902
+ os.remove(tmp_video)
903
+ except Exception:
904
+ pass
905
+
906
  elif is_img:
907
  progress(0.05, desc="Downloading image")
908
  raw = fetch_bytes(url, progress=progress)
909
+
910
  # preview image (always JPEG for consistency)
911
  preview_path = _make_preview(url, raw)
912
+
913
  progress(0.20, desc="Running image analysis")
914
+ result = analyze_image_structured(
915
+ client, raw, prompt, progress=progress
916
+ )
917
+
918
  else:
919
  progress(0.07, desc="Downloading unknown media")
920
  raw = fetch_bytes(url, timeout=120, progress=progress)
921
+
922
  # try to open as image
923
  try:
924
  Image.open(BytesIO(raw)).verify()
925
  is_img = True
926
  except Exception:
927
  is_img = False
928
+
929
  if is_img:
930
+ preview_path = _temp_file(
931
+ convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg"
932
+ )
933
+ result = analyze_image_structured(
934
+ client, raw, prompt, progress=progress
935
+ )
936
  else:
937
  tmp_vid = _temp_file(raw, suffix=ext_from_src(url) or ".mp4")
938
  preview_path = _convert_video_for_preview_if_needed(tmp_vid)
939
+ result = analyze_video_cohesive(
940
+ client, tmp_vid, prompt, progress=progress
941
+ )
942
+ try:
943
+ os.remove(tmp_vid)
944
+ except Exception:
945
+ pass
946
+
947
+ status = (
948
+ "done"
949
+ if not (
950
+ isinstance(result, str) and result.lower().startswith("error")
951
+ )
952
+ else "error"
953
+ )
954
+ return (
955
+ status,
956
+ result if isinstance(result, str) else str(result),
957
+ preview_path or "",
958
+ )
959
+
960
  except Exception as exc:
961
  return "error", f"Unexpected worker error: {exc}", ""
962
 
 
980
  }
981
  return labels.get(s, "Submit")
982
 
983
+ status_state.change(
984
+ fn=lambda s: _btn_label_for_status(s),
985
+ inputs=[status_state],
986
+ outputs=[submit_btn],
987
+ )
988
 
989
  def status_to_progress_text(s):
990
+ return {
991
+ "idle": "Idle",
992
+ "busy": "Processing…",
993
+ "done": "Completed",
994
+ "error": "Error — see output",
995
+ }.get(s, s)
996
+
997
+ status_state.change(
998
+ fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md]
999
+ )
1000
 
1001
  preview_cache = {}
1002
 
 
1009
  preview = _convert_video_for_preview_if_needed(tmp)
1010
  preview_cache[url] = (preview, True)
1011
  else:
1012
+ preview = _temp_file(
1013
+ convert_to_jpeg_bytes(raw, base_h=1024), suffix=".jpg"
1014
+ )
1015
  preview_cache[url] = (preview, False)
1016
  return preview
1017
 
 
1027
 
1028
  try:
1029
  if any(path.lower().endswith(ext) for ext in IMAGE_EXTS):
1030
+ return (
1031
+ gr.update(value=path, visible=True),
1032
+ gr.update(value=None, visible=False),
1033
+ "Preview updated.",
1034
+ )
1035
+
1036
  if any(path.lower().endswith(ext) for ext in VIDEO_EXTS):
1037
+ return (
1038
+ gr.update(value=None, visible=False),
1039
+ gr.update(value=path, visible=True),
1040
+ "Preview updated.",
1041
+ )
1042
+
1043
  img = Image.open(path)
1044
  img.verify()
1045
+ return (
1046
+ gr.update(value=path, visible=True),
1047
+ gr.update(value=None, visible=False),
1048
+ "Preview updated.",
1049
+ )
1050
+
1051
  except Exception as e:
1052
  print(f"Failed to update preview: {e}")
1053
+ return (
1054
+ gr.update(value=None, visible=False),
1055
+ gr.update(value=None, visible=False),
1056
+ "",
1057
+ )
1058
 
1059
+ preview_path_state.change(
1060
+ fn=apply_preview,
1061
+ inputs=[preview_path_state, prev_preview_state],
1062
+ outputs=[preview_image, preview_video, preview_status],
1063
+ )
1064
 
1065
  return demo
1066
 
1067
+
1068
  if __name__ == "__main__":
1069
+ create_demo().launch(
1070
+ share=False, server_name="0.0.0.0", server_port=7860, max_threads=8
1071
+ )