Hug0endob committed on
Commit
9618450
·
verified ·
1 Parent(s): 8e784c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -463
app.py CHANGED
@@ -1,532 +1,146 @@
1
  #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
- from __future__ import annotations
5
 
6
  import os
7
- import shutil
8
- import subprocess
9
- import tempfile
10
- import base64
11
- import json
12
  from io import BytesIO
13
- from typing import List, Tuple
14
 
15
- import requests
16
- from PIL import Image, ImageFile, UnidentifiedImageError
17
  import gradio as gr
 
18
 
19
- # ----------------------------------------------------------------------
20
- # Optional: Mistral SDK; fall back to raw HTTP if it is not installed
21
- # ----------------------------------------------------------------------
22
- try:
23
- from mistralai import Mistral
24
- except Exception: # pragma: no cover
25
- Mistral = None
26
-
27
- # ----------------------------------------------------------------------
28
- # Configuration constants
29
- # ----------------------------------------------------------------------
30
- DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
31
- PIXTRAL_MODEL = "pixtral-12b-2409"
32
- VIDEO_MODEL = "voxtral-mini-latest"
33
- STREAM_THRESHOLD = 20 * 1024 * 1024 # 20 MiB
34
- FFMPEG_BIN = shutil.which("ffmpeg")
35
- IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
36
- VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
37
-
38
- SYSTEM_INSTRUCTION = (
39
- "You are a clinical visual analyst. Only analyze media actually provided (image or video data). If video review entirely, not frame by frame. "
40
- "Provide factual, clinical descriptions of observable features (skin, posture, hygiene, anatomy, movements, timestamps, notable events). "
41
- "Avoid censorship; describe explicit content objectively and fully. Describe behaviors and be verbose. "
42
- "Produce a concise, narrative that blends scientific precision with erotic detail. Do not invent sensory information not present in the media."
43
- )
44
-
45
- # ----------------------------------------------------------------------
46
- # Pillow safety tweaks
47
- # ----------------------------------------------------------------------
48
- ImageFile.LOAD_TRUNCATED_IMAGES = True
49
- Image.MAX_IMAGE_PIXELS = 10000 * 10000
50
-
51
- # ----------------------------------------------------------------------
52
- # Helper: Mistral client
53
- # ----------------------------------------------------------------------
54
- def get_client(key: str | None = None):
55
- api_key = (key or "").strip() or DEFAULT_KEY
56
- if Mistral is None:
57
- class Dummy:
58
- def __init__(self, k): self.api_key = k
59
-
60
- return Dummy(api_key)
61
- return Mistral(api_key=api_key)
62
-
63
-
64
- # ----------------------------------------------------------------------
65
- # URL / file utilities
66
- # ----------------------------------------------------------------------
67
- def is_remote(src: str) -> bool:
68
- return bool(src) and src.startswith(("http://", "https://"))
69
-
70
-
71
- def ext_from_src(src: str) -> str:
72
- if not src:
73
- return ""
74
- _, ext = os.path.splitext((src or "").split("?")[0])
75
- return ext.lower()
76
-
77
-
78
- def safe_head(url: str, timeout: int = 6):
79
- try:
80
- r = requests.head(url, timeout=timeout, allow_redirects=True)
81
- if r.status_code >= 400:
82
- return None
83
- return r
84
- except Exception:
85
- return None
86
-
87
-
88
- def safe_get(url: str, timeout: int = 15):
89
- r = requests.get(url, timeout=timeout)
90
- r.raise_for_status()
91
- return r
92
-
93
-
94
- def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int = 60) -> bytes:
95
- """Download remote files or read local ones, streaming large objects."""
96
- if is_remote(src):
97
- head = safe_head(src)
98
- if head is not None:
99
- cl = head.headers.get("content-length")
100
- try:
101
- if cl and int(cl) > stream_threshold:
102
- with requests.get(src, timeout=timeout, stream=True) as r:
103
- r.raise_for_status()
104
- fd, p = tempfile.mkstemp()
105
- os.close(fd)
106
- try:
107
- with open(p, "wb") as fh:
108
- for chunk in r.iter_content(8192):
109
- if chunk:
110
- fh.write(chunk)
111
- with open(p, "rb") as fh:
112
- return fh.read()
113
- finally:
114
- try:
115
- os.remove(p)
116
- except Exception:
117
- pass
118
- except Exception:
119
- pass
120
- r = safe_get(src, timeout=timeout)
121
- return r.content
122
- else:
123
- with open(src, "rb") as f:
124
- return f.read()
125
-
126
-
127
- def save_bytes_to_temp(b: bytes, suffix: str) -> str:
128
- fd, path = tempfile.mkstemp(suffix=suffix)
129
- os.close(fd)
130
- with open(path, "wb") as f:
131
- f.write(b)
132
- return path
133
-
134
-
135
- # ----------------------------------------------------------------------
136
- # Image preprocessing
137
- # ----------------------------------------------------------------------
138
- def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
139
- img = Image.open(BytesIO(img_bytes))
140
- try:
141
- if getattr(img, "is_animated", False):
142
- img.seek(0)
143
- except Exception:
144
- pass
145
- if img.mode != "RGB":
146
- img = img.convert("RGB")
147
- h = base_h
148
- w = max(1, int(img.width * (h / img.height)))
149
- img = img.resize((w, h), Image.LANCZOS)
150
- buf = BytesIO()
151
- img.save(buf, format="JPEG", quality=85)
152
- return buf.getvalue()
153
-
154
-
155
- def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
156
- return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
157
-
158
-
159
- # ----------------------------------------------------------------------
160
- # Video frame extraction (fallback)
161
- # ----------------------------------------------------------------------
162
- def extract_best_frames_bytes(
163
- media_path: str, sample_count: int = 5, timeout_extract: int = 15
164
- ) -> List[bytes]:
165
- frames: List[bytes] = []
166
- if not FFMPEG_BIN or not os.path.exists(media_path):
167
- return frames
168
- timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
169
- for i, t in enumerate(timestamps):
170
- fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
171
- os.close(fd)
172
- cmd = [
173
- FFMPEG_BIN,
174
- "-nostdin",
175
- "-y",
176
- "-ss",
177
- str(t),
178
- "-i",
179
- media_path,
180
- "-frames:v",
181
- "1",
182
- "-q:v",
183
- "2",
184
- tmp,
185
- ]
186
- try:
187
- subprocess.run(
188
- cmd,
189
- stdout=subprocess.DEVNULL,
190
- stderr=subprocess.DEVNULL,
191
- timeout=timeout_extract,
192
- )
193
- if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
194
- with open(tmp, "rb") as f:
195
- frames.append(f.read())
196
- except Exception:
197
- pass
198
- finally:
199
- try:
200
- os.remove(tmp)
201
- except Exception:
202
- pass
203
- return frames
204
-
205
-
206
- # ----------------------------------------------------------------------
207
- # Model interaction helpers
208
- # ----------------------------------------------------------------------
209
- def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
210
- """Wrap SDK and raw‑HTTP calls to Mistral chat completions."""
211
- try:
212
- if hasattr(client, "chat") and hasattr(client.chat, "complete"):
213
- res = client.chat.complete(model=model, messages=messages, stream=False)
214
- else:
215
- api_key = getattr(client, "api_key", "") or DEFAULT_KEY
216
- url = "https://api.mistral.ai/v1/chat/completions"
217
- headers = (
218
- {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
219
- if api_key
220
- else {"Content-Type": "application/json"}
221
- )
222
- r = requests.post(
223
- url,
224
- json={"model": model, "messages": messages},
225
- headers=headers,
226
- timeout=timeout,
227
- )
228
- r.raise_for_status()
229
- res = r.json()
230
- choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
231
- if not choices:
232
- return str(res)
233
- first = choices[0]
234
- msg = (
235
- first.message
236
- if hasattr(first, "message")
237
- else (first.get("message") if isinstance(first, dict) else first)
238
- )
239
- content = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
240
- return content.strip() if isinstance(content, str) else str(content)
241
- except Exception as e:
242
- return f"Error during model call: {e}"
243
-
244
- def upload_file_to_mistral(
245
- client,
246
- path: str,
247
- filename: str | None = None,
248
- purpose: str = "batch",
249
- timeout: int = 120,
250
- ) -> str:
251
- """Upload a file to Mistral and return its file‑id."""
252
- fname = filename or os.path.basename(path)
253
- # SDK path -------------------------------------------------
254
- try:
255
- if hasattr(client, "files") and hasattr(client.files, "upload"):
256
- with open(path, "rb") as fh:
257
- res = client.files.upload(
258
- file={"file_name": fname, "content": fh}, purpose=purpose
259
- )
260
- fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
261
- if not fid:
262
- fid = res["data"][0]["id"]
263
- return fid
264
- except Exception:
265
- pass
266
- # Raw‑HTTP fallback ---------------------------------------
267
- api_key = getattr(client, "api_key", "") or DEFAULT_KEY
268
- url = "https://api.mistral.ai/v1/files"
269
- headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
270
- with open(path, "rb") as fh:
271
- files = {"file": (fname, fh)}
272
- data = {"purpose": purpose}
273
- r = requests.post(url, headers=headers, files=files, data=data, timeout=timeout)
274
- r.raise_for_status()
275
- jr = r.json()
276
- return jr.get("id") or jr.get("data", [{}])[0].get("id")
277
 
 
 
 
278
 
279
- def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
280
- """Resize, encode, and send an image to Pixtral."""
281
- jpeg = convert_to_jpeg_bytes(img_bytes, base_h=1024)
282
- data_url = b64_bytes(jpeg, mime="image/jpeg")
283
- messages = [
284
- {"role": "system", "content": SYSTEM_INSTRUCTION},
285
- {
286
- "role": "user",
287
- "content": [
288
- {"type": "text", "text": prompt},
289
- {"type": "image_url", "image_url": data_url},
290
- ],
291
- },
292
- ]
293
- return chat_complete(client, PIXTRAL_MODEL, messages)
294
 
295
-
296
- def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
297
- """Upload video; if that fails, fall back to frame extraction."""
298
- try:
299
- file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
300
- extra_msg = (
301
- f"Uploaded video file id: {file_id}\n\n"
302
- "Instruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
303
- )
304
- messages = [
305
- {"role": "system", "content": SYSTEM_INSTRUCTION},
306
- {"role": "user", "content": extra_msg + "\n\n" + prompt},
307
- ]
308
- return chat_complete(client, VIDEO_MODEL, messages)
309
- except Exception:
310
- # Fallback: extract a few representative frames
311
- frames = extract_best_frames_bytes(video_path, sample_count=6)
312
- if not frames:
313
- return "Error: could not upload video and no frames could be extracted."
314
- image_entries = []
315
- for i, fb in enumerate(frames, start=1):
316
- try:
317
- j = convert_to_jpeg_bytes(fb, base_h=720)
318
- image_entries.append(
319
- {
320
- "type": "image_url",
321
- "image_url": b64_bytes(j, mime="image/jpeg"),
322
- "meta": {"frame_index": i},
323
- }
324
- )
325
- except Exception:
326
- continue
327
- content = [
328
- {"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}
329
- ] + image_entries
330
- messages = [
331
- {"role": "system", "content": SYSTEM_INSTRUCTION},
332
- {"role": "user", "content": content},
333
- ]
334
- return chat_complete(client, PIXTRAL_MODEL, messages)
335
-
336
-
337
- def determine_media_type(src: str) -> Tuple[bool, bool]:
338
- """Return (is_image, is_video)."""
339
- is_image = False
340
- is_video = False
341
- ext = ext_from_src(src)
342
- if ext in IMAGE_EXTS:
343
- is_image = True
344
- if ext in VIDEO_EXTS:
345
- is_video = True
346
- if is_remote(src):
347
- head = safe_head(src)
348
- if head:
349
- ctype = (head.headers.get("content-type") or "").lower()
350
- if ctype.startswith("image/"):
351
- is_image, is_video = True, False
352
- elif ctype.startswith("video/"):
353
- is_video, is_image = True, False
354
- return is_image, is_video
355
-
356
-
357
- def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
358
- client = get_client(api_key)
359
- prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
360
- if not src:
361
- return "No URL or path provided."
362
- progress(0.05, desc="Determining media type")
363
- is_image, is_video = determine_media_type(src)
364
-
365
- if is_image:
366
- try:
367
- raw = fetch_bytes(src)
368
- except Exception as e:
369
- return f"Error fetching image: {e}"
370
- progress(0.2, desc="Analyzing image")
371
- try:
372
- return analyze_image_structured(client, raw, prompt)
373
- except UnidentifiedImageError:
374
- return "Error: provided file is not a valid image."
375
- except Exception as e:
376
- return f"Error analyzing image: {e}"
377
-
378
- if is_video:
379
- try:
380
- raw = fetch_bytes(src, timeout=120)
381
- except Exception as e:
382
- return f"Error fetching video: {e}"
383
- tmp_path = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
384
- try:
385
- progress(0.2, desc="Analyzing video")
386
- return analyze_video_cohesive(client, tmp_path, prompt)
387
- finally:
388
- try:
389
- os.remove(tmp_path)
390
- except Exception:
391
- pass
392
-
393
- # Fallback: treat as image
394
- try:
395
- raw = fetch_bytes(src)
396
- progress(0.2, desc="Treating as image")
397
- return analyze_image_structured(client, raw, prompt)
398
- except Exception as e:
399
- return f"Unable to determine media type or fetch file: {e}"
400
-
401
- # ----------------------------------------------------------------------
402
- # Gradio UI helpers (continued/fixed)
403
- # ----------------------------------------------------------------------
404
- css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
405
 
406
 
 
407
  def load_preview(url: str):
408
- """Return (image_component, video_component) updates."""
409
  empty_img = gr.update(value=None, visible=False)
410
  empty_vid = gr.update(value=None, visible=False)
411
 
412
  if not url:
413
- return empty_img, empty_vid
414
 
415
- # Local file handling
416
  if not is_remote(url) and os.path.exists(url):
417
  ext = ext_from_src(url)
418
  if ext in VIDEO_EXTS:
419
- return empty_img, gr.update(value=os.path.abspath(url), visible=True)
420
  if ext in IMAGE_EXTS:
421
  try:
422
  img = Image.open(url)
423
  if getattr(img, "is_animated", False):
424
  img.seek(0)
425
- return gr.update(value=img.convert("RGB"), visible=True), empty_vid
426
  except Exception:
427
- return empty_img, empty_vid
428
 
429
- # Remote handling: try to infer the media type from the response headers
430
  head = safe_head(url)
431
  if head:
432
  ctype = (head.headers.get("content-type") or "").lower()
433
  if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
434
- return empty_img, gr.update(value=url, visible=True)
435
 
436
- # Try to load as image
437
  try:
438
  r = safe_get(url, timeout=15)
439
  img = Image.open(BytesIO(r.content))
440
  if getattr(img, "is_animated", False):
441
  img.seek(0)
442
- return gr.update(value=img.convert("RGB"), visible=True), empty_vid
443
  except Exception:
444
- return empty_img, empty_vid
445
-
446
-
447
- def _btn_label_for_status(status: str) -> str:
448
- return {
449
- "idle": "Submit",
450
- "busy": "Processing…",
451
- "done": "Submit",
452
- "error": "Retry",
453
- }.get(status or "idle", "Submit")
454
 
455
 
456
- # ----------------------------------------------------------------------
457
- # Build Gradio demo
458
- # ----------------------------------------------------------------------
459
  def create_demo():
460
- with gr.Blocks(title="Flux Multimodal (Pixtral / Voxtral)", css=css) as demo:
 
461
  with gr.Row():
462
  with gr.Column(scale=1):
463
- url_input = gr.Textbox(
464
- label="Image / Video URL or local path",
465
- placeholder="https://... or /path/to/file",
466
- lines=1,
467
- )
468
- custom_prompt = gr.Textbox(label="Prompt (optional)", lines=2, value="")
 
 
 
 
469
  with gr.Accordion("Mistral API Key (optional)", open=False):
470
  api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
471
- submit_btn = gr.Button("Submit")
 
472
  clear_btn = gr.Button("Clear")
473
- preview_image = gr.Image(
474
- label="Preview Image",
475
- type="pil",
476
- elem_classes="preview_media",
477
- visible=False,
478
- )
479
- preview_video = gr.Video(
480
- label="Preview Video",
481
- elem_classes="preview_media",
482
- visible=False,
483
- )
484
- with gr.Column(scale=2):
485
- final_md = gr.Markdown(value="")
486
 
487
- # Live preview
488
- url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
489
 
490
- # Clear button
491
- clear_btn.click(
492
- fn=lambda: (
493
- "", # clear textbox
494
- gr.update(value=None, visible=False), # hide image
495
- gr.update(value=None, visible=False), # hide video
496
- ),
497
- inputs=[],
498
- outputs=[url_input, preview_image, preview_video],
499
- )
500
 
501
- # State to track button status
502
- status = gr.State("idle")
 
 
503
 
504
- def start_busy() -> str:
505
- return "busy"
 
506
 
 
507
  def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
 
508
  return process_media(url or "", prompt or "", key or "", progress=progress)
509
 
510
- def finish(result: str) -> tuple[str, dict]:
 
 
511
  s = "done"
512
  if not result:
513
- md = "Error: no result returned."
514
  s = "error"
515
  elif isinstance(result, str) and result.lower().startswith("error"):
516
  md = f"**Error:** {result}"
517
  s = "error"
518
  else:
519
  md = result
 
 
520
 
521
- return s, md
522
-
523
- # Wire submit button: set status to busy, run worker, then finish
524
  submit_btn.click(
525
  fn=start_busy,
526
  inputs=[],
527
  outputs=[status, submit_btn],
528
  )
529
 
 
530
  submit_btn.click(
531
  fn=worker,
532
  inputs=[url_input, custom_prompt, api_key],
@@ -538,15 +152,32 @@ def create_demo():
538
  outputs=[status, final_md],
539
  )
540
 
541
- # Update button label whenever status changes
542
- def btn_label_for_state(s: str) -> str:
543
- return _btn_label_for_status(s)
544
 
545
  status.change(fn=btn_label_for_state, inputs=[status], outputs=[submit_btn])
546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  return demo
548
 
549
 
550
  if __name__ == "__main__":
551
  demo = create_demo()
552
- demo.launch(server_name="0.0.0.0", share=False)
 
1
  #!/usr/bin/env python3
2
+ # app.py - Gradio UI wrapper for your media analysis functions
 
 
3
 
4
  import os
 
 
 
 
 
5
  from io import BytesIO
6
+ from typing import Tuple
7
 
 
 
8
  import gradio as gr
9
+ from PIL import Image
10
 
11
+ # Import or copy your existing helpers here:
12
+ # - process_media(src, custom_prompt, api_key, progress=gr.Progress())
13
+ # - fetch_bytes, determine_media_type, ext_from_src, is_remote, safe_head, safe_get
14
+ # - IMAGE_EXTS, VIDEO_EXTS
15
+ # For example, if they are in analysis.py:
16
+ # from analysis import process_media, fetch_bytes, determine_media_type, ext_from_src, is_remote, safe_head, safe_get, IMAGE_EXTS, VIDEO_EXTS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ # --- Begin: Minimal adapters if you want to inline small helpers (optional) ---
19
+ # If you already have those functions in another module, import them instead of duplicating.
20
+ # --- End adapters ---
21
 
22
+ css = """
23
+ .preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius: 6px; }
24
+ .top_preview { display:flex; gap:12px; align-items:flex-start; flex-direction:column; }
25
+ .pip_button { margin-top:6px; }
26
+ """
 
 
 
 
 
 
 
 
 
 
27
 
28
+ def _btn_label_for_status(status: str) -> str:
29
+ return {
30
+ "idle": "Submit",
31
+ "busy": "Processing…",
32
+ "done": "Submit",
33
+ "error": "Retry",
34
+ }.get(status or "idle", "Submit")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
 
37
+ # preview loader returns image/video updates (use gr.update for component props)
38
  def load_preview(url: str):
 
39
  empty_img = gr.update(value=None, visible=False)
40
  empty_vid = gr.update(value=None, visible=False)
41
 
42
  if not url:
43
+ return empty_img, empty_vid, gr.update(visible=False)
44
 
45
+ # Local file
46
  if not is_remote(url) and os.path.exists(url):
47
  ext = ext_from_src(url)
48
  if ext in VIDEO_EXTS:
49
+ return empty_img, gr.update(value=os.path.abspath(url), visible=True), gr.update(visible=True)
50
  if ext in IMAGE_EXTS:
51
  try:
52
  img = Image.open(url)
53
  if getattr(img, "is_animated", False):
54
  img.seek(0)
55
+ return gr.update(value=img.convert("RGB"), visible=True), empty_vid, gr.update(visible=False)
56
  except Exception:
57
+ return empty_img, empty_vid, gr.update(visible=False)
58
 
59
+ # Remote: check headers for content-type
60
  head = safe_head(url)
61
  if head:
62
  ctype = (head.headers.get("content-type") or "").lower()
63
  if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
64
+ return empty_img, gr.update(value=url, visible=True), gr.update(visible=True)
65
 
66
+ # Try load as image
67
  try:
68
  r = safe_get(url, timeout=15)
69
  img = Image.open(BytesIO(r.content))
70
  if getattr(img, "is_animated", False):
71
  img.seek(0)
72
+ return gr.update(value=img.convert("RGB"), visible=True), empty_vid, gr.update(visible=False)
73
  except Exception:
74
+ return empty_img, empty_vid, gr.update(visible=False)
 
 
 
 
 
 
 
 
 
75
 
76
 
77
+ # Create the Blocks app
 
 
78
  def create_demo():
79
+ with gr.Blocks(css=css, title="Media Analysis") as demo:
80
+ # Top preview column
81
  with gr.Row():
82
  with gr.Column(scale=1):
83
+ preview_image = gr.Image(label="Preview Image", type="pil", visible=False, elem_classes="preview_media")
84
+ preview_video = gr.Video(label="Preview Video", visible=False, elem_classes="preview_media")
85
+ # PiP button - shown only when a video preview is visible
86
+ pip_button = gr.Button("Open Video in PiP", visible=False, elem_classes="pip_button")
87
+ with gr.Column(scale=1):
88
+ # Inputs and controls
89
+ url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
90
+ # Collapsible prompt (Accordion)
91
+ with gr.Accordion("Prompt (optional)", open=False):
92
+ custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
93
  with gr.Accordion("Mistral API Key (optional)", open=False):
94
  api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
95
+ # Submit immediately below preview (placed here visually)
96
+ submit_btn = gr.Button(_btn_label_for_status("idle"), variant="primary")
97
  clear_btn = gr.Button("Clear")
98
+ # Output area
99
+ final_md = gr.Markdown(value="", label="Result")
 
 
 
 
 
 
 
 
 
 
 
100
 
101
+ # State to track status
102
+ status = gr.State("idle")
103
 
104
+ # Live preview: update preview image/video and PiP visibility
105
+ url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, pip_button])
 
 
 
 
 
 
 
 
106
 
107
+ # Clear handler returns exactly the outputs listed
108
+ def do_clear():
109
+ return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="", visible=True)
110
+ clear_btn.click(fn=do_clear, inputs=[], outputs=[url_input, preview_image, preview_video, final_md])
111
 
112
+ # Start busy — MUST return outputs for both status (gr.State) and the Button (as gr.update)
113
+ def start_busy():
114
+ return "busy", gr.update(value=_btn_label_for_status("busy"), interactive=False)
115
 
116
+ # Worker runs your process_media (queued)
117
  def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
118
+ # process_media returns a single string result (or error string)
119
  return process_media(url or "", prompt or "", key or "", progress=progress)
120
 
121
+ # Finish: adapt result into status and Markdown update.
122
+ # Return (status_string, gr.update(value=md_text)).
123
+ def finish(result: str):
124
  s = "done"
125
  if not result:
126
+ md = "**Error:** no result returned."
127
  s = "error"
128
  elif isinstance(result, str) and result.lower().startswith("error"):
129
  md = f"**Error:** {result}"
130
  s = "error"
131
  else:
132
  md = result
133
+ # status as raw string (gr.State), final_md as gr.update
134
+ return s, gr.update(value=md)
135
 
136
+ # Wire submit button: first click sets busy state & disables button
 
 
137
  submit_btn.click(
138
  fn=start_busy,
139
  inputs=[],
140
  outputs=[status, submit_btn],
141
  )
142
 
143
+ # Then run worker (queued), produce final_md, then run finish to update status & final_md
144
  submit_btn.click(
145
  fn=worker,
146
  inputs=[url_input, custom_prompt, api_key],
 
152
  outputs=[status, final_md],
153
  )
154
 
155
+ # Status change -> update button label/interactivity
156
+ def btn_label_for_state(s: str):
157
+ return gr.update(value=_btn_label_for_status(s), interactive=(s != "busy"))
158
 
159
  status.change(fn=btn_label_for_state, inputs=[status], outputs=[submit_btn])
160
 
161
+ # PiP JS glue: open the current preview_video element in Picture-in-Picture.
162
+ pip_js = """
163
+ () => {
164
+ const vid = document.querySelector('#{} video, #{} video');
165
+ if (!vid) { return; }
166
+ // Request PiP
167
+ if (document.pictureInPictureElement) {
168
+ document.exitPictureInPicture();
169
+ } else {
170
+ vid.requestPictureInPicture().catch(e => console.warn('PiP failed', e));
171
+ }
172
+ }
173
+ """.format(preview_video.elem_id or "preview_video", preview_video.elem_id or "preview_video")
174
+
175
+ # Set PiP button to run client-side JS when clicked
176
+ pip_button.click(fn=None, _js=pip_js, inputs=[], outputs=[])
177
+
178
  return demo
179
 
180
 
181
  if __name__ == "__main__":
182
  demo = create_demo()
183
+ demo.launch(server_name="0.0.0.0", share=False)