Hug0endob committed on
Commit
1a185c8
·
verified ·
1 Parent(s): 1c90917

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -72
app.py CHANGED
@@ -6,7 +6,7 @@ import os, shutil, subprocess, tempfile, base64, json
6
  from io import BytesIO
7
  from typing import List, Tuple
8
  import requests
9
- from PIL import Image, ImageFile, UnidentifiedImageError, ImageSequence
10
  import gradio as gr
11
 
12
  # --- Config
@@ -87,6 +87,26 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
87
  else:
88
  with open(src, "rb") as f: return f.read()
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
91
  frames: List[bytes] = []
92
  if not FFMPEG_BIN or not os.path.exists(media_path): return frames
@@ -150,50 +170,15 @@ def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
150
  {"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": data_url}]}]
151
  return chat_complete(client, PIXTRAL_MODEL, messages)
152
 
153
- def ffmpeg_make_browser_mp4(input_path: str, output_path: str, max_width: int = 1280, crf: int = 28, preset: str = "fast", timeout: int = 60) -> bool:
154
- """
155
- Re-encode to H.264/AAC and move moov atom to front for browser playback.
156
- Returns True on success.
157
- """
158
- if not FFMPEG_BIN:
159
- return False
160
- cmd = [
161
- FFMPEG_BIN, "-nostdin", "-y", "-i", input_path,
162
- "-vf", f"scale='min({max_width},iw)':-2",
163
- "-c:v", "libx264", "-crf", str(crf), "-preset", preset,
164
- "-c:a", "aac", "-b:a", "128k",
165
- "-movflags", "+faststart",
166
- output_path
167
- ]
168
- try:
169
- subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout, check=True)
170
- return os.path.exists(output_path) and os.path.getsize(output_path) > 0
171
- except Exception:
172
- try:
173
- if os.path.exists(output_path): os.remove(output_path)
174
- except Exception: pass
175
- return False
176
-
177
  def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
178
- # Try upload first (preferred). If upload fails, try to ensure browser-playable mp4 and fall back to frames.
179
  try:
180
  file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
181
  extra_msg = f"Uploaded video file id: {file_id}\n\nInstruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
182
  messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": extra_msg + "\n\n" + prompt}]
183
  return chat_complete(client, VIDEO_MODEL, messages)
184
  except Exception:
185
- pass
186
-
187
- tmp_fixed = None
188
- try:
189
- tmp_fd, tmp_fixed = tempfile.mkstemp(suffix=".mp4"); os.close(tmp_fd)
190
- ok = ffmpeg_make_browser_mp4(video_path, tmp_fixed, max_width=1280, crf=28, preset="fast", timeout=120)
191
- if ok:
192
- frames = extract_best_frames_bytes(tmp_fixed, sample_count=6)
193
- else:
194
- frames = extract_best_frames_bytes(video_path, sample_count=6)
195
- if not frames:
196
- return "Error: could not upload video and no frames could be extracted."
197
  image_entries = []
198
  for i, fb in enumerate(frames, start=1):
199
  try:
@@ -204,11 +189,6 @@ def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
204
  content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
205
  messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
206
  return chat_complete(client, PIXTRAL_MODEL, messages)
207
- finally:
208
- try:
209
- if tmp_fixed and os.path.exists(tmp_fixed): os.remove(tmp_fixed)
210
- except Exception:
211
- pass
212
 
213
  def determine_media_type(src: str) -> Tuple[bool, bool]:
214
  is_image = False; is_video = False
@@ -223,30 +203,6 @@ def determine_media_type(src: str) -> Tuple[bool, bool]:
223
  elif ctype.startswith("video/"): is_video, is_image = True, False
224
  return is_image, is_video
225
 
226
- def save_bytes_to_temp(data: bytes, suffix: str = ".dat") -> str:
227
- fd, path = tempfile.mkstemp(suffix=suffix)
228
- os.close(fd)
229
- with open(path, "wb") as fh:
230
- fh.write(data)
231
- return path
232
-
233
- def convert_to_jpeg_bytes(data: bytes, base_h: int = 1024) -> bytes:
234
- buf = BytesIO(data)
235
- img = Image.open(buf)
236
- if getattr(img, "is_animated", False):
237
- img = next(ImageSequence.Iterator(img))
238
- img = img.convert("RGB")
239
- w, h = img.size
240
- if h > base_h:
241
- new_w = int(w * (base_h / h))
242
- img = img.resize((new_w, base_h), Image.LANCZOS)
243
- out = BytesIO()
244
- img.save(out, format="JPEG", quality=90, optimize=True)
245
- return out.getvalue()
246
-
247
- def b64_bytes(data: bytes, mime: str = "image/jpeg") -> str:
248
- return "data:" + mime + ";base64," + base64.b64encode(data).decode("ascii")
249
-
250
  def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
251
  client = get_client(api_key)
252
  prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
@@ -284,7 +240,7 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progre
284
  except Exception as e:
285
  return f"Unable to determine media type or fetch file: {e}"
286
 
287
- # --- Gradio UI (modified: removed PiP, keep preview left, Submit+Clear on same row)
288
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
289
 
290
  def _btn_label_for_status(status: str) -> str:
@@ -296,15 +252,87 @@ def create_demo():
296
  with gr.Column(scale=1):
297
  preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
298
  preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
 
299
  with gr.Column(scale=2):
300
  url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
301
  with gr.Accordion("Prompt (optional)", open=False):
302
  custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
303
  with gr.Accordion("Mistral API Key (optional)", open=False):
304
  api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
305
- # Buttons on same row
306
- with gr.Row():
307
- submit_btn = gr.Button(_btn_label_for_status("idle"))
308
- clear_btn = gr.Button("Clear")
309
  output_md = gr.Markdown("")
310
  status_state = gr.State("idle")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from io import BytesIO
7
  from typing import List, Tuple
8
  import requests
9
+ from PIL import Image, ImageFile, UnidentifiedImageError
10
  import gradio as gr
11
 
12
  # --- Config
 
87
  else:
88
  with open(src, "rb") as f: return f.read()
89
 
90
def save_bytes_to_temp(b: bytes, suffix: str) -> str:
    """Write *b* to a newly created temporary file and return its path.

    The file is created with ``tempfile.mkstemp`` and is NOT deleted
    automatically; the caller owns the returned path and is responsible for
    removing it.

    Args:
        b: Raw bytes to write.
        suffix: Filename suffix for the temporary file (for example ``".mp4"``).

    Returns:
        Path of the temporary file containing exactly *b*.

    Raises:
        OSError: If the file cannot be created or written.
        TypeError: If *b* is not a bytes-like object.
    """
    fd, path = tempfile.mkstemp(suffix=suffix)
    try:
        # Write through the descriptor mkstemp already opened instead of
        # closing it and reopening the path by name.
        with os.fdopen(fd, "wb") as f:
            f.write(b)
    except BaseException:
        # Do not leave a partial/empty temp file behind when the write fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
94
+
95
def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
    """Re-encode an image as an RGB JPEG scaled to a fixed height.

    The image is decoded from *img_bytes*; for animated images the first
    frame is selected. The result is converted to RGB, resized to a height of
    *base_h* pixels with the width scaled by the same factor (never below
    1 px), and saved as JPEG at quality 85.

    Args:
        img_bytes: Encoded image data in any format Pillow can open.
        base_h: Target height in pixels; must be positive.

    Returns:
        The JPEG-encoded image bytes.

    Raises:
        ValueError: If *base_h* is not positive.
        PIL.UnidentifiedImageError: If *img_bytes* is not a readable image.
    """
    if base_h <= 0:
        raise ValueError(f"base_h must be a positive integer, got {base_h!r}")

    # The context manager releases the decoder/file handle once encoding is done.
    with Image.open(BytesIO(img_bytes)) as img:
        try:
            if getattr(img, "is_animated", False):
                img.seek(0)
        except Exception:
            # Best effort: if frame selection fails, carry on with whatever
            # frame is currently loaded rather than aborting the conversion.
            pass

        frame = img if img.mode == "RGB" else img.convert("RGB")
        width = max(1, int(frame.width * (base_h / frame.height)))
        # Skip the resample when the image already has the target size.
        if frame.size != (width, base_h):
            frame = frame.resize((width, base_h), Image.LANCZOS)

        buf = BytesIO()
        frame.save(buf, format="JPEG", quality=85)
    return buf.getvalue()
106
+
107
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
    """Return *b* encoded as a base64 ``data:`` URL.

    Args:
        b: Raw bytes to embed.
        mime: MIME type placed in the URL header.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.
    """
    # Base64 output is pure ASCII, so decoding it as ASCII is always safe.
    payload = base64.b64encode(b).decode("ascii")
    return f"data:{mime};base64,{payload}"
109
+
110
  def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
111
  frames: List[bytes] = []
112
  if not FFMPEG_BIN or not os.path.exists(media_path): return frames
 
170
  {"role": "user", "content": [{"type": "text", "text": prompt}, {"type": "image_url", "image_url": data_url}]}]
171
  return chat_complete(client, PIXTRAL_MODEL, messages)
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
 
174
  try:
175
  file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path))
176
  extra_msg = f"Uploaded video file id: {file_id}\n\nInstruction: Analyze the entire video and produce a single cohesive narrative describing consistent observations."
177
  messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": extra_msg + "\n\n" + prompt}]
178
  return chat_complete(client, VIDEO_MODEL, messages)
179
  except Exception:
180
+ frames = extract_best_frames_bytes(video_path, sample_count=6)
181
+ if not frames: return "Error: could not upload video and no frames could be extracted."
 
 
 
 
 
 
 
 
 
 
182
  image_entries = []
183
  for i, fb in enumerate(frames, start=1):
184
  try:
 
189
  content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
190
  messages = [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": content}]
191
  return chat_complete(client, PIXTRAL_MODEL, messages)
 
 
 
 
 
192
 
193
  def determine_media_type(src: str) -> Tuple[bool, bool]:
194
  is_image = False; is_video = False
 
203
  elif ctype.startswith("video/"): is_video, is_image = True, False
204
  return is_image, is_video
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  def process_media(src: str, custom_prompt: str, api_key: str, progress=gr.Progress()) -> str:
207
  client = get_client(api_key)
208
  prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
 
240
  except Exception as e:
241
  return f"Unable to determine media type or fetch file: {e}"
242
 
243
+ # --- Gradio UI
244
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
245
 
246
  def _btn_label_for_status(status: str) -> str:
 
252
  with gr.Column(scale=1):
253
  preview_image = gr.Image(label="Preview Image", type="pil", elem_classes="preview_media", visible=False)
254
  preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False)
255
+ pip_button = gr.Button("Open Video in PiP", visible=False)
256
  with gr.Column(scale=2):
257
  url_input = gr.Textbox(label="Image / Video URL or local path", placeholder="https://... or /path/to/file", lines=1)
258
  with gr.Accordion("Prompt (optional)", open=False):
259
  custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
260
  with gr.Accordion("Mistral API Key (optional)", open=False):
261
  api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
262
+ submit_btn = gr.Button(_btn_label_for_status("idle"))
263
+ clear_btn = gr.Button("Clear")
 
 
264
  output_md = gr.Markdown("")
265
  status_state = gr.State("idle")
266
+
267
+ pip_html = gr.HTML("""<div id="pip-root" style="display:none"></div>
268
+ <script>
269
+ window.openPiP = (sel) => {
270
+ try {
271
+ const v = document.querySelector(sel);
272
+ if (!v) return "no-video";
273
+ if (v.requestPictureInPicture) { v.requestPictureInPicture(); return "opened"; }
274
+ return "unsupported";
275
+ } catch(e){ return "error:"+e; }
276
+ };
277
+ </script>""")
278
+
279
+ def load_preview(url: str):
280
+ empty_img = gr.update(value=None, visible=False)
281
+ empty_vid = gr.update(value=None, visible=False)
282
+ pip_vis = gr.update(visible=False)
283
+ if not url: return empty_img, empty_vid, pip_vis
284
+ if not is_remote(url) and os.path.exists(url):
285
+ ext = ext_from_src(url)
286
+ if ext in VIDEO_EXTS: return empty_img, gr.update(value=os.path.abspath(url), visible=True), gr.update(visible=True)
287
+ if ext in IMAGE_EXTS:
288
+ try:
289
+ img = Image.open(url)
290
+ if getattr(img, "is_animated", False): img.seek(0)
291
+ return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
292
+ except Exception: return empty_img, empty_vid, pip_vis
293
+ head = safe_head(url)
294
+ if head:
295
+ ctype = (head.headers.get("content-type") or "").lower()
296
+ if ctype.startswith("video/") or any(url.lower().endswith(ext) for ext in VIDEO_EXTS):
297
+ return empty_img, gr.update(value=url, visible=True), gr.update(visible=True)
298
+ try:
299
+ r = safe_get(url, timeout=15)
300
+ img = Image.open(BytesIO(r.content))
301
+ if getattr(img, "is_animated", False): img.seek(0)
302
+ return gr.update(value=img.convert("RGB"), visible=True), empty_vid, pip_vis
303
+ except Exception:
304
+ return empty_img, empty_vid, pip_vis
305
+
306
+ url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, pip_button])
307
+
308
+ def clear_all():
309
+ return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle", gr.update(value=_btn_label_for_status("idle"))
310
+ clear_btn.click(fn=clear_all, inputs=[], outputs=[url_input, preview_image, preview_video, status_state, submit_btn])
311
+
312
+ def pip_click(_):
313
+ js = "<script>setTimeout(()=>window.openPiP('video.preview_media'),50);</script>"
314
+ return gr.HTML.update(value=js)
315
+ pip_button.click(fn=pip_click, inputs=[url_input], outputs=[pip_html])
316
+
317
+ def start_busy():
318
+ s = "busy"
319
+ return s, gr.update(value=_btn_label_for_status(s))
320
+ submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state, submit_btn])
321
+
322
+ def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
323
+ return process_media(url or "", prompt or "", key or "", progress=progress)
324
+ submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
325
+ fn=lambda res: ("error", "**Error:** no result returned.") if not res else
326
+ ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
327
+ inputs=[output_md],
328
+ outputs=[status_state, output_md],
329
+ )
330
+
331
+ def btn_label_for_state(s: str):
332
+ return _btn_label_for_status(s)
333
+ status_state.change(fn=btn_label_for_state, inputs=[status_state], outputs=[submit_btn])
334
+
335
+ return demo
336
+
337
+ if __name__ == "__main__":
338
+ create_demo().launch()