Hug0endob committed on
Commit
903c7b4
·
verified ·
1 Parent(s): 43ea394

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -68
app.py CHANGED
@@ -1,14 +1,18 @@
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
 
4
- import os, shutil, subprocess, tempfile, base64, json
 
 
 
 
 
5
  from io import BytesIO
6
  from typing import List, Tuple
7
  import requests
8
  from PIL import Image, ImageFile, UnidentifiedImageError
9
  import gradio as gr
10
 
11
- # ------------------- Backend (unchanged) -------------------
12
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
13
  PIXTRAL_MODEL = "pixtral-12b-2409"
14
  VIDEO_MODEL = "voxtral-mini-latest"
@@ -29,7 +33,6 @@ SYSTEM_INSTRUCTION = (
29
  ImageFile.LOAD_TRUNCATED_IMAGES = True
30
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
31
 
32
- # Optional import – fallback to a dummy client if the library is missing
33
  try:
34
  from mistralai import Mistral
35
  except Exception:
@@ -118,11 +121,7 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
118
  def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
119
  return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
120
 
121
- def extract_best_frames_bytes(
122
- media_path: str,
123
- sample_count: int = 5,
124
- timeout_extract: int = 15,
125
- ) -> List[bytes]:
126
  frames: List[bytes] = []
127
  if not FFMPEG_BIN or not os.path.exists(media_path):
128
  return frames
@@ -145,22 +144,15 @@ def extract_best_frames_bytes(
145
  tmp,
146
  ]
147
  try:
148
- subprocess.run(
149
- cmd,
150
- stdout=subprocess.DEVNULL,
151
- stderr=subprocess.DEVNULL,
152
- timeout=timeout_extract,
153
- )
154
  if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
155
  with open(tmp, "rb") as f:
156
  frames.append(f.read())
157
  except Exception:
158
  pass
159
  finally:
160
- try:
161
- os.remove(tmp)
162
- except Exception:
163
- pass
164
  return frames
165
 
166
  def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
@@ -170,37 +162,19 @@ def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
170
  else:
171
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
172
  url = "https://api.mistral.ai/v1/chat/completions"
173
- headers = (
174
- {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
175
- if api_key
176
- else {"Content-Type": "application/json"}
177
- )
178
- r = requests.post(
179
- url,
180
- json={"model": model, "messages": messages},
181
- headers=headers,
182
- timeout=timeout,
183
- )
184
  r.raise_for_status()
185
  res = r.json()
186
  choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
187
  if not choices:
188
  return str(res)
189
  first = choices[0]
190
- msg = (
191
- first.message
192
- if hasattr(first, "message")
193
- else (first.get("message") if isinstance(first, dict) else first)
194
- )
195
- content = (
196
- msg.get("content")
197
- if isinstance(msg, dict)
198
- else getattr(msg, "content", None)
199
- )
200
  return content.strip() if isinstance(content, str) else str(content)
201
  except Exception as e:
202
  return f"Error during model call: {e}"
203
-
204
  def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120) -> str:
205
  fname = filename or os.path.basename(path)
206
  try:
@@ -228,12 +202,10 @@ def determine_media_type(src: str) -> Tuple[bool, bool]:
228
  is_image = False
229
  is_video = False
230
  ext = ext_from_src(src)
231
-
232
  if ext in IMAGE_EXTS:
233
  is_image = True
234
  if ext in VIDEO_EXTS:
235
  is_video = True
236
-
237
  if is_remote(src):
238
  head = safe_head(src)
239
  if head:
@@ -249,13 +221,10 @@ def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
249
  data_url = b64_bytes(jpeg, mime="image/jpeg")
250
  messages = [
251
  {"role": "system", "content": SYSTEM_INSTRUCTION},
252
- {
253
- "role": "user",
254
- "content": [
255
- {"type": "text", "text": prompt},
256
- {"type": "image_url", "image_url": data_url},
257
- ],
258
- },
259
  ]
260
  return chat_complete(client, PIXTRAL_MODEL, messages)
261
 
@@ -288,21 +257,20 @@ def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
288
  )
289
  except Exception:
290
  continue
291
- content = [
292
- {"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}
293
- ] + image_entries
294
  messages = [
295
  {"role": "system", "content": SYSTEM_INSTRUCTION},
296
  {"role": "user", "content": content},
297
  ]
298
  return chat_complete(client, PIXTRAL_MODEL, messages)
299
 
300
- def process_media(src: str, custom_prompt: str, api_key: str, progress) -> str:
301
  client = get_client(api_key)
302
  prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
303
  if not src:
304
  return "No URL or path provided."
305
- progress(0.05, desc="Determining media type")
 
306
  is_image, is_video = determine_media_type(src)
307
 
308
  if is_image:
@@ -310,7 +278,8 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress) -> str:
310
  raw = fetch_bytes(src)
311
  except Exception as e:
312
  return f"Error fetching image: {e}"
313
- progress(0.2, desc="Analyzing image")
 
314
  try:
315
  return analyze_image_structured(client, raw, prompt)
316
  except UnidentifiedImageError:
@@ -325,23 +294,21 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress) -> str:
325
  return f"Error fetching video: {e}"
326
  tmp_path = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
327
  try:
328
- progress(0.2, desc="Analyzing video")
 
329
  return analyze_video_cohesive(client, tmp_path, prompt)
330
  finally:
331
- try:
332
- os.remove(tmp_path)
333
- except Exception:
334
- pass
335
 
336
- # Fallback: treat as image
337
  try:
338
  raw = fetch_bytes(src)
339
- progress(0.2, desc="Treating as image")
 
340
  return analyze_image_structured(client, raw, prompt)
341
  except Exception as e:
342
  return f"Unable to determine media type or fetch file: {e}"
343
 
344
- # ------------------- Gradio UI (fixed) -------------------
345
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
346
 
347
  def _btn_label_for_status(status: str) -> str:
@@ -396,8 +363,7 @@ def create_demo():
396
  except Exception:
397
  return empty_img, empty_vid
398
 
399
- url_input.change(fn=load_preview, inputs=[url_input],
400
- outputs=[preview_image, preview_video])
401
 
402
  def clear_all():
403
  return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle"
@@ -407,11 +373,10 @@ def create_demo():
407
  return "busy"
408
  submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state])
409
 
410
- def worker(url: str, prompt: str, key: str, progress):
411
  return process_media(url or "", prompt or "", key or "", progress)
412
 
413
- submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key],
414
- outputs=[output_md], queue=True).then(
415
  fn=lambda res: ("error", "**Error:** no result returned.") if not res else
416
  ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
417
  inputs=[output_md],
 
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
 
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ import tempfile
8
+ import base64
9
+ import json
10
  from io import BytesIO
11
  from typing import List, Tuple
12
  import requests
13
  from PIL import Image, ImageFile, UnidentifiedImageError
14
  import gradio as gr
15
 
 
16
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
17
  PIXTRAL_MODEL = "pixtral-12b-2409"
18
  VIDEO_MODEL = "voxtral-mini-latest"
 
33
  ImageFile.LOAD_TRUNCATED_IMAGES = True
34
  Image.MAX_IMAGE_PIXELS = 10000 * 10000
35
 
 
36
  try:
37
  from mistralai import Mistral
38
  except Exception:
 
121
  def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
122
  return f"data:{mime};base64," + base64.b64encode(b).decode("utf-8")
123
 
124
+ def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
 
 
 
 
125
  frames: List[bytes] = []
126
  if not FFMPEG_BIN or not os.path.exists(media_path):
127
  return frames
 
144
  tmp,
145
  ]
146
  try:
147
+ subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
 
 
 
 
 
148
  if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
149
  with open(tmp, "rb") as f:
150
  frames.append(f.read())
151
  except Exception:
152
  pass
153
  finally:
154
+ try: os.remove(tmp)
155
+ except Exception: pass
 
 
156
  return frames
157
 
158
  def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
 
162
  else:
163
  api_key = getattr(client, "api_key", "") or DEFAULT_KEY
164
  url = "https://api.mistral.ai/v1/chat/completions"
165
+ headers = ({"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if api_key else {"Content-Type": "application/json"})
166
+ r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
 
 
 
 
 
 
 
 
 
167
  r.raise_for_status()
168
  res = r.json()
169
  choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
170
  if not choices:
171
  return str(res)
172
  first = choices[0]
173
+ msg = (first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first))
174
+ content = (msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None))
 
 
 
 
 
 
 
 
175
  return content.strip() if isinstance(content, str) else str(content)
176
  except Exception as e:
177
  return f"Error during model call: {e}"
 
178
  def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120) -> str:
179
  fname = filename or os.path.basename(path)
180
  try:
 
202
  is_image = False
203
  is_video = False
204
  ext = ext_from_src(src)
 
205
  if ext in IMAGE_EXTS:
206
  is_image = True
207
  if ext in VIDEO_EXTS:
208
  is_video = True
 
209
  if is_remote(src):
210
  head = safe_head(src)
211
  if head:
 
221
  data_url = b64_bytes(jpeg, mime="image/jpeg")
222
  messages = [
223
  {"role": "system", "content": SYSTEM_INSTRUCTION},
224
+ {"role": "user", "content": [
225
+ {"type": "text", "text": prompt},
226
+ {"type": "image_url", "image_url": data_url},
227
+ ]},
 
 
 
228
  ]
229
  return chat_complete(client, PIXTRAL_MODEL, messages)
230
 
 
257
  )
258
  except Exception:
259
  continue
260
+ content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
 
 
261
  messages = [
262
  {"role": "system", "content": SYSTEM_INSTRUCTION},
263
  {"role": "user", "content": content},
264
  ]
265
  return chat_complete(client, PIXTRAL_MODEL, messages)
266
 
267
+ def process_media(src: str, custom_prompt: str, api_key: str, progress=None) -> str:
268
  client = get_client(api_key)
269
  prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
270
  if not src:
271
  return "No URL or path provided."
272
+ if progress is not None:
273
+ progress(0.05, desc="Determining media type")
274
  is_image, is_video = determine_media_type(src)
275
 
276
  if is_image:
 
278
  raw = fetch_bytes(src)
279
  except Exception as e:
280
  return f"Error fetching image: {e}"
281
+ if progress is not None:
282
+ progress(0.2, desc="Analyzing image")
283
  try:
284
  return analyze_image_structured(client, raw, prompt)
285
  except UnidentifiedImageError:
 
294
  return f"Error fetching video: {e}"
295
  tmp_path = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
296
  try:
297
+ if progress is not None:
298
+ progress(0.2, desc="Analyzing video")
299
  return analyze_video_cohesive(client, tmp_path, prompt)
300
  finally:
301
+ try: os.remove(tmp_path)
302
+ except Exception: pass
 
 
303
 
 
304
  try:
305
  raw = fetch_bytes(src)
306
+ if progress is not None:
307
+ progress(0.2, desc="Treating as image")
308
  return analyze_image_structured(client, raw, prompt)
309
  except Exception as e:
310
  return f"Unable to determine media type or fetch file: {e}"
311
 
 
312
  css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
313
 
314
  def _btn_label_for_status(status: str) -> str:
 
363
  except Exception:
364
  return empty_img, empty_vid
365
 
366
+ url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
 
367
 
368
  def clear_all():
369
  return "", gr.update(value=None, visible=False), gr.update(value=None, visible=False), "idle"
 
373
  return "busy"
374
  submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state])
375
 
376
+ def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
377
  return process_media(url or "", prompt or "", key or "", progress)
378
 
379
+ submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
 
380
  fn=lambda res: ("error", "**Error:** no result returned.") if not res else
381
  ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
382
  inputs=[output_md],