Hug0endob committed on
Commit
0fef3e3
·
verified ·
1 Parent(s): 019adc8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +173 -115
app.py CHANGED
@@ -1,7 +1,4 @@
1
  #!/usr/bin/env python3
2
- """
3
- Flux - multimodal visual analyzer (Mistral + Gradio)
4
- """
5
 
6
  import os
7
  import subprocess
@@ -31,17 +28,21 @@ SYSTEM_INSTRUCTION = (
31
  IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
32
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
33
 
 
34
  def get_client(key: str = None):
35
  api_key = (key or "").strip() or DEFAULT_KEY
36
  return Mistral(api_key=api_key)
37
 
 
38
  def is_remote(src: str) -> bool:
39
  return bool(src) and src.startswith(("http://", "https://"))
40
 
 
41
  def ext_from_src(src: str) -> str:
42
  _, ext = os.path.splitext((src or "").split("?")[0])
43
  return ext.lower()
44
 
 
45
  def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> bytes:
46
  if is_remote(src):
47
  with requests.get(src, timeout=timeout, stream=True) as r:
@@ -65,12 +66,9 @@ def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> byte
65
  with open(src, "rb") as f:
66
  return f.read()
67
 
 
68
  def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
69
- try:
70
- img = Image.open(BytesIO(media_bytes))
71
- except UnidentifiedImageError:
72
- raise
73
- # handle animated GIFs by taking first frame
74
  try:
75
  img.seek(0)
76
  except Exception:
@@ -84,9 +82,11 @@ def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
84
  img.save(buf, format="JPEG", quality=85)
85
  return buf.getvalue()
86
 
 
87
  def b64_jpeg(img_bytes: bytes) -> str:
88
  return base64.b64encode(img_bytes).decode("utf-8")
89
 
 
90
  def save_bytes_to_temp(b: bytes, suffix: str):
91
  fd, path = tempfile.mkstemp(suffix=suffix)
92
  os.close(fd)
@@ -94,38 +94,32 @@ def save_bytes_to_temp(b: bytes, suffix: str):
94
  f.write(b)
95
  return path
96
 
97
def choose_model_for_src(src: str):
    """Select the model name to use for `src`.

    A video extension selects DEFAULT_VIDEO_MODEL and an image extension
    selects DEFAULT_IMAGE_MODEL. With any other extension, remote sources
    get the video model and local ones the image model.
    """
    suffix = ext_from_src(src)
    wants_video = suffix in VIDEO_EXTS or (
        suffix not in IMAGE_EXTS and is_remote(src)
    )
    return DEFAULT_VIDEO_MODEL if wants_video else DEFAULT_IMAGE_MODEL
104
 
105
  def build_messages_for_image(prompt: str, b64_jpg: str):
106
- # Use a clear textual message with data URL; Mistral SDK supports structured image objects,
107
- # but this textual form is broadly compatible.
108
  content = (
109
  f"{prompt}\n\nImage (data URI follows):\n\ndata:image/jpeg;base64,{b64_jpg}\n\n"
110
- "Instruction: Analyze only visible, provided pixels. Do not assume unseen frames."
111
  )
112
  return [
113
  {"role": "system", "content": SYSTEM_INSTRUCTION},
114
  {"role": "user", "content": content},
115
  ]
116
 
 
117
  def build_messages_for_text(prompt: str, extra_text: str):
118
  return [
119
  {"role": "system", "content": SYSTEM_INSTRUCTION},
120
  {"role": "user", "content": f"{prompt}\n\n{extra_text}"},
121
  ]
122
 
 
123
  def extract_delta(chunk):
124
  if not chunk:
125
  return None
126
  data = getattr(chunk, "data", None) or getattr(chunk, "response", None) or getattr(chunk, "delta", None)
127
  if not data:
128
  return None
 
129
  try:
130
  content = data.choices[0].delta.content
131
  if content is None:
@@ -133,15 +127,6 @@ def extract_delta(chunk):
133
  return str(content)
134
  except Exception:
135
  pass
136
- try:
137
- c = data.choices[0].delta
138
- if isinstance(c, dict):
139
- txt = c.get("content") or c.get("text")
140
- if txt is None:
141
- return None
142
- return str(txt)
143
- except Exception:
144
- pass
145
  try:
146
  msg = data.choices[0].message
147
  if isinstance(msg, dict):
@@ -158,6 +143,105 @@ def extract_delta(chunk):
158
  except Exception:
159
  return None
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  def generate_final_text(src: str, custom_prompt: str, api_key: str):
162
  client = get_client(api_key)
163
  prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
@@ -212,7 +296,7 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
212
  except Exception as e:
213
  parts.append(f"[Model error: {e}]")
214
 
215
- # Image (or frame)
216
  if is_image:
217
  try:
218
  raw = fetch_bytes(src)
@@ -221,102 +305,74 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
221
  except Exception as e:
222
  return f"Error processing image: {e}"
223
  msgs = build_messages_for_image(prompt, b64)
224
- stream_and_collect(choose_model_for_src(src), msgs)
225
  return "".join(parts).strip()
226
 
227
- # Remote video: send URL and explicit instruction to not hallucinate unseen frames
228
  if is_remote(src):
229
- extra = (
230
- f"Remote video URL: {src}\n\n"
231
- "IMPORTANT: The model cannot access the video stream. Analyze only metadata, thumbnails, or "
232
- "user-provided transcript/description. Do not invent frames or events."
233
- )
234
- msgs = build_messages_for_text(prompt, extra)
235
- stream_and_collect(choose_model_for_src(src), msgs)
236
- return "".join(parts).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
- # Local video: attempt frame sampling with ffmpeg and send the clearest frame
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  tmp_media = None
240
  try:
241
  media_bytes = fetch_bytes(src)
242
  _, ext = os.path.splitext(src) if src else ("", ".mp4")
243
  ext = ext or ".mp4"
244
  tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
245
- ffmpeg = shutil.which("ffmpeg")
246
- if ffmpeg:
247
- # Try to probe duration and extract up to N frames evenly spaced
248
- sample_count = 5
249
- tmp_frames = []
250
- try:
251
- # get duration in seconds
252
- probe_cmd = [ffmpeg, "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", tmp_media]
253
- proc = subprocess.Popen(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
254
- out, err = proc.communicate(timeout=10)
255
- duration = None
256
- try:
257
- duration = float(out.strip().split(b"\n")[0]) if out else None
258
- except Exception:
259
- duration = None
260
- # choose timestamps
261
- timestamps = []
262
- if duration and duration > 0:
263
- for i in range(1, sample_count + 1):
264
- t = (duration * i) / (sample_count + 1)
265
- timestamps.append(t)
266
- else:
267
- # fallback fixed offsets
268
- timestamps = [0.5, 1.0, 2.0][:sample_count]
269
- # extract frames
270
- for i, t in enumerate(timestamps):
271
- fd, tmp_frame = tempfile.mkstemp(suffix=f"_{i}.jpg")
272
- os.close(fd)
273
- cmd = [
274
- ffmpeg, "-nostdin", "-y", "-i", tmp_media,
275
- "-ss", str(t),
276
- "-frames:v", "1",
277
- "-q:v", "2",
278
- tmp_frame
279
- ]
280
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
281
- try:
282
- out, err = proc.communicate(timeout=15)
283
- except subprocess.TimeoutExpired:
284
- try:
285
- proc.kill()
286
- except Exception:
287
- pass
288
- out, err = proc.communicate()
289
- if proc.returncode == 0 and os.path.exists(tmp_frame) and os.path.getsize(tmp_frame) > 0:
290
- tmp_frames.append(tmp_frame)
291
- else:
292
- try:
293
- if os.path.exists(tmp_frame):
294
- os.remove(tmp_frame)
295
- except Exception:
296
- pass
297
- # pick best frame by size (simple heuristic) or first
298
- chosen = None
299
- if tmp_frames:
300
- chosen = max(tmp_frames, key=lambda p: os.path.getsize(p) if os.path.exists(p) else 0)
301
- with open(chosen, "rb") as f:
302
- frame_bytes = f.read()
303
- try:
304
- jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
305
- b64 = b64_jpeg(jpg)
306
- msgs = build_messages_for_image(prompt, b64)
307
- stream_and_collect(choose_model_for_src(src), msgs)
308
- return "".join(parts).strip()
309
- finally:
310
- for fpath in tmp_frames:
311
- try:
312
- if os.path.exists(fpath):
313
- os.remove(fpath)
314
- except Exception:
315
- pass
316
- # no frames extracted
317
- except Exception:
318
- pass
319
- return "Unable to process the provided file. Provide a direct image/frame URL or a remote video URL."
320
  finally:
321
  try:
322
  if tmp_media and os.path.exists(tmp_media):
@@ -324,7 +380,8 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
324
  except Exception:
325
  pass
326
 
327
- # UI
 
328
  css = """
329
  .preview_column { min-width: 380px; }
330
  .preview_media img, .preview_media video { max-width: 100%; height: auto; }
@@ -345,6 +402,7 @@ def load_preview(url: str):
345
  except Exception:
346
  return None, None, "Preview failed"
347
 
 
348
  with gr.Blocks(title="Flux", css=css) as demo:
349
  with gr.Row():
350
  with gr.Column(scale=1, elem_classes="preview_column"):
 
1
  #!/usr/bin/env python3
 
 
 
2
 
3
  import os
4
  import subprocess
 
28
  IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
29
  VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
30
 
31
+
32
  def get_client(key: str = None):
33
  api_key = (key or "").strip() or DEFAULT_KEY
34
  return Mistral(api_key=api_key)
35
 
36
+
37
  def is_remote(src: str) -> bool:
38
  return bool(src) and src.startswith(("http://", "https://"))
39
 
40
+
41
  def ext_from_src(src: str) -> str:
42
  _, ext = os.path.splitext((src or "").split("?")[0])
43
  return ext.lower()
44
 
45
+
46
  def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> bytes:
47
  if is_remote(src):
48
  with requests.get(src, timeout=timeout, stream=True) as r:
 
66
  with open(src, "rb") as f:
67
  return f.read()
68
 
69
+
70
  def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
71
+ img = Image.open(BytesIO(media_bytes))
 
 
 
 
72
  try:
73
  img.seek(0)
74
  except Exception:
 
82
  img.save(buf, format="JPEG", quality=85)
83
  return buf.getvalue()
84
 
85
+
86
  def b64_jpeg(img_bytes: bytes) -> str:
87
  return base64.b64encode(img_bytes).decode("utf-8")
88
 
89
+
90
  def save_bytes_to_temp(b: bytes, suffix: str):
91
  fd, path = tempfile.mkstemp(suffix=suffix)
92
  os.close(fd)
 
94
  f.write(b)
95
  return path
96
 
 
 
 
 
 
 
 
97
 
98
  def build_messages_for_image(prompt: str, b64_jpg: str):
 
 
99
  content = (
100
  f"{prompt}\n\nImage (data URI follows):\n\ndata:image/jpeg;base64,{b64_jpg}\n\n"
101
+ "Instruction: Analyze only visible, provided pixels."
102
  )
103
  return [
104
  {"role": "system", "content": SYSTEM_INSTRUCTION},
105
  {"role": "user", "content": content},
106
  ]
107
 
108
+
109
  def build_messages_for_text(prompt: str, extra_text: str):
110
  return [
111
  {"role": "system", "content": SYSTEM_INSTRUCTION},
112
  {"role": "user", "content": f"{prompt}\n\n{extra_text}"},
113
  ]
114
 
115
+
116
  def extract_delta(chunk):
117
  if not chunk:
118
  return None
119
  data = getattr(chunk, "data", None) or getattr(chunk, "response", None) or getattr(chunk, "delta", None)
120
  if not data:
121
  return None
122
+ # try common shapes
123
  try:
124
  content = data.choices[0].delta.content
125
  if content is None:
 
127
  return str(content)
128
  except Exception:
129
  pass
 
 
 
 
 
 
 
 
 
130
  try:
131
  msg = data.choices[0].message
132
  if isinstance(msg, dict):
 
143
  except Exception:
144
  return None
145
 
146
+
147
def _probe_duration(media_path: str, timeout: int):
    """Return the media duration in seconds via ffprobe, or None if unknown."""
    ffprobe = shutil.which("ffprobe")
    if not ffprobe:
        return None
    cmd = [
        ffprobe, "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        media_path,
    ]
    try:
        # subprocess.run kills and reaps the child itself when the timeout hits.
        done = subprocess.run(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, timeout=timeout
        )
    except (OSError, subprocess.SubprocessError):
        return None
    try:
        duration = float(done.stdout.decode("ascii", "ignore").split()[0])
    except (IndexError, ValueError):
        # Empty output or a non-numeric value such as "N/A".
        return None
    return duration if 0 < duration < float("inf") else None


def extract_best_frame_bytes(media_path: str, sample_count: int = 5, timeout_probe: int = 10, timeout_extract: int = 15):
    """Sample frames from a video with ffmpeg and return the largest as JPEG bytes.

    Up to `sample_count` frames are taken at evenly spaced timestamps. If the
    duration cannot be determined, the fixed offsets 0.5 s, 1 s and 2 s are
    used instead. The frame with the biggest file size is returned; size is
    only a rough stand-in for visual detail.

    Args:
        media_path: Path of the local media file.
        sample_count: Maximum number of frames to sample.
        timeout_probe: Seconds allowed for the duration probe.
        timeout_extract: Seconds allowed for each frame extraction.

    Returns:
        The chosen frame's bytes, or None when ffmpeg is not installed, the
        file does not exist, or no frame could be extracted.
    """
    ffmpeg = shutil.which("ffmpeg")
    if not ffmpeg or not os.path.exists(media_path):
        return None

    # The probe options (-show_entries/-of) belong to ffprobe; the ffmpeg
    # binary rejects them, so the duration has to come from ffprobe.
    duration = _probe_duration(media_path, timeout_probe)
    wanted = max(sample_count, 0)
    if duration:
        timestamps = [duration * i / (wanted + 1) for i in range(1, wanted + 1)]
    else:
        timestamps = [0.5, 1.0, 2.0][:wanted]

    created = []  # every temp file made here, so cleanup never misses one
    usable = []   # frames ffmpeg wrote successfully
    try:
        for index, stamp in enumerate(timestamps):
            fd, frame_path = tempfile.mkstemp(suffix=f"_{index}.jpg")
            os.close(fd)
            created.append(frame_path)
            cmd = [
                ffmpeg, "-nostdin", "-y",
                # -ss before -i seeks in the input instead of decoding every
                # frame up to the timestamp.
                "-ss", str(stamp),
                "-i", media_path,
                "-frames:v", "1",
                "-q:v", "2",
                frame_path,
            ]
            try:
                subprocess.run(
                    cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    timeout=timeout_extract,
                    check=True,
                )
            except (OSError, subprocess.SubprocessError):
                continue  # launch failure, non-zero exit or timeout: skip frame
            if os.path.getsize(frame_path) > 0:
                usable.append(frame_path)

        if not usable:
            return None
        best = max(usable, key=os.path.getsize)
        with open(best, "rb") as fh:
            return fh.read()
    finally:
        for frame_path in created:
            try:
                os.remove(frame_path)
            except OSError:
                pass  # best-effort cleanup of temp frames
210
+
211
+
212
def _extract_file_id(payload):
    """Return the file id from an SDK result object or JSON-style dict, else None."""
    fid = getattr(payload, "id", None)
    if not fid and isinstance(payload, dict):
        fid = payload.get("id")
    if fid:
        return fid
    try:
        # List-style envelope: {"data": [{"id": ...}, ...]}.
        return payload["data"][0]["id"] or None
    except (KeyError, IndexError, TypeError):
        return None


def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
    """Upload a local file to Mistral Files and return its file id.

    The SDK (`client.files.upload`) is tried first. If it raises, or returns
    no id, the file is posted directly to the HTTP files endpoint.

    Args:
        client: Mistral SDK client.
        path: Path of the local file to upload.
        filename: Name to report for the file; defaults to the basename of
            `path`.
        purpose: Value sent as the upload's `purpose` field.

    Returns:
        The id of the uploaded file.

    Raises:
        RuntimeError: The HTTP fallback succeeded but returned no file id.
        Exception: Any error raised by the HTTP fallback itself. The earlier
            SDK failure is attached as `__cause__` so it is not lost.
    """
    fname = filename or os.path.basename(path)

    try:
        with open(path, "rb") as fh:
            res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
        fid = _extract_file_id(res)
        if fid:
            return fid
        sdk_error = RuntimeError(f"No file id returned: {res}")
    except Exception as exc:
        # The SDK may raise anything; remember it and try plain HTTP.
        sdk_error = exc

    # An empty/None api_key attribute should not block the env-var fallback.
    api_key = getattr(client, "api_key", None) or os.getenv("MISTRAL_API_KEY", "")
    url = "https://api.mistral.ai/v1/files"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    try:
        with open(path, "rb") as fh:
            r = requests.post(
                url,
                headers=headers,
                files={"file": (fname, fh)},
                data={"purpose": purpose},
                timeout=120,
            )
        r.raise_for_status()
        jr = r.json()
    except Exception as http_error:
        raise http_error from sdk_error

    fid = _extract_file_id(jr)
    if not fid:
        raise RuntimeError(f"Upload failed to return id: {jr}") from sdk_error
    return fid
243
+
244
+
245
  def generate_final_text(src: str, custom_prompt: str, api_key: str):
246
  client = get_client(api_key)
247
  prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
 
296
  except Exception as e:
297
  parts.append(f"[Model error: {e}]")
298
 
299
+ # Image path: convert and send
300
  if is_image:
301
  try:
302
  raw = fetch_bytes(src)
 
305
  except Exception as e:
306
  return f"Error processing image: {e}"
307
  msgs = build_messages_for_image(prompt, b64)
308
+ stream_and_collect(DEFAULT_IMAGE_MODEL, msgs)
309
  return "".join(parts).strip()
310
 
311
+ # Remote video: download, upload to Mistral Files, reference file id in chat
312
  if is_remote(src):
313
+ try:
314
+ media_bytes = fetch_bytes(src, timeout=120)
315
+ except Exception as e:
316
+ return f"Error downloading remote media: {e}"
317
+ ext = ext_from_src(src) or ".mp4"
318
+ tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
319
+ try:
320
+ try:
321
+ file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src.split("?")[0]))
322
+ except Exception as e:
323
+ # If upload fails, fallback to sending a representative frame
324
+ frame_bytes = extract_best_frame_bytes(tmp_media)
325
+ if not frame_bytes:
326
+ return f"Error uploading to Mistral and no frame fallback available: {e}"
327
+ try:
328
+ jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
329
+ except UnidentifiedImageError:
330
+ jpg = frame_bytes
331
+ b64 = b64_jpeg(jpg)
332
+ msgs = build_messages_for_image(prompt, b64)
333
+ stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
334
+ return "".join(parts).strip()
335
 
336
+ extra = (
337
+ f"Remote video uploaded to Mistral Files with id: {file_id}\n\n"
338
+ "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
339
+ )
340
+ msgs = build_messages_for_text(prompt, extra)
341
+ stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
342
+ return "".join(parts).strip()
343
+ finally:
344
+ try:
345
+ if tmp_media and os.path.exists(tmp_media):
346
+ os.remove(tmp_media)
347
+ except Exception:
348
+ pass
349
+
350
+ # Local video: try upload to Mistral; otherwise fallback to frames
351
  tmp_media = None
352
  try:
353
  media_bytes = fetch_bytes(src)
354
  _, ext = os.path.splitext(src) if src else ("", ".mp4")
355
  ext = ext or ".mp4"
356
  tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
357
+ try:
358
+ file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src))
359
+ extra = (
360
+ f"Local video uploaded to Mistral Files with id: {file_id}\n\n"
361
+ "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
362
+ )
363
+ msgs = build_messages_for_text(prompt, extra)
364
+ stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
365
+ return "".join(parts).strip()
366
+ except Exception:
367
+ # fallback to extracting a best frame
368
+ frame_bytes = extract_best_frame_bytes(tmp_media)
369
+ if not frame_bytes:
370
+ return "Unable to process the provided file. Provide a direct image/frame URL or a remote video URL."
371
+ jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
372
+ b64 = b64_jpeg(jpg)
373
+ msgs = build_messages_for_image(prompt, b64)
374
+ stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
375
+ return "".join(parts).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  finally:
377
  try:
378
  if tmp_media and os.path.exists(tmp_media):
 
380
  except Exception:
381
  pass
382
 
383
+
384
+ # --- Minimal Gradio UI ---
385
  css = """
386
  .preview_column { min-width: 380px; }
387
  .preview_media img, .preview_media video { max-width: 100%; height: auto; }
 
402
  except Exception:
403
  return None, None, "Preview failed"
404
 
405
+
406
  with gr.Blocks(title="Flux", css=css) as demo:
407
  with gr.Row():
408
  with gr.Column(scale=1, elem_classes="preview_column"):