Hug0endob committed on
Commit
922dd1b
·
verified ·
1 Parent(s): f9926bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -61
app.py CHANGED
@@ -1,5 +1,4 @@
1
  #!/usr/bin/env python3
2
-
3
  import os
4
  import subprocess
5
  import tempfile
@@ -11,11 +10,10 @@ from PIL import Image, UnidentifiedImageError
11
  import gradio as gr
12
  from mistralai import Mistral
13
 
14
- # Config
15
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
16
  DEFAULT_IMAGE_MODEL = "pixtral-12b-2409"
17
  DEFAULT_VIDEO_MODEL = "voxtral-mini-latest"
18
- STREAM_THRESHOLD = 20 * 1024 * 1024 # 20 MB
19
 
20
  SYSTEM_INSTRUCTION = (
21
  "You are a clinical visual analyst. Only analyze media actually provided (image data or extracted frames). "
@@ -69,7 +67,6 @@ def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> byte
69
  def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
70
  img = Image.open(BytesIO(media_bytes))
71
  try:
72
- # For animated images, use first frame
73
  if getattr(img, "is_animated", False):
74
  img.seek(0)
75
  except Exception:
@@ -193,7 +190,6 @@ def extract_best_frame_bytes(media_path: str, sample_count: int = 5, timeout_pro
193
 
194
  def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
195
  fname = filename or os.path.basename(path)
196
- # Try SDK upload
197
  try:
198
  with open(path, "rb") as fh:
199
  res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
@@ -207,7 +203,6 @@ def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
207
  raise RuntimeError(f"No file id returned: {res}")
208
  return fid
209
  except Exception:
210
- # Fallback to HTTP upload
211
  api_key = client.api_key if hasattr(client, "api_key") else os.getenv("MISTRAL_API_KEY", "")
212
  url = "https://api.mistral.ai/v1/files"
213
  headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
@@ -224,38 +219,21 @@ def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
224
 
225
 
226
  def build_messages_for_image(prompt: str, b64_jpg: str = None, image_url: str = None):
227
- """
228
- Build messages using structured content per Mistral vision API:
229
- - For remote images: include an {"type":"image_url","image_url":...} item
230
- - For local bytes: include {"type":"image_base64","image_base64": "..."} (no data: URI prefix)
231
- The user content is a list of typed items.
232
- """
233
- user_content = []
234
- user_content.append({"type": "text", "text": prompt})
235
  if image_url:
236
  user_content.append({"type": "image_url", "image_url": image_url})
237
  elif b64_jpg:
238
  user_content.append({"type": "image_base64", "image_base64": b64_jpg})
239
  else:
240
  raise ValueError("Either image_url or b64_jpg required")
241
- return [
242
- {"role": "system", "content": SYSTEM_INSTRUCTION},
243
- {"role": "user", "content": user_content},
244
- ]
245
 
246
 
247
  def build_messages_for_text(prompt: str, extra_text: str):
248
- return [
249
- {"role": "system", "content": SYSTEM_INSTRUCTION},
250
- {"role": "user", "content": f"{prompt}\n\n{extra_text}"},
251
- ]
252
 
253
 
254
  def stream_and_collect(client, model, messages, parts: list):
255
- """
256
- Use client.chat.stream if available; otherwise use complete.
257
- Appends textual pieces to parts list.
258
- """
259
  try:
260
  stream_gen = None
261
  try:
@@ -304,16 +282,12 @@ def stream_and_collect(client, model, messages, parts: list):
304
 
305
 
306
  def generate_final_text(src: str, custom_prompt: str, api_key: str):
307
- """
308
- Main entry for Submit button. Returns final text (string).
309
- """
310
  client = get_client(api_key)
311
  prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
312
  ext = ext_from_src(src)
313
  is_image = ext in IMAGE_EXTS or (not is_remote(src) and os.path.isfile(src) and ext in IMAGE_EXTS)
314
  parts = []
315
 
316
- # Image handling: remote image_url or local image_base64
317
  if is_image:
318
  try:
319
  if is_remote(src):
@@ -321,16 +295,14 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
321
  else:
322
  raw = fetch_bytes(src)
323
  jpg = convert_to_jpeg_bytes(raw, base_h=480)
324
- b64 = b64_jpeg(jpg) # NOTE: this is plain base64 string (no data: prefix)
325
  msgs = build_messages_for_image(prompt, b64_jpg=b64)
326
  except Exception as e:
327
  return f"Error processing image: {e}"
328
  stream_and_collect(client, DEFAULT_IMAGE_MODEL, msgs, parts)
329
  return "".join(parts).strip()
330
 
331
- # Video handling (remote/local)
332
  if is_remote(src):
333
- # download remote media, try upload to Mistral Files; fallback to a representative frame
334
  try:
335
  media_bytes = fetch_bytes(src, timeout=120)
336
  except Exception as e:
@@ -341,7 +313,6 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
341
  try:
342
  file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src.split("?")[0]))
343
  except Exception as e:
344
- # fallback to sending representative frame
345
  frame_bytes = extract_best_frame_bytes(tmp_media)
346
  if not frame_bytes:
347
  return f"Error uploading to Mistral and no frame fallback available: {e}"
@@ -354,10 +325,7 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
354
  stream_and_collect(client, DEFAULT_VIDEO_MODEL, msgs, parts)
355
  return "".join(parts).strip()
356
 
357
- extra = (
358
- f"Remote video uploaded to Mistral Files with id: {file_id}\n\n"
359
- "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
360
- )
361
  msgs = build_messages_for_text(prompt, extra)
362
  stream_and_collect(client, DEFAULT_VIDEO_MODEL, msgs, parts)
363
  return "".join(parts).strip()
@@ -368,7 +336,6 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
368
  except Exception:
369
  pass
370
 
371
- # Local video: upload or fallback to frames
372
  tmp_media = None
373
  try:
374
  media_bytes = fetch_bytes(src)
@@ -377,10 +344,7 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
377
  tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
378
  try:
379
  file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src))
380
- extra = (
381
- f"Local video uploaded to Mistral Files with id: {file_id}\n\n"
382
- "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
383
- )
384
  msgs = build_messages_for_text(prompt, extra)
385
  stream_and_collect(client, DEFAULT_VIDEO_MODEL, msgs, parts)
386
  return "".join(parts).strip()
@@ -401,17 +365,10 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
401
  pass
402
 
403
 
404
- # --- Gradio UI ---
405
- css = """
406
- .preview_media img, .preview_media video { max-width: 100%; height: auto; }
407
- """
408
 
409
  def load_preview(url: str):
410
- """
411
- Returns: (image_or_None, video_or_None, mime_label)
412
- - For images: return PIL.Image, None, "Image"
413
- - For videos: return None, url, "Video"
414
- """
415
  if not url:
416
  return None, None, ""
417
  try:
@@ -436,25 +393,20 @@ def load_preview(url: str):
436
  with gr.Blocks(title="Flux", css=css) as demo:
437
  with gr.Row():
438
  with gr.Column(scale=1):
439
- # Top-left controls
440
  url_input = gr.Textbox(label="Image or Video URL", placeholder="https://...", lines=1)
441
  custom_prompt = gr.Textbox(label="Prompt (optional)", lines=2, value="")
442
  with gr.Accordion("Mistral API Key (optional)", open=False):
443
  api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
444
  submit = gr.Button("Submit")
445
-
446
- # Single preview area (either image or video shown)
447
  preview_image = gr.Image(label="Preview", type="pil", elem_classes="preview_media")
448
  preview_video = gr.Video(label="Preview", elem_classes="preview_media")
449
 
450
  with gr.Column(scale=2):
451
- # Right column: plain text output (rendered as Markdown/HTML allowed)
452
- final_text = gr.Markdown(value="") # use Markdown so long text renders nicely
453
 
454
- # Wire up events
455
  url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, gr.Textbox(visible=False)])
456
- # For submit, use queue to avoid blocking UI
457
- submit.click(fn=generate_final_text, inputs=[url_input, custom_prompt, api_key], outputs=[final_text], queue=True)
458
 
459
  if __name__ == "__main__":
460
- demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), enable_queue=True)
 
1
  #!/usr/bin/env python3
 
2
  import os
3
  import subprocess
4
  import tempfile
 
10
  import gradio as gr
11
  from mistralai import Mistral
12
 
 
13
  DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
14
  DEFAULT_IMAGE_MODEL = "pixtral-12b-2409"
15
  DEFAULT_VIDEO_MODEL = "voxtral-mini-latest"
16
+ STREAM_THRESHOLD = 20 * 1024 * 1024
17
 
18
  SYSTEM_INSTRUCTION = (
19
  "You are a clinical visual analyst. Only analyze media actually provided (image data or extracted frames). "
 
67
  def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
68
  img = Image.open(BytesIO(media_bytes))
69
  try:
 
70
  if getattr(img, "is_animated", False):
71
  img.seek(0)
72
  except Exception:
 
190
 
191
  def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
192
  fname = filename or os.path.basename(path)
 
193
  try:
194
  with open(path, "rb") as fh:
195
  res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
 
203
  raise RuntimeError(f"No file id returned: {res}")
204
  return fid
205
  except Exception:
 
206
  api_key = client.api_key if hasattr(client, "api_key") else os.getenv("MISTRAL_API_KEY", "")
207
  url = "https://api.mistral.ai/v1/files"
208
  headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
 
219
 
220
 
221
  def build_messages_for_image(prompt: str, b64_jpg: str = None, image_url: str = None):
222
+ user_content = [{"type": "text", "text": prompt}]
 
 
 
 
 
 
 
223
  if image_url:
224
  user_content.append({"type": "image_url", "image_url": image_url})
225
  elif b64_jpg:
226
  user_content.append({"type": "image_base64", "image_base64": b64_jpg})
227
  else:
228
  raise ValueError("Either image_url or b64_jpg required")
229
+ return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": user_content}]
 
 
 
230
 
231
 
232
  def build_messages_for_text(prompt: str, extra_text: str):
233
+ return [{"role": "system", "content": SYSTEM_INSTRUCTION}, {"role": "user", "content": f"{prompt}\n\n{extra_text}"}]
 
 
 
234
 
235
 
236
  def stream_and_collect(client, model, messages, parts: list):
 
 
 
 
237
  try:
238
  stream_gen = None
239
  try:
 
282
 
283
 
284
  def generate_final_text(src: str, custom_prompt: str, api_key: str):
 
 
 
285
  client = get_client(api_key)
286
  prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
287
  ext = ext_from_src(src)
288
  is_image = ext in IMAGE_EXTS or (not is_remote(src) and os.path.isfile(src) and ext in IMAGE_EXTS)
289
  parts = []
290
 
 
291
  if is_image:
292
  try:
293
  if is_remote(src):
 
295
  else:
296
  raw = fetch_bytes(src)
297
  jpg = convert_to_jpeg_bytes(raw, base_h=480)
298
+ b64 = b64_jpeg(jpg)
299
  msgs = build_messages_for_image(prompt, b64_jpg=b64)
300
  except Exception as e:
301
  return f"Error processing image: {e}"
302
  stream_and_collect(client, DEFAULT_IMAGE_MODEL, msgs, parts)
303
  return "".join(parts).strip()
304
 
 
305
  if is_remote(src):
 
306
  try:
307
  media_bytes = fetch_bytes(src, timeout=120)
308
  except Exception as e:
 
313
  try:
314
  file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src.split("?")[0]))
315
  except Exception as e:
 
316
  frame_bytes = extract_best_frame_bytes(tmp_media)
317
  if not frame_bytes:
318
  return f"Error uploading to Mistral and no frame fallback available: {e}"
 
325
  stream_and_collect(client, DEFAULT_VIDEO_MODEL, msgs, parts)
326
  return "".join(parts).strip()
327
 
328
+ extra = f"Remote video uploaded to Mistral Files with id: {file_id}\n\nInstruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
 
 
 
329
  msgs = build_messages_for_text(prompt, extra)
330
  stream_and_collect(client, DEFAULT_VIDEO_MODEL, msgs, parts)
331
  return "".join(parts).strip()
 
336
  except Exception:
337
  pass
338
 
 
339
  tmp_media = None
340
  try:
341
  media_bytes = fetch_bytes(src)
 
344
  tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
345
  try:
346
  file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src))
347
+ extra = f"Local video uploaded to Mistral Files with id: {file_id}\n\nInstruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
 
 
 
348
  msgs = build_messages_for_text(prompt, extra)
349
  stream_and_collect(client, DEFAULT_VIDEO_MODEL, msgs, parts)
350
  return "".join(parts).strip()
 
365
  pass
366
 
367
 
368
+ css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; }"
369
+
 
 
370
 
371
  def load_preview(url: str):
 
 
 
 
 
372
  if not url:
373
  return None, None, ""
374
  try:
 
393
  with gr.Blocks(title="Flux", css=css) as demo:
394
  with gr.Row():
395
  with gr.Column(scale=1):
 
396
  url_input = gr.Textbox(label="Image or Video URL", placeholder="https://...", lines=1)
397
  custom_prompt = gr.Textbox(label="Prompt (optional)", lines=2, value="")
398
  with gr.Accordion("Mistral API Key (optional)", open=False):
399
  api_key = gr.Textbox(label="API Key", type="password", max_lines=1)
400
  submit = gr.Button("Submit")
 
 
401
  preview_image = gr.Image(label="Preview", type="pil", elem_classes="preview_media")
402
  preview_video = gr.Video(label="Preview", elem_classes="preview_media")
403
 
404
  with gr.Column(scale=2):
405
+ final_text = gr.Markdown(value="")
 
406
 
 
407
  url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video, gr.Textbox(visible=False)])
408
+ submit.click(fn=generate_final_text, inputs=[url_input, custom_prompt, api_key], outputs=[final_text])
409
+ demo.queue()
410
 
411
  if __name__ == "__main__":
412
+ demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))