Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,19 +13,63 @@ import time
|
|
| 13 |
import atexit
|
| 14 |
from requests.exceptions import RequestException
|
| 15 |
|
| 16 |
-
#
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
from mistralai
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
# --- Configuration and Globals ---
|
| 22 |
-
|
| 23 |
PIXTRAL_MODEL = "pixtral-12b-2409"
|
| 24 |
VIDEO_MODEL = "voxtral-mini-latest"
|
| 25 |
-
|
| 26 |
FFMPEG_BIN = shutil.which("ffmpeg")
|
| 27 |
-
|
| 28 |
-
|
| 29 |
|
| 30 |
SYSTEM_INSTRUCTION = (
|
| 31 |
"You are a clinical visual analyst. Only analyze media actually provided (image or video data). "
|
|
@@ -42,47 +86,55 @@ Image.MAX_IMAGE_PIXELS = 10000 * 10000
|
|
| 42 |
DEFAULT_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
|
| 43 |
|
| 44 |
# --- Temporary File Cleanup ---
|
| 45 |
-
|
| 46 |
|
| 47 |
-
def
|
| 48 |
-
"""Removes all temporary files created
|
| 49 |
-
for f_path in list(
|
| 50 |
if os.path.exists(f_path):
|
| 51 |
try:
|
| 52 |
os.remove(f_path)
|
| 53 |
except Exception as e:
|
| 54 |
print(f"Error during final cleanup of {f_path}: {e}")
|
| 55 |
-
|
| 56 |
|
| 57 |
-
atexit.register(
|
| 58 |
|
| 59 |
# --- Mistral Client and API Helpers ---
|
| 60 |
-
def get_client(
|
| 61 |
"""
|
| 62 |
-
Returns a Mistral client instance. If the API key is missing
|
| 63 |
-
a MistralAPIException is raised.
|
| 64 |
"""
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
|
|
|
| 68 |
raise MistralAPIException(
|
| 69 |
"Mistral API key is not set. Please provide it in the UI or as MISTRAL_API_KEY environment variable.",
|
| 70 |
status_code=401 # Unauthorized
|
| 71 |
)
|
| 72 |
|
| 73 |
-
#
|
| 74 |
-
|
| 75 |
-
return
|
| 76 |
|
| 77 |
def is_remote(src: str) -> bool:
    """Return True when *src* is an http(s) URL rather than a local path.

    URL schemes are case-insensitive, so ``HTTP://`` and ``Https://`` are
    accepted as well as the lowercase forms. Empty or ``None`` input is
    reported as not remote.
    """
    if not src:
        return False
    # Only the first few characters can hold the scheme; avoid lowering the
    # whole string.
    return src[:8].lower().startswith(("http://", "https://"))
|
| 79 |
|
| 80 |
def ext_from_src(src: str) -> str:
    """Return the lowercase file extension (with leading dot) of a path or URL.

    For URLs only the path component is inspected, so query strings,
    fragments and the host name never leak into the result
    (``https://example.com`` has no extension, not ``.com``). Local paths
    keep the historical behaviour of dropping everything after the first
    ``?``. Returns ``""`` when *src* is empty or has no extension.
    """
    if not src:
        return ""

    target = None
    if "://" in src:
        # Function-scope import keeps this block self-contained.
        from urllib.parse import urlsplit
        try:
            target = urlsplit(src).path
        except ValueError:
            # Malformed URL (e.g. bad IPv6 literal): fall back to plain splitting.
            target = None
    if target is None:
        target = src.split("?")[0]

    _, ext = os.path.splitext(target)
    return ext.lower()
|
| 84 |
|
| 85 |
def safe_head(url: str, timeout: int = 6):
|
|
|
|
| 86 |
try:
|
| 87 |
r = requests.head(url, timeout=timeout, allow_redirects=True, headers=DEFAULT_HEADERS)
|
| 88 |
return None if r.status_code >= 400 else r
|
|
@@ -90,11 +142,13 @@ def safe_head(url: str, timeout: int = 6):
|
|
| 90 |
return None
|
| 91 |
|
| 92 |
def safe_get(url: str, timeout: int = 15):
    """Issue a GET with the default browser-like headers and return the response.

    HTTP error statuses are raised via ``raise_for_status``; nothing is
    swallowed here.
    """
    response = requests.get(url, headers=DEFAULT_HEADERS, timeout=timeout)
    response.raise_for_status()
    return response
|
| 96 |
|
| 97 |
def _temp_file(data: bytes, suffix: str) -> str:
|
|
|
|
| 98 |
if not data:
|
| 99 |
return ""
|
| 100 |
|
|
@@ -102,10 +156,11 @@ def _temp_file(data: bytes, suffix: str) -> str:
|
|
| 102 |
os.close(fd)
|
| 103 |
with open(path, "wb") as f:
|
| 104 |
f.write(data)
|
| 105 |
-
|
| 106 |
return path
|
| 107 |
|
| 108 |
-
def fetch_bytes(src: str, stream_threshold: int =
|
|
|
|
| 109 |
if progress is not None:
|
| 110 |
progress(0.05, desc="Checking remote/local source...")
|
| 111 |
if is_remote(src):
|
|
@@ -148,17 +203,18 @@ def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD, timeout: int
|
|
| 148 |
return data
|
| 149 |
|
| 150 |
def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
|
|
|
|
| 151 |
try:
|
| 152 |
img = Image.open(BytesIO(img_bytes))
|
| 153 |
except UnidentifiedImageError:
|
| 154 |
-
print(
|
| 155 |
return b""
|
| 156 |
except Exception as e:
|
| 157 |
print(f"Warning: Error opening image for JPEG conversion: {e}")
|
| 158 |
return b""
|
| 159 |
|
| 160 |
try:
|
| 161 |
-
if getattr(img, "is_animated", False):
|
| 162 |
img.seek(0)
|
| 163 |
except Exception:
|
| 164 |
pass
|
|
@@ -173,14 +229,16 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
|
|
| 173 |
return buf.getvalue()
|
| 174 |
|
| 175 |
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
    """Wrap raw bytes in a base64 ``data:`` URL with the given MIME type."""
    encoded = base64.b64encode(b).decode("utf-8")
    return f"data:{mime};base64,{encoded}"
|
| 177 |
|
| 178 |
def _ffprobe_streams(path: str) -> Optional[dict]:
|
|
|
|
| 179 |
if not FFMPEG_BIN:
|
| 180 |
return None
|
| 181 |
ffprobe_path = FFMPEG_BIN.replace("ffmpeg", "ffprobe") if "ffmpeg" in FFMPEG_BIN else "ffprobe"
|
| 182 |
if not shutil.which(ffprobe_path):
|
| 183 |
-
ffprobe_path = "ffprobe"
|
| 184 |
if not shutil.which(ffprobe_path):
|
| 185 |
return None
|
| 186 |
|
|
@@ -194,6 +252,7 @@ def _ffprobe_streams(path: str) -> Optional[dict]:
|
|
| 194 |
return None
|
| 195 |
|
| 196 |
def _get_video_info_and_timestamps(media_path: str, sample_count: int) -> Tuple[Optional[dict], List[float]]:
|
|
|
|
| 197 |
info = _ffprobe_streams(media_path)
|
| 198 |
duration = 0.0
|
| 199 |
if info and "format" in info and "duration" in info["format"]:
|
|
@@ -209,12 +268,16 @@ def _get_video_info_and_timestamps(media_path: str, sample_count: int) -> Tuple[
|
|
| 209 |
step = duration / (actual_sample_count + 1)
|
| 210 |
timestamps = [step * (i + 1) for i in range(actual_sample_count)]
|
| 211 |
|
| 212 |
-
if not timestamps:
|
| 213 |
timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
|
| 214 |
|
| 215 |
return info, timestamps
|
| 216 |
|
| 217 |
def extract_frames_for_model_and_gallery(media_path: str, sample_count: int = 5, timeout_extract: int = 15, gallery_base_h: int = 1080, model_base_h: int = 1024, progress=None) -> Tuple[List[bytes], List[str]]:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
frames_for_model: List[bytes] = []
|
| 219 |
frame_paths_for_gallery: List[str] = []
|
| 220 |
|
|
@@ -277,6 +340,7 @@ def extract_frames_for_model_and_gallery(media_path: str, sample_count: int = 5,
|
|
| 277 |
return frames_for_model, frame_paths_for_gallery
|
| 278 |
|
| 279 |
def chat_complete(client, model: str, messages, timeout: int = 120, progress=None) -> str:
|
|
|
|
| 280 |
max_retries = 5
|
| 281 |
initial_delay = 1.0
|
| 282 |
for attempt in range(max_retries):
|
|
@@ -294,8 +358,9 @@ def chat_complete(client, model: str, messages, timeout: int = 120, progress=Non
|
|
| 294 |
return f"Empty response from model: {res}"
|
| 295 |
|
| 296 |
first = choices[0]
|
| 297 |
-
|
| 298 |
-
|
|
|
|
| 299 |
return content.strip() if isinstance(content, str) else str(content)
|
| 300 |
|
| 301 |
except MistralAPIException as e:
|
|
@@ -318,6 +383,7 @@ def chat_complete(client, model: str, messages, timeout: int = 120, progress=Non
|
|
| 318 |
return "Error: Maximum retries reached for API call."
|
| 319 |
|
| 320 |
def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120, progress=None) -> str:
|
|
|
|
| 321 |
fname = filename or os.path.basename(path)
|
| 322 |
max_retries = 3
|
| 323 |
initial_delay = 1.0
|
|
@@ -355,13 +421,14 @@ def upload_file_to_mistral(client, path: str, filename: str | None = None, purpo
|
|
| 355 |
raise RuntimeError("File upload failed: Maximum retries reached.")
|
| 356 |
|
| 357 |
def determine_media_type(src: str, progress=None) -> Tuple[bool, bool]:
|
|
|
|
| 358 |
is_image = False
|
| 359 |
is_video = False
|
| 360 |
ext = ext_from_src(src)
|
| 361 |
|
| 362 |
-
if ext in
|
| 363 |
is_image = True
|
| 364 |
-
elif ext in
|
| 365 |
is_video = True
|
| 366 |
|
| 367 |
if is_remote(src):
|
|
@@ -378,6 +445,7 @@ def determine_media_type(src: str, progress=None) -> Tuple[bool, bool]:
|
|
| 378 |
return is_image, is_video
|
| 379 |
|
| 380 |
def analyze_image_structured(client, img_bytes: bytes, prompt: str, progress=None) -> str:
|
|
|
|
| 381 |
try:
|
| 382 |
if progress is not None:
|
| 383 |
progress(0.3, desc="Preparing image for analysis...")
|
|
@@ -399,11 +467,16 @@ def analyze_image_structured(client, img_bytes: bytes, prompt: str, progress=Non
|
|
| 399 |
return f"Error analyzing image: {e}"
|
| 400 |
|
| 401 |
def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None) -> Tuple[str, List[str]]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
gallery_frame_paths: List[str] = []
|
| 403 |
try:
|
| 404 |
if progress is not None:
|
| 405 |
progress(0.3, desc="Uploading video for full analysis...")
|
| 406 |
-
file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path), progress=progress)
|
| 407 |
|
| 408 |
messages = [
|
| 409 |
{"role": "system", "content": SYSTEM_INSTRUCTION},
|
|
@@ -413,20 +486,22 @@ def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None)
|
|
| 413 |
]
|
| 414 |
result = chat_complete(client, VIDEO_MODEL, messages, progress=progress)
|
| 415 |
|
|
|
|
| 416 |
_, gallery_frame_paths = extract_frames_for_model_and_gallery(
|
| 417 |
video_path, sample_count=6, gallery_base_h=1080, model_base_h=1024, progress=progress
|
| 418 |
)
|
| 419 |
return result, gallery_frame_paths
|
| 420 |
except Exception as e:
|
|
|
|
| 421 |
if progress is not None:
|
| 422 |
-
progress(0.35, desc=f"
|
| 423 |
|
| 424 |
frames_for_model_bytes, gallery_frame_paths = extract_frames_for_model_and_gallery(
|
| 425 |
video_path, sample_count=6, gallery_base_h=1080, model_base_h=1024, progress=progress
|
| 426 |
)
|
| 427 |
|
| 428 |
if not frames_for_model_bytes:
|
| 429 |
-
return f"Error: could not upload video and no frames could be extracted. ({e})", []
|
| 430 |
|
| 431 |
image_entries = []
|
| 432 |
for i, fb in enumerate(frames_for_model_bytes, start=1):
|
|
@@ -439,6 +514,7 @@ def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None)
|
|
| 439 |
"meta": {"frame_index": i},
|
| 440 |
}
|
| 441 |
)
|
|
|
|
| 442 |
content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
|
| 443 |
messages = [
|
| 444 |
{"role": "system", "content": SYSTEM_INSTRUCTION},
|
|
@@ -449,15 +525,20 @@ def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None)
|
|
| 449 |
|
| 450 |
# --- FFmpeg Helpers for Preview ---
|
| 451 |
def _convert_video_for_preview_if_needed(path: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
if not FFMPEG_BIN or not os.path.exists(path):
|
| 453 |
return path
|
| 454 |
|
|
|
|
| 455 |
if path.lower().endswith((".mp4", ".m4v")):
|
| 456 |
info = _ffprobe_streams(path)
|
| 457 |
if info:
|
| 458 |
video_streams = [s for s in info.get("streams", []) if s.get("codec_type") == "video"]
|
| 459 |
if video_streams and any(s.get("codec_name") in ("h264", "h265", "avc1") for s in video_streams):
|
| 460 |
-
return path
|
| 461 |
|
| 462 |
out_path = _temp_file(b"", suffix=".mp4")
|
| 463 |
if not out_path:
|
|
@@ -466,24 +547,29 @@ def _convert_video_for_preview_if_needed(path: str) -> str:
|
|
| 466 |
|
| 467 |
cmd = [
|
| 468 |
FFMPEG_BIN, "-y", "-i", path,
|
| 469 |
-
"-c:v", "libx264", "-preset", "veryfast", "-crf", "28",
|
| 470 |
-
"-c:a", "aac", "-b:a", "128k",
|
| 471 |
-
"-movflags", "+faststart", out_path,
|
| 472 |
-
"-map_metadata", "-1"
|
| 473 |
]
|
| 474 |
try:
|
| 475 |
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60)
|
| 476 |
return out_path
|
| 477 |
except Exception as e:
|
| 478 |
print(f"Error converting video for preview: {e}")
|
| 479 |
-
|
| 480 |
-
|
|
|
|
| 481 |
try: os.remove(out_path)
|
| 482 |
except Exception: pass
|
| 483 |
return path
|
| 484 |
|
| 485 |
# --- Preview Generation Logic ---
|
| 486 |
def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes, is_image_hint: bool, is_video_hint: bool) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
if not raw_bytes:
|
| 488 |
print(f"Error: No raw bytes provided for preview generation of {src_url}.")
|
| 489 |
return ""
|
|
@@ -493,14 +579,14 @@ def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes, is_image
|
|
| 493 |
Image.open(BytesIO(raw_bytes)).verify()
|
| 494 |
is_actually_image = True
|
| 495 |
except (UnidentifiedImageError, Exception):
|
| 496 |
-
pass
|
| 497 |
|
| 498 |
if is_actually_image:
|
| 499 |
jpeg_bytes = convert_to_jpeg_bytes(raw_bytes, base_h=1024)
|
| 500 |
if jpeg_bytes:
|
| 501 |
return _temp_file(jpeg_bytes, suffix=".jpg")
|
| 502 |
-
return "" # Failed image conversion
|
| 503 |
-
elif is_video_hint:
|
| 504 |
temp_raw_video_path = _temp_file(raw_bytes, suffix=ext_from_src(src_url) or ".mp4")
|
| 505 |
if not temp_raw_video_path:
|
| 506 |
print(f"Error: Failed to create temporary raw video file for {src_url}.")
|
|
@@ -508,7 +594,7 @@ def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes, is_image
|
|
| 508 |
|
| 509 |
playable_path = _convert_video_for_preview_if_needed(temp_raw_video_path)
|
| 510 |
return playable_path
|
| 511 |
-
elif is_image_hint: # Fallback
|
| 512 |
jpeg_bytes = convert_to_jpeg_bytes(raw_bytes, base_h=1024)
|
| 513 |
if jpeg_bytes:
|
| 514 |
return _temp_file(jpeg_bytes, suffix=".jpg")
|
|
@@ -519,30 +605,32 @@ def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes, is_image
|
|
| 519 |
|
| 520 |
|
| 521 |
# --- Gradio Interface Logic ---
|
| 522 |
-
|
| 523 |
|
| 524 |
-
def
|
|
|
|
| 525 |
return {"idle": "Submit", "busy": "Processing…", "done": "Done!", "error": "Retry"}.get(status, "Submit")
|
| 526 |
|
| 527 |
def create_demo():
|
| 528 |
-
|
|
|
|
| 529 |
with gr.Row():
|
| 530 |
with gr.Column(scale=1):
|
| 531 |
preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
|
| 532 |
preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False, format="mp4")
|
| 533 |
screenshot_gallery = gr.Gallery(label="Extracted Screenshots", columns=5, rows=1, height="auto", object_fit="contain", visible=False)
|
| 534 |
-
|
| 535 |
with gr.Column(scale=2):
|
| 536 |
url_input = gr.Textbox(label="Image / Video URL", placeholder="https://...", lines=1)
|
| 537 |
with gr.Accordion("Prompt (optional)", open=False):
|
| 538 |
custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
|
| 539 |
with gr.Accordion("Mistral API Key (optional)", open=False):
|
| 540 |
-
|
| 541 |
with gr.Row():
|
| 542 |
submit_btn = gr.Button("Submit")
|
| 543 |
clear_btn = gr.Button("Clear")
|
| 544 |
-
|
| 545 |
-
|
| 546 |
|
| 547 |
status_state = gr.State("idle")
|
| 548 |
main_preview_path_state = gr.State("") # Path to the playable preview file (image or video)
|
|
@@ -552,15 +640,14 @@ def create_demo():
|
|
| 552 |
def clear_all_ui_and_files_handler():
|
| 553 |
"""
|
| 554 |
Cleans up all tracked temporary files and resets all relevant UI components and states.
|
| 555 |
-
This function is meant to be called when the user explicitly clicks "Clear".
|
| 556 |
"""
|
| 557 |
-
for f_path in list(
|
| 558 |
if os.path.exists(f_path):
|
| 559 |
try:
|
| 560 |
os.remove(f_path)
|
| 561 |
except Exception as e:
|
| 562 |
print(f"Error during proactive cleanup of {f_path}: {e}")
|
| 563 |
-
|
| 564 |
|
| 565 |
return "", \
|
| 566 |
gr.update(value=None, visible=False), \
|
|
@@ -583,13 +670,14 @@ def create_demo():
|
|
| 583 |
preview_video,
|
| 584 |
screenshot_gallery,
|
| 585 |
status_state,
|
| 586 |
-
|
| 587 |
-
|
| 588 |
main_preview_path_state,
|
| 589 |
screenshot_paths_state,
|
| 590 |
-
|
| 591 |
raw_media_path_state
|
| 592 |
-
]
|
|
|
|
| 593 |
)
|
| 594 |
|
| 595 |
def load_main_preview_and_setup_for_analysis(
|
|
@@ -599,6 +687,10 @@ def create_demo():
|
|
| 599 |
current_screenshot_paths: List[str],
|
| 600 |
progress=gr.Progress()
|
| 601 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
# --- Proactive cleanup of old files related to previous load ---
|
| 603 |
files_to_clean_up_now = []
|
| 604 |
if current_main_preview_path and os.path.exists(current_main_preview_path):
|
|
@@ -610,8 +702,8 @@ def create_demo():
|
|
| 610 |
files_to_clean_up_now.append(path)
|
| 611 |
|
| 612 |
for f_path in files_to_clean_up_now:
|
| 613 |
-
if f_path in
|
| 614 |
-
|
| 615 |
try: os.remove(f_path)
|
| 616 |
except Exception as e: print(f"Error cleaning up old temp file {f_path}: {e}")
|
| 617 |
|
|
@@ -628,7 +720,7 @@ def create_demo():
|
|
| 628 |
return img_update_clear, video_update_clear, gallery_update_clear, \
|
| 629 |
preview_status_clear, main_path_clear, raw_media_path_clear, screenshot_paths_clear
|
| 630 |
|
| 631 |
-
temp_raw_path_for_analysis = ""
|
| 632 |
try:
|
| 633 |
progress(0.01, desc="Downloading media for preview and analysis...")
|
| 634 |
raw_bytes_for_analysis = fetch_bytes(url, timeout=60, progress=progress)
|
|
@@ -650,8 +742,8 @@ def create_demo():
|
|
| 650 |
|
| 651 |
if not local_playable_path:
|
| 652 |
# If preview failed, cleanup the temp_raw_path_for_analysis as well
|
| 653 |
-
if temp_raw_path_for_analysis in
|
| 654 |
-
|
| 655 |
try: os.remove(temp_raw_path_for_analysis)
|
| 656 |
except Exception as e: print(f"Error during cleanup of raw temp file {temp_raw_path_for_analysis}: {e}")
|
| 657 |
|
|
@@ -660,8 +752,8 @@ def create_demo():
|
|
| 660 |
main_path_clear, raw_media_path_clear, screenshot_paths_clear
|
| 661 |
|
| 662 |
ext = ext_from_src(local_playable_path)
|
| 663 |
-
is_img_preview = ext in
|
| 664 |
-
is_vid_preview = ext in
|
| 665 |
|
| 666 |
if is_img_preview:
|
| 667 |
return gr.update(value=local_playable_path, visible=True), gr.update(value=None, visible=False), \
|
|
@@ -673,13 +765,13 @@ def create_demo():
|
|
| 673 |
local_playable_path, temp_raw_path_for_analysis, screenshot_paths_clear
|
| 674 |
else:
|
| 675 |
# If local_playable_path exists but is not image/video, clean it up
|
| 676 |
-
if local_playable_path in
|
| 677 |
-
|
| 678 |
try: os.remove(local_playable_path)
|
| 679 |
except Exception as e: print(f"Error during cleanup of unplayable temp file {local_playable_path}: {e}")
|
| 680 |
# Also clean up raw_media_path if the playable path was not generated successfully
|
| 681 |
-
if temp_raw_path_for_analysis in
|
| 682 |
-
|
| 683 |
try: os.remove(temp_raw_path_for_analysis)
|
| 684 |
except Exception as e: print(f"Error during cleanup of raw temp file {temp_raw_path_for_analysis}: {e}")
|
| 685 |
|
|
@@ -690,22 +782,25 @@ def create_demo():
|
|
| 690 |
except Exception as e:
|
| 691 |
# If an error occurred during loading, clear all relevant paths.
|
| 692 |
if os.path.exists(temp_raw_path_for_analysis):
|
| 693 |
-
if temp_raw_path_for_analysis in
|
| 694 |
-
|
| 695 |
try: os.remove(temp_raw_path_for_analysis)
|
| 696 |
except Exception as ex: print(f"Error during cleanup of raw temp file {temp_raw_path_for_analysis} on error: {ex}")
|
| 697 |
|
| 698 |
return img_update_clear, video_update_clear, gallery_update_clear, \
|
| 699 |
-
gr.update(value=f"Preview load failed: {e}", visible=True), \
|
| 700 |
main_path_clear, raw_media_path_clear, screenshot_paths_clear
|
| 701 |
|
| 702 |
url_input.change(
|
| 703 |
fn=load_main_preview_and_setup_for_analysis,
|
| 704 |
inputs=[url_input, main_preview_path_state, raw_media_path_state, screenshot_paths_state],
|
| 705 |
-
outputs=[preview_image, preview_video, screenshot_gallery,
|
| 706 |
)
|
| 707 |
|
| 708 |
def worker(url: str, prompt: str, key: str, current_main_preview_path: str, raw_media_path: str, progress=gr.Progress()):
|
|
|
|
|
|
|
|
|
|
| 709 |
generated_screenshot_paths: List[str] = []
|
| 710 |
result_text = ""
|
| 711 |
|
|
@@ -724,16 +819,21 @@ def create_demo():
|
|
| 724 |
is_actually_image_for_analysis = False
|
| 725 |
is_actually_video_for_analysis = False
|
| 726 |
|
|
|
|
| 727 |
try:
|
| 728 |
Image.open(BytesIO(raw_bytes_for_analysis)).verify()
|
| 729 |
is_actually_image_for_analysis = True
|
| 730 |
except UnidentifiedImageError:
|
| 731 |
-
|
|
|
|
|
|
|
| 732 |
except Exception as e:
|
| 733 |
-
|
| 734 |
-
|
|
|
|
|
|
|
| 735 |
|
| 736 |
-
client = get_client(key)
|
| 737 |
|
| 738 |
if is_actually_video_for_analysis:
|
| 739 |
progress(0.25, desc="Running full-video analysis")
|
|
@@ -742,37 +842,42 @@ def create_demo():
|
|
| 742 |
progress(0.20, desc="Running image analysis")
|
| 743 |
result_text = analyze_image_structured(client, raw_bytes_for_analysis, prompt, progress=progress)
|
| 744 |
else:
|
| 745 |
-
return "error", "Error: Could not definitively determine media type for analysis after byte inspection.", current_main_preview_path, []
|
| 746 |
|
| 747 |
status = "done" if not (isinstance(result_text, str) and result_text.lower().startswith("error")) else "error"
|
| 748 |
return status, result_text, current_main_preview_path, generated_screenshot_paths
|
| 749 |
|
|
|
|
|
|
|
|
|
|
| 750 |
except Exception as exc:
|
| 751 |
-
return "error", f"Unexpected worker error: {exc}", current_main_preview_path, []
|
| 752 |
|
| 753 |
submit_btn.click(
|
| 754 |
fn=worker,
|
| 755 |
-
inputs=[url_input, custom_prompt,
|
| 756 |
-
outputs=[status_state,
|
| 757 |
show_progress="full",
|
| 758 |
-
show_progress_on=
|
| 759 |
)
|
| 760 |
|
| 761 |
-
status_state.change(fn=
|
| 762 |
|
| 763 |
-
def
|
|
|
|
| 764 |
return {"idle": "Idle", "busy": "Processing…", "done": "Completed", "error": "Error — see output"}.get(s, s)
|
| 765 |
-
status_state.change(fn=
|
| 766 |
|
| 767 |
def _update_preview_components(current_main_preview_path: str, current_screenshot_paths: List[str]):
|
|
|
|
| 768 |
img_update = gr.update(value=None, visible=False)
|
| 769 |
video_update = gr.update(value=None, visible=False)
|
| 770 |
|
| 771 |
if current_main_preview_path:
|
| 772 |
ext = ext_from_src(current_main_preview_path)
|
| 773 |
-
if ext in
|
| 774 |
img_update = gr.update(value=current_main_preview_path, visible=True)
|
| 775 |
-
elif ext in
|
| 776 |
video_update = gr.update(value=current_main_preview_path, visible=True)
|
| 777 |
else:
|
| 778 |
print(f"Warning: Unknown media type for main preview path: {current_main_preview_path}")
|
|
@@ -780,6 +885,7 @@ def create_demo():
|
|
| 780 |
gallery_update = gr.update(value=current_screenshot_paths, visible=bool(current_screenshot_paths))
|
| 781 |
return img_update, video_update, gallery_update
|
| 782 |
|
|
|
|
| 783 |
main_preview_path_state.change(
|
| 784 |
fn=_update_preview_components,
|
| 785 |
inputs=[main_preview_path_state, screenshot_paths_state],
|
|
|
|
| 13 |
import atexit
|
| 14 |
from requests.exceptions import RequestException
|
| 15 |
|
| 16 |
+
# --- Mistral Client Import & Placeholder for graceful degradation ---
class MistralAPIException(Exception):
    """Local API exception carrying a message and an HTTP-like status code.

    Used whenever the SDK's own ``mistralai.exceptions.MistralAPIException``
    cannot be imported. ``status_code`` defaults to 500 when not supplied.
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        super().__init__(message)
        self.message = message
        self.status_code = status_code or 500

    def __str__(self):
        return f"MistralAPIException (Status: {self.status_code}): {self.message}"


_MISTRAL_CLIENT_INSTALLED = False
try:
    from mistralai import Mistral
except ImportError:
    print(
        "Warning: Mistral AI client library ('mistralai') not found. "
        "Please install it with 'pip install mistralai' to enable AI analysis features. "
        "The application will launch, but API calls will fail."
    )

    # Placeholder classes keep the module importable and give every API call
    # a clear, catchable error instead of a NameError.
    class _DummyMistralChatClient:
        """Placeholder for Mistral client's chat interface."""

        def complete(self, *args, **kwargs):
            raise MistralAPIException(
                "Mistral AI chat client is unavailable. "
                "Please install 'mistralai' with 'pip install mistralai'.",
                status_code=500
            )

    class _DummyMistralFilesClient:
        """Placeholder for Mistral client's files interface."""

        def upload(self, *args, **kwargs):
            raise MistralAPIException(
                "Mistral AI files client is unavailable. "
                "Please install 'mistralai' with 'pip install mistralai'.",
                status_code=500
            )

    class Mistral:
        """A placeholder for the Mistral client if the library is not installed."""

        def __init__(self, *args, **kwargs):
            pass  # Constructor doesn't need to raise here, methods will.

        @property
        def chat(self):
            return _DummyMistralChatClient()

        @property
        def files(self):
            return _DummyMistralFilesClient()
else:
    _MISTRAL_CLIENT_INSTALLED = True
    # The exception module is resolved separately from the client: when the
    # installed SDK exposes `Mistral` but has no `mistralai.exceptions`, the
    # real client must stay usable and only the exception falls back to the
    # local class defined above.
    try:
        from mistralai.exceptions import MistralAPIException
    except ImportError:
        pass
|
| 64 |
|
| 65 |
# --- Configuration and Globals ---
# API key read from the environment once, at import time; empty string when unset.
DEFAULT_MISTRAL_KEY = os.getenv("MISTRAL_API_KEY", "")
# Model id presumably used for still-image analysis — confirm against analyze_image_structured.
PIXTRAL_MODEL = "pixtral-12b-2409"
# Model id passed to chat_complete for full-video analysis.
# NOTE(review): "voxtral" models are advertised as audio models — confirm this id accepts video input.
VIDEO_MODEL = "voxtral-mini-latest"
# Default for fetch_bytes' stream_threshold parameter.
STREAM_THRESHOLD_BYTES = 20 * 1024 * 1024 # 20MB
# Absolute path of the ffmpeg executable, or None when it is not on PATH.
FFMPEG_BIN = shutil.which("ffmpeg")
# Lowercase extensions (leading dot included) compared against ext_from_src() results.
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
VIDEO_EXTENSIONS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
|
| 73 |
|
| 74 |
SYSTEM_INSTRUCTION = (
|
| 75 |
"You are a clinical visual analyst. Only analyze media actually provided (image or video data). "
|
|
|
|
| 86 |
DEFAULT_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
|
| 87 |
|
| 88 |
# --- Temporary File Cleanup ---
|
| 89 |
+
_temp_files_to_delete = []


def _cleanup_all_temp_files():
    """Remove every tracked temporary file and empty the tracking list.

    Registered with ``atexit`` so it runs at interpreter shutdown. Cleanup
    is best-effort: a file that is already gone is skipped silently, any
    other failure is printed, and processing continues with the next path.
    """
    # Iterate over a snapshot so the tracking list can be cleared afterwards
    # no matter how individual removals went.
    for f_path in list(_temp_files_to_delete):
        try:
            # Try the removal directly instead of exists()-then-remove():
            # another handler may delete the file between the two calls.
            os.remove(f_path)
        except FileNotFoundError:
            pass  # already removed elsewhere; nothing left to do
        except Exception as e:
            print(f"Error during final cleanup of {f_path}: {e}")
    _temp_files_to_delete.clear()


atexit.register(_cleanup_all_temp_files)
|
| 102 |
|
| 103 |
# --- Mistral Client and API Helpers ---
|
| 104 |
+
def get_client(api_key: Optional[str] = None):
    """
    Returns a Mistral client instance.

    The key is taken from *api_key* when it contains non-whitespace text,
    otherwise from the MISTRAL_API_KEY value captured at import time. Both
    sources are stripped, so a whitespace-only value counts as "not set".

    Raises:
        MistralAPIException: with status 500 when the client library is not
            installed, or with status 401 when no usable API key is available.
    """
    if not _MISTRAL_CLIENT_INSTALLED:
        raise MistralAPIException(
            "Mistral AI client library is not installed. Please install it with 'pip install mistralai'.",
            status_code=500  # Internal Server Error, as it's a server-side dependency issue
        )

    key_to_use = (api_key or "").strip() or (DEFAULT_MISTRAL_KEY or "").strip()
    if not key_to_use:
        raise MistralAPIException(
            "Mistral API key is not set. Please provide it in the UI or as MISTRAL_API_KEY environment variable.",
            status_code=401  # Unauthorized
        )

    # The placeholder client is never returned: the missing-library case
    # has already raised above.
    return Mistral(api_key=key_to_use)
|
| 125 |
|
| 126 |
def is_remote(src: str) -> bool:
    """Checks if a source string is a remote http(s) URL.

    URL schemes are case-insensitive, so ``HTTP://`` and ``Https://`` are
    accepted as well as the lowercase forms. Empty or ``None`` input is
    reported as not remote.
    """
    if not src:
        return False
    # Only the first few characters can hold the scheme; avoid lowering the
    # whole string.
    return src[:8].lower().startswith(("http://", "https://"))
|
| 129 |
|
| 130 |
def ext_from_src(src: str) -> str:
    """Extracts the lowercase file extension from a source string (path or URL).

    For URLs only the path component is inspected, so query strings,
    fragments and the host name never leak into the result
    (``https://example.com`` has no extension, not ``.com``). Local paths
    keep the historical behaviour of dropping everything after the first
    ``?``. Returns ``""`` when *src* is empty or has no extension.
    """
    if not src:
        return ""

    target = None
    if "://" in src:
        # Function-scope import keeps this block self-contained.
        from urllib.parse import urlsplit
        try:
            target = urlsplit(src).path
        except ValueError:
            # Malformed URL (e.g. bad IPv6 literal): fall back to plain splitting.
            target = None
    if target is None:
        target = src.split("?")[0]

    _, ext = os.path.splitext(target)
    return ext.lower()
|
| 135 |
|
| 136 |
def safe_head(url: str, timeout: int = 6):
|
| 137 |
+
"""Performs a HEAD request safely, returning None on error or status >= 400."""
|
| 138 |
try:
|
| 139 |
r = requests.head(url, timeout=timeout, allow_redirects=True, headers=DEFAULT_HEADERS)
|
| 140 |
return None if r.status_code >= 400 else r
|
|
|
|
| 142 |
return None
|
| 143 |
|
| 144 |
def safe_get(url: str, timeout: int = 15):
    """Issue a GET with the default browser-like headers and return the response.

    HTTP error statuses are raised via ``raise_for_status``; nothing is
    swallowed here.
    """
    response = requests.get(url, headers=DEFAULT_HEADERS, timeout=timeout)
    response.raise_for_status()
    return response
|
| 149 |
|
| 150 |
def _temp_file(data: bytes, suffix: str) -> str:
|
| 151 |
+
"""Creates a temporary file with the given data and suffix, and registers it for cleanup."""
|
| 152 |
if not data:
|
| 153 |
return ""
|
| 154 |
|
|
|
|
| 156 |
os.close(fd)
|
| 157 |
with open(path, "wb") as f:
|
| 158 |
f.write(data)
|
| 159 |
+
_temp_files_to_delete.append(path)
|
| 160 |
return path
|
| 161 |
|
| 162 |
+
def fetch_bytes(src: str, stream_threshold: int = STREAM_THRESHOLD_BYTES, timeout: int = 60, progress=None) -> bytes:
|
| 163 |
+
"""Fetches content bytes from a local path or remote URL, with streaming for large files."""
|
| 164 |
if progress is not None:
|
| 165 |
progress(0.05, desc="Checking remote/local source...")
|
| 166 |
if is_remote(src):
|
|
|
|
| 203 |
return data
|
| 204 |
|
| 205 |
def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
|
| 206 |
+
"""Converts image bytes to JPEG, resizing to a target height while maintaining aspect ratio."""
|
| 207 |
try:
|
| 208 |
img = Image.open(BytesIO(img_bytes))
|
| 209 |
except UnidentifiedImageError:
|
| 210 |
+
print("Warning: convert_to_jpeg_bytes received unidentifiable image data.")
|
| 211 |
return b""
|
| 212 |
except Exception as e:
|
| 213 |
print(f"Warning: Error opening image for JPEG conversion: {e}")
|
| 214 |
return b""
|
| 215 |
|
| 216 |
try:
|
| 217 |
+
if getattr(img, "is_animated", False): # Handle animated images (e.g., GIFs) by taking the first frame
|
| 218 |
img.seek(0)
|
| 219 |
except Exception:
|
| 220 |
pass
|
|
|
|
| 229 |
return buf.getvalue()
|
| 230 |
|
| 231 |
def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
    """Wrap raw bytes in a base64 ``data:`` URL with the given MIME type."""
    encoded = base64.b64encode(b).decode("utf-8")
    return f"data:{mime};base64,{encoded}"
|
| 234 |
|
| 235 |
def _ffprobe_streams(path: str) -> Optional[dict]:
|
| 236 |
+
"""Uses ffprobe to get stream information for a media file."""
|
| 237 |
if not FFMPEG_BIN:
|
| 238 |
return None
|
| 239 |
ffprobe_path = FFMPEG_BIN.replace("ffmpeg", "ffprobe") if "ffmpeg" in FFMPEG_BIN else "ffprobe"
|
| 240 |
if not shutil.which(ffprobe_path):
|
| 241 |
+
ffprobe_path = "ffprobe" # Try raw 'ffprobe' if 'ffmpeg' replacement isn't found
|
| 242 |
if not shutil.which(ffprobe_path):
|
| 243 |
return None
|
| 244 |
|
|
|
|
| 252 |
return None
|
| 253 |
|
| 254 |
def _get_video_info_and_timestamps(media_path: str, sample_count: int) -> Tuple[Optional[dict], List[float]]:
|
| 255 |
+
"""Extracts video info and generates timestamps for frame extraction."""
|
| 256 |
info = _ffprobe_streams(media_path)
|
| 257 |
duration = 0.0
|
| 258 |
if info and "format" in info and "duration" in info["format"]:
|
|
|
|
| 268 |
step = duration / (actual_sample_count + 1)
|
| 269 |
timestamps = [step * (i + 1) for i in range(actual_sample_count)]
|
| 270 |
|
| 271 |
+
if not timestamps: # Fallback for very short videos or if duration couldn't be determined
|
| 272 |
timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
|
| 273 |
|
| 274 |
return info, timestamps
|
| 275 |
|
| 276 |
def extract_frames_for_model_and_gallery(media_path: str, sample_count: int = 5, timeout_extract: int = 15, gallery_base_h: int = 1080, model_base_h: int = 1024, progress=None) -> Tuple[List[bytes], List[str]]:
|
| 277 |
+
"""
|
| 278 |
+
Extracts frames from a video for model input and a gallery display.
|
| 279 |
+
Returns: (list of JPEG bytes for model, list of paths to JPEG files for gallery)
|
| 280 |
+
"""
|
| 281 |
frames_for_model: List[bytes] = []
|
| 282 |
frame_paths_for_gallery: List[str] = []
|
| 283 |
|
|
|
|
| 340 |
return frames_for_model, frame_paths_for_gallery
|
| 341 |
|
| 342 |
def chat_complete(client, model: str, messages, timeout: int = 120, progress=None) -> str:
|
| 343 |
+
"""Sends messages to the Mistral chat completion API with retry logic."""
|
| 344 |
max_retries = 5
|
| 345 |
initial_delay = 1.0
|
| 346 |
for attempt in range(max_retries):
|
|
|
|
| 358 |
return f"Empty response from model: {res}"
|
| 359 |
|
| 360 |
first = choices[0]
|
| 361 |
+
# Handle both object-style and dict-style responses
|
| 362 |
+
msg = getattr(first, "message", None) or (first.get("message") if isinstance(first, dict) else first)
|
| 363 |
+
content = getattr(msg, "content", None) or (msg.get("content") if isinstance(msg, dict) else None)
|
| 364 |
return content.strip() if isinstance(content, str) else str(content)
|
| 365 |
|
| 366 |
except MistralAPIException as e:
|
|
|
|
| 383 |
return "Error: Maximum retries reached for API call."
|
| 384 |
|
| 385 |
def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120, progress=None) -> str:
|
| 386 |
+
"""Uploads a file to the Mistral API, returning its file ID."""
|
| 387 |
fname = filename or os.path.basename(path)
|
| 388 |
max_retries = 3
|
| 389 |
initial_delay = 1.0
|
|
|
|
| 421 |
raise RuntimeError("File upload failed: Maximum retries reached.")
|
| 422 |
|
| 423 |
def determine_media_type(src: str, progress=None) -> Tuple[bool, bool]:
|
| 424 |
+
"""Provides an initial hint about media type based on extension or content-type header."""
|
| 425 |
is_image = False
|
| 426 |
is_video = False
|
| 427 |
ext = ext_from_src(src)
|
| 428 |
|
| 429 |
+
if ext in IMAGE_EXTENSIONS:
|
| 430 |
is_image = True
|
| 431 |
+
elif ext in VIDEO_EXTENSIONS:
|
| 432 |
is_video = True
|
| 433 |
|
| 434 |
if is_remote(src):
|
|
|
|
| 445 |
return is_image, is_video
|
| 446 |
|
| 447 |
def analyze_image_structured(client, img_bytes: bytes, prompt: str, progress=None) -> str:
|
| 448 |
+
"""Analyzes an image using the PixTRAL model."""
|
| 449 |
try:
|
| 450 |
if progress is not None:
|
| 451 |
progress(0.3, desc="Preparing image for analysis...")
|
|
|
|
| 467 |
return f"Error analyzing image: {e}"
|
| 468 |
|
| 469 |
def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None) -> Tuple[str, List[str]]:
|
| 470 |
+
"""
|
| 471 |
+
Analyzes a video using the VoxTRAL model (if available) or by extracting frames
|
| 472 |
+
and using PixTRAL as a fallback.
|
| 473 |
+
Returns: (analysis result text, list of paths to gallery frames)
|
| 474 |
+
"""
|
| 475 |
gallery_frame_paths: List[str] = []
|
| 476 |
try:
|
| 477 |
if progress is not None:
|
| 478 |
progress(0.3, desc="Uploading video for full analysis...")
|
| 479 |
+
file_id = upload_file_to_mistral(client, video_path, filename=os.path.basename(video_path), purpose="batch", progress=progress)
|
| 480 |
|
| 481 |
messages = [
|
| 482 |
{"role": "system", "content": SYSTEM_INSTRUCTION},
|
|
|
|
| 486 |
]
|
| 487 |
result = chat_complete(client, VIDEO_MODEL, messages, progress=progress)
|
| 488 |
|
| 489 |
+
# Still extract frames for gallery even if full video upload was successful
|
| 490 |
_, gallery_frame_paths = extract_frames_for_model_and_gallery(
|
| 491 |
video_path, sample_count=6, gallery_base_h=1080, model_base_h=1024, progress=progress
|
| 492 |
)
|
| 493 |
return result, gallery_frame_paths
|
| 494 |
except Exception as e:
|
| 495 |
+
print(f"Warning: Video upload/full analysis failed ({e}). Extracting frames as fallback...")
|
| 496 |
if progress is not None:
|
| 497 |
+
progress(0.35, desc=f"Video upload failed ({type(e).__name__}). Extracting frames as fallback...")
|
| 498 |
|
| 499 |
frames_for_model_bytes, gallery_frame_paths = extract_frames_for_model_and_gallery(
|
| 500 |
video_path, sample_count=6, gallery_base_h=1080, model_base_h=1024, progress=progress
|
| 501 |
)
|
| 502 |
|
| 503 |
if not frames_for_model_bytes:
|
| 504 |
+
return f"Error: could not upload video and no frames could be extracted for fallback. ({e})", []
|
| 505 |
|
| 506 |
image_entries = []
|
| 507 |
for i, fb in enumerate(frames_for_model_bytes, start=1):
|
|
|
|
| 514 |
"meta": {"frame_index": i},
|
| 515 |
}
|
| 516 |
)
|
| 517 |
+
# Consolidate frames for a cohesive narrative, as per requirement
|
| 518 |
content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
|
| 519 |
messages = [
|
| 520 |
{"role": "system", "content": SYSTEM_INSTRUCTION},
|
|
|
|
| 525 |
|
| 526 |
# --- FFmpeg Helpers for Preview ---
|
| 527 |
def _convert_video_for_preview_if_needed(path: str) -> str:
|
| 528 |
+
"""
|
| 529 |
+
Converts a video to a web-friendly MP4 format if necessary for preview.
|
| 530 |
+
Returns the path to the converted video or the original path if no conversion needed/failed.
|
| 531 |
+
"""
|
| 532 |
if not FFMPEG_BIN or not os.path.exists(path):
|
| 533 |
return path
|
| 534 |
|
| 535 |
+
# Check if it's already a web-friendly MP4 (H.264/AVC1, H.265)
|
| 536 |
if path.lower().endswith((".mp4", ".m4v")):
|
| 537 |
info = _ffprobe_streams(path)
|
| 538 |
if info:
|
| 539 |
video_streams = [s for s in info.get("streams", []) if s.get("codec_type") == "video"]
|
| 540 |
if video_streams and any(s.get("codec_name") in ("h264", "h265", "avc1") for s in video_streams):
|
| 541 |
+
return path # Already compatible, no conversion needed
|
| 542 |
|
| 543 |
out_path = _temp_file(b"", suffix=".mp4")
|
| 544 |
if not out_path:
|
|
|
|
| 547 |
|
| 548 |
cmd = [
|
| 549 |
FFMPEG_BIN, "-y", "-i", path,
|
| 550 |
+
"-c:v", "libx264", "-preset", "veryfast", "-crf", "28", # H.264 codec
|
| 551 |
+
"-c:a", "aac", "-b:a", "128k", # AAC audio
|
| 552 |
+
"-movflags", "+faststart", out_path, # Optimize for web streaming
|
| 553 |
+
"-map_metadata", "-1" # Remove metadata
|
| 554 |
]
|
| 555 |
try:
|
| 556 |
subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=60)
|
| 557 |
return out_path
|
| 558 |
except Exception as e:
|
| 559 |
print(f"Error converting video for preview: {e}")
|
| 560 |
+
# If conversion fails, remove the failed temp file and return original path
|
| 561 |
+
if out_path in _temp_files_to_delete:
|
| 562 |
+
_temp_files_to_delete.remove(out_path)
|
| 563 |
try: os.remove(out_path)
|
| 564 |
except Exception: pass
|
| 565 |
return path
|
| 566 |
|
| 567 |
# --- Preview Generation Logic ---
|
| 568 |
def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes, is_image_hint: bool, is_video_hint: bool) -> str:
|
| 569 |
+
"""
|
| 570 |
+
Generates a playable preview file (JPEG for image, MP4 for video) from raw bytes.
|
| 571 |
+
Returns the path to the generated preview file.
|
| 572 |
+
"""
|
| 573 |
if not raw_bytes:
|
| 574 |
print(f"Error: No raw bytes provided for preview generation of {src_url}.")
|
| 575 |
return ""
|
|
|
|
| 579 |
Image.open(BytesIO(raw_bytes)).verify()
|
| 580 |
is_actually_image = True
|
| 581 |
except (UnidentifiedImageError, Exception):
|
| 582 |
+
pass # Not a verifiable image, proceed to video or fallback logic
|
| 583 |
|
| 584 |
if is_actually_image:
|
| 585 |
jpeg_bytes = convert_to_jpeg_bytes(raw_bytes, base_h=1024)
|
| 586 |
if jpeg_bytes:
|
| 587 |
return _temp_file(jpeg_bytes, suffix=".jpg")
|
| 588 |
+
return "" # Failed image conversion
|
| 589 |
+
elif is_video_hint: # If it's not an image, and was hinted as video
|
| 590 |
temp_raw_video_path = _temp_file(raw_bytes, suffix=ext_from_src(src_url) or ".mp4")
|
| 591 |
if not temp_raw_video_path:
|
| 592 |
print(f"Error: Failed to create temporary raw video file for {src_url}.")
|
|
|
|
| 594 |
|
| 595 |
playable_path = _convert_video_for_preview_if_needed(temp_raw_video_path)
|
| 596 |
return playable_path
|
| 597 |
+
elif is_image_hint: # Fallback: if hinted image but not verifiable, still try conversion
|
| 598 |
jpeg_bytes = convert_to_jpeg_bytes(raw_bytes, base_h=1024)
|
| 599 |
if jpeg_bytes:
|
| 600 |
return _temp_file(jpeg_bytes, suffix=".jpg")
|
|
|
|
| 605 |
|
| 606 |
|
| 607 |
# --- Gradio Interface Logic ---
|
| 608 |
+
# Custom CSS passed to gr.Blocks: images and videos inside elements carrying the
# "preview_media" class are capped at the container width, keep their aspect
# ratio (height: auto), and get 6px rounded corners.
GRADIO_CSS = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
|
| 609 |
|
| 610 |
+
def _get_button_label_for_status(status: str) -> str:
|
| 611 |
+
"""Returns the appropriate button label based on the processing status."""
|
| 612 |
return {"idle": "Submit", "busy": "Processing…", "done": "Done!", "error": "Retry"}.get(status, "Submit")
|
| 613 |
|
| 614 |
def create_demo():
|
| 615 |
+
"""Creates the Gradio interface for Flux Multimodal analysis."""
|
| 616 |
+
with gr.Blocks(title="Flux Multimodal", css=GRADIO_CSS) as demo:
|
| 617 |
with gr.Row():
|
| 618 |
with gr.Column(scale=1):
|
| 619 |
preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
|
| 620 |
preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False, format="mp4")
|
| 621 |
screenshot_gallery = gr.Gallery(label="Extracted Screenshots", columns=5, rows=1, height="auto", object_fit="contain", visible=False)
|
| 622 |
+
preview_status_text = gr.Textbox(label="Preview status", interactive=False, lines=1, value="", visible=True)
|
| 623 |
with gr.Column(scale=2):
|
| 624 |
url_input = gr.Textbox(label="Image / Video URL", placeholder="https://...", lines=1)
|
| 625 |
with gr.Accordion("Prompt (optional)", open=False):
|
| 626 |
custom_prompt = gr.Textbox(label="Prompt", lines=4, value="")
|
| 627 |
with gr.Accordion("Mistral API Key (optional)", open=False):
|
| 628 |
+
api_key_input = gr.Textbox(label="Mistral API Key", type="password", max_lines=1)
|
| 629 |
with gr.Row():
|
| 630 |
submit_btn = gr.Button("Submit")
|
| 631 |
clear_btn = gr.Button("Clear")
|
| 632 |
+
progress_markdown = gr.Markdown("Idle")
|
| 633 |
+
output_markdown = gr.Markdown("")
|
| 634 |
|
| 635 |
status_state = gr.State("idle")
|
| 636 |
main_preview_path_state = gr.State("") # Path to the playable preview file (image or video)
|
|
|
|
| 640 |
def clear_all_ui_and_files_handler():
|
| 641 |
"""
|
| 642 |
Cleans up all tracked temporary files and resets all relevant UI components and states.
|
|
|
|
| 643 |
"""
|
| 644 |
+
for f_path in list(_temp_files_to_delete):
|
| 645 |
if os.path.exists(f_path):
|
| 646 |
try:
|
| 647 |
os.remove(f_path)
|
| 648 |
except Exception as e:
|
| 649 |
print(f"Error during proactive cleanup of {f_path}: {e}")
|
| 650 |
+
_temp_files_to_delete.clear()
|
| 651 |
|
| 652 |
return "", \
|
| 653 |
gr.update(value=None, visible=False), \
|
|
|
|
| 670 |
preview_video,
|
| 671 |
screenshot_gallery,
|
| 672 |
status_state,
|
| 673 |
+
progress_markdown,
|
| 674 |
+
output_markdown,
|
| 675 |
main_preview_path_state,
|
| 676 |
screenshot_paths_state,
|
| 677 |
+
preview_status_text,
|
| 678 |
raw_media_path_state
|
| 679 |
+
],
|
| 680 |
+
queue=False
|
| 681 |
)
|
| 682 |
|
| 683 |
def load_main_preview_and_setup_for_analysis(
|
|
|
|
| 687 |
current_screenshot_paths: List[str],
|
| 688 |
progress=gr.Progress()
|
| 689 |
):
|
| 690 |
+
"""
|
| 691 |
+
Loads media from URL, generates a preview, and sets up temporary files for analysis.
|
| 692 |
+
Also handles cleanup of previously loaded media.
|
| 693 |
+
"""
|
| 694 |
# --- Proactive cleanup of old files related to previous load ---
|
| 695 |
files_to_clean_up_now = []
|
| 696 |
if current_main_preview_path and os.path.exists(current_main_preview_path):
|
|
|
|
| 702 |
files_to_clean_up_now.append(path)
|
| 703 |
|
| 704 |
for f_path in files_to_clean_up_now:
|
| 705 |
+
if f_path in _temp_files_to_delete:
|
| 706 |
+
_temp_files_to_delete.remove(f_path)
|
| 707 |
try: os.remove(f_path)
|
| 708 |
except Exception as e: print(f"Error cleaning up old temp file {f_path}: {e}")
|
| 709 |
|
|
|
|
| 720 |
return img_update_clear, video_update_clear, gallery_update_clear, \
|
| 721 |
preview_status_clear, main_path_clear, raw_media_path_clear, screenshot_paths_clear
|
| 722 |
|
| 723 |
+
temp_raw_path_for_analysis = ""
|
| 724 |
try:
|
| 725 |
progress(0.01, desc="Downloading media for preview and analysis...")
|
| 726 |
raw_bytes_for_analysis = fetch_bytes(url, timeout=60, progress=progress)
|
|
|
|
| 742 |
|
| 743 |
if not local_playable_path:
|
| 744 |
# If preview failed, cleanup the temp_raw_path_for_analysis as well
|
| 745 |
+
if temp_raw_path_for_analysis in _temp_files_to_delete:
|
| 746 |
+
_temp_files_to_delete.remove(temp_raw_path_for_analysis)
|
| 747 |
try: os.remove(temp_raw_path_for_analysis)
|
| 748 |
except Exception as e: print(f"Error during cleanup of raw temp file {temp_raw_path_for_analysis}: {e}")
|
| 749 |
|
|
|
|
| 752 |
main_path_clear, raw_media_path_clear, screenshot_paths_clear
|
| 753 |
|
| 754 |
ext = ext_from_src(local_playable_path)
|
| 755 |
+
is_img_preview = ext in IMAGE_EXTENSIONS
|
| 756 |
+
is_vid_preview = ext in VIDEO_EXTENSIONS
|
| 757 |
|
| 758 |
if is_img_preview:
|
| 759 |
return gr.update(value=local_playable_path, visible=True), gr.update(value=None, visible=False), \
|
|
|
|
| 765 |
local_playable_path, temp_raw_path_for_analysis, screenshot_paths_clear
|
| 766 |
else:
|
| 767 |
# If local_playable_path exists but is not image/video, clean it up
|
| 768 |
+
if local_playable_path in _temp_files_to_delete:
|
| 769 |
+
_temp_files_to_delete.remove(local_playable_path)
|
| 770 |
try: os.remove(local_playable_path)
|
| 771 |
except Exception as e: print(f"Error during cleanup of unplayable temp file {local_playable_path}: {e}")
|
| 772 |
# Also clean up raw_media_path if the playable path was not generated successfully
|
| 773 |
+
if temp_raw_path_for_analysis in _temp_files_to_delete:
|
| 774 |
+
_temp_files_to_delete.remove(temp_raw_path_for_analysis)
|
| 775 |
try: os.remove(temp_raw_path_for_analysis)
|
| 776 |
except Exception as e: print(f"Error during cleanup of raw temp file {temp_raw_path_for_analysis}: {e}")
|
| 777 |
|
|
|
|
| 782 |
except Exception as e:
|
| 783 |
# If an error occurred during loading, clear all relevant paths.
|
| 784 |
if os.path.exists(temp_raw_path_for_analysis):
|
| 785 |
+
if temp_raw_path_for_analysis in _temp_files_to_delete:
|
| 786 |
+
_temp_files_to_delete.remove(temp_raw_path_for_analysis)
|
| 787 |
try: os.remove(temp_raw_path_for_analysis)
|
| 788 |
except Exception as ex: print(f"Error during cleanup of raw temp file {temp_raw_path_for_analysis} on error: {ex}")
|
| 789 |
|
| 790 |
return img_update_clear, video_update_clear, gallery_update_clear, \
|
| 791 |
+
gr.update(value=f"Preview load failed: {type(e).__name__}: {e}", visible=True), \
|
| 792 |
main_path_clear, raw_media_path_clear, screenshot_paths_clear
|
| 793 |
|
| 794 |
url_input.change(
|
| 795 |
fn=load_main_preview_and_setup_for_analysis,
|
| 796 |
inputs=[url_input, main_preview_path_state, raw_media_path_state, screenshot_paths_state],
|
| 797 |
+
outputs=[preview_image, preview_video, screenshot_gallery, preview_status_text, main_preview_path_state, raw_media_path_state, screenshot_paths_state]
|
| 798 |
)
|
| 799 |
|
| 800 |
def worker(url: str, prompt: str, key: str, current_main_preview_path: str, raw_media_path: str, progress=gr.Progress()):
|
| 801 |
+
"""
|
| 802 |
+
The main worker function that performs media analysis using Mistral models.
|
| 803 |
+
"""
|
| 804 |
generated_screenshot_paths: List[str] = []
|
| 805 |
result_text = ""
|
| 806 |
|
|
|
|
| 819 |
is_actually_image_for_analysis = False
|
| 820 |
is_actually_video_for_analysis = False
|
| 821 |
|
| 822 |
+
# Determine media type for analysis robustly
|
| 823 |
try:
|
| 824 |
Image.open(BytesIO(raw_bytes_for_analysis)).verify()
|
| 825 |
is_actually_image_for_analysis = True
|
| 826 |
except UnidentifiedImageError:
|
| 827 |
+
# If PIL can't identify it as an image, check if it has a video extension.
|
| 828 |
+
if ext_from_src(raw_media_path) in VIDEO_EXTENSIONS:
|
| 829 |
+
is_actually_video_for_analysis = True
|
| 830 |
except Exception as e:
|
| 831 |
+
# Catch other PIL errors (e.g., truncated, memory, etc.).
|
| 832 |
+
print(f"Warning: PIL error during image verification for raw analysis media ({raw_media_path}): {e}. Checking for video extension.")
|
| 833 |
+
if ext_from_src(raw_media_path) in VIDEO_EXTENSIONS:
|
| 834 |
+
is_actually_video_for_analysis = True
|
| 835 |
|
| 836 |
+
client = get_client(key) # This will raise MistralAPIException if library not installed or key missing
|
| 837 |
|
| 838 |
if is_actually_video_for_analysis:
|
| 839 |
progress(0.25, desc="Running full-video analysis")
|
|
|
|
| 842 |
progress(0.20, desc="Running image analysis")
|
| 843 |
result_text = analyze_image_structured(client, raw_bytes_for_analysis, prompt, progress=progress)
|
| 844 |
else:
|
| 845 |
+
return "error", "Error: Could not definitively determine media type for analysis after byte inspection and extension check. Please check the URL.", current_main_preview_path, []
|
| 846 |
|
| 847 |
status = "done" if not (isinstance(result_text, str) and result_text.lower().startswith("error")) else "error"
|
| 848 |
return status, result_text, current_main_preview_path, generated_screenshot_paths
|
| 849 |
|
| 850 |
+
except MistralAPIException as e:
|
| 851 |
+
# Catch API key missing or client not installed errors from get_client or client method calls
|
| 852 |
+
return "error", f"**Mistral API Error:** {e.message}", current_main_preview_path, []
|
| 853 |
except Exception as exc:
|
| 854 |
+
return "error", f"**Unexpected worker error:** {type(exc).__name__}: {exc}", current_main_preview_path, []
|
| 855 |
|
| 856 |
submit_btn.click(
|
| 857 |
fn=worker,
|
| 858 |
+
inputs=[url_input, custom_prompt, api_key_input, main_preview_path_state, raw_media_path_state],
|
| 859 |
+
outputs=[status_state, output_markdown, main_preview_path_state, screenshot_paths_state],
|
| 860 |
show_progress="full",
|
| 861 |
+
show_progress_on=progress_markdown,
|
| 862 |
)
|
| 863 |
|
| 864 |
+
status_state.change(fn=_get_button_label_for_status, inputs=[status_state], outputs=[submit_btn], queue=False)
|
| 865 |
|
| 866 |
+
def _status_to_progress_text(s):
|
| 867 |
+
"""Converts internal status to user-friendly progress text."""
|
| 868 |
return {"idle": "Idle", "busy": "Processing…", "done": "Completed", "error": "Error — see output"}.get(s, s)
|
| 869 |
+
status_state.change(fn=_status_to_progress_text, inputs=[status_state], outputs=[progress_markdown], queue=False)
|
| 870 |
|
| 871 |
def _update_preview_components(current_main_preview_path: str, current_screenshot_paths: List[str]):
|
| 872 |
+
"""Updates the visibility and content of preview components (image, video, gallery)."""
|
| 873 |
img_update = gr.update(value=None, visible=False)
|
| 874 |
video_update = gr.update(value=None, visible=False)
|
| 875 |
|
| 876 |
if current_main_preview_path:
|
| 877 |
ext = ext_from_src(current_main_preview_path)
|
| 878 |
+
if ext in IMAGE_EXTENSIONS:
|
| 879 |
img_update = gr.update(value=current_main_preview_path, visible=True)
|
| 880 |
+
elif ext in VIDEO_EXTENSIONS:
|
| 881 |
video_update = gr.update(value=current_main_preview_path, visible=True)
|
| 882 |
else:
|
| 883 |
print(f"Warning: Unknown media type for main preview path: {current_main_preview_path}")
|
|
|
|
| 885 |
gallery_update = gr.update(value=current_screenshot_paths, visible=bool(current_screenshot_paths))
|
| 886 |
return img_update, video_update, gallery_update
|
| 887 |
|
| 888 |
+
# These change events use queue=False to ensure UI updates are immediate and don't block
|
| 889 |
main_preview_path_state.change(
|
| 890 |
fn=_update_preview_components,
|
| 891 |
inputs=[main_preview_path_state, screenshot_paths_state],
|