Image-To-Flux-Prompt

Running

App Files Files Community

Hug0endob committed on 11 days ago

Commit

9c7140f

verified ·

1 Parent(s): 31baa74

Update app.py

Browse files

Files changed (1) hide show

app.py +258 -162

app.py CHANGED Viewed

@@ -45,7 +45,7 @@ _temp_preview_files_to_delete = []
 def _cleanup_all_temp_preview_files():
     """Removes all temporary files created for previews upon application exit."""
-    for f_path in list(_temp_preview_files_to_delete): # Iterate over a copy
         if os.path.exists(f_path):
             try:
                 os.remove(f_path)
@@ -158,17 +158,54 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
 def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
     """Return *b* as a base64 ``data:`` URI labelled with the MIME type *mime*."""
     payload = base64.b64encode(b).decode("utf-8")
     return f"data:{mime};base64,{payload}"
 def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15, progress=None) -> List[bytes]:
     frames: List[bytes] = []
     if not FFMPEG_BIN or not os.path.exists(media_path):
         return frames
     if progress is not None:
-        progress(0.05, desc="Preparing frame extraction...")
-    timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
     for i, t in enumerate(timestamps):
-        fd, tmp = tempfile.mkstemp(suffix=f"_{i}.jpg")
         os.close(fd)
-        _temp_preview_files_to_delete.append(tmp) # Track for cleanup
         cmd = [
             FFMPEG_BIN,
             "-nostdin",
@@ -180,12 +217,12 @@ def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_ex
             "-frames:v",
             "1",
             "-q:v",
-            "2",
             tmp,
         ]
         try:
             if progress is not None:
-                progress(0.1 + (i / max(1, sample_count)) * 0.2, desc=f"Extracting frame {i+1}/{sample_count}...")
             subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                 with open(tmp, "rb") as f:
@@ -193,16 +230,70 @@ def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_ex
         except Exception:
             pass
         finally:
-            # frame is read into memory, temp file can be removed early if not already done by atexit
-            try:
-                if tmp in _temp_preview_files_to_delete:
-                    _temp_preview_files_to_delete.remove(tmp)
-                os.remove(tmp)
             except Exception: pass
     if progress is not None:
-        progress(0.45, desc=f"Extracted {len(frames)} frames")
     return frames
 def chat_complete(client, model: str, messages, timeout: int = 120, progress=None) -> str:
     try:
         if progress is not None:
@@ -302,7 +393,12 @@ def analyze_image_structured(client, img_bytes: bytes, prompt: str, progress=Non
     except Exception as e:
         return f"Error analyzing image: {e}"
-def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None) -> str:
     try:
         if progress is not None:
             progress(0.3, desc="Uploading video for full analysis...")
@@ -315,18 +411,28 @@ def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None)
             {"role": "system", "content": SYSTEM_INSTRUCTION},
             {"role": "user", "content": extra_msg + "\n\n" + prompt},
         ]
-        return chat_complete(client, VIDEO_MODEL, messages, progress=progress)
     except Exception as e:
         if progress is not None:
             progress(0.35, desc="Upload failed, extracting frames as fallback...")
-        frames = extract_best_frames_bytes(video_path, sample_count=6, progress=progress)
-        if not frames:
-            return f"Error: could not upload video and no frames could be extracted. ({e})"
         image_entries = []
-        for i, fb in enumerate(frames, start=1):
             try:
                 if progress is not None:
-                    progress(0.4 + (i / len(frames)) * 0.2, desc=f"Preparing frame {i}/{len(frames)}...")
                 j = convert_to_jpeg_bytes(fb, base_h=720)
                 image_entries.append(
                     {
@@ -342,25 +448,10 @@ def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None)
             {"role": "system", "content": SYSTEM_INSTRUCTION},
             {"role": "user", "content": content},
         ]
-        return chat_complete(client, PIXTRAL_MODEL, messages, progress=progress)
 # --- FFmpeg Helpers for Preview ---
-def _ffprobe_streams(path: str) -> Optional[dict]:
-    """Probes video codecs via ffprobe; returns dict with streams info or None on failure."""
-    if not FFMPEG_BIN:
-        return None
-    ffprobe = FFMPEG_BIN.replace("ffmpeg", "ffprobe") if "ffmpeg" in FFMPEG_BIN else "ffprobe"
-    if not shutil.which(ffprobe):
-        ffprobe = "ffprobe" # Try system PATH
-    cmd = [
-        ffprobe, "-v", "error", "-print_format", "json", "-show_streams", "-show_format", path
-    ]
-    try:
-        out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
-        return json.loads(out)
-    except Exception:
-        return None
 def _convert_video_for_preview_if_needed(path: str) -> str:
     """
     Returns a path that the Gradio video component can play.
@@ -371,7 +462,6 @@ def _convert_video_for_preview_if_needed(path: str) -> str:
     if not FFMPEG_BIN or not os.path.exists(path):
         return path # Cannot convert, return original
-    # Quick check for MP4 and common codecs
     if path.lower().endswith((".mp4", ".m4v", ".mov")):
         info = _ffprobe_streams(path)
         if info:
@@ -379,7 +469,6 @@ def _convert_video_for_preview_if_needed(path: str) -> str:
             if video_streams and any(s.get("codec_name") in ("h264", "h265", "avc1") for s in video_streams):
                 return path # Already playable
-    # Need conversion → write to a new temp MP4
     out_path = _temp_file(b"", suffix=".mp4") # Create an empty temp file and add to cleanup list
     cmd = [
         FFMPEG_BIN, "-y", "-i", path,
@@ -396,7 +485,7 @@ def _convert_video_for_preview_if_needed(path: str) -> str:
             _temp_preview_files_to_delete.remove(out_path)
         try: os.remove(out_path)
         except Exception: pass
-        return path # Gradio will show its own warning if not playable
 # --- Preview Generation Logic ---
 def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes) -> str:
@@ -407,22 +496,17 @@ def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes) -> str:
     is_img, is_vid = determine_media_type(src_url)
     if is_vid:
-        # Save raw video bytes to a temp file for potential conversion
         temp_raw_video_path = _temp_file(raw_bytes, suffix=ext_from_src(src_url) or ".mp4")
-        # Convert it for browser playback if necessary; this might return a new temp path or the original
         playable_path = _convert_video_for_preview_if_needed(temp_raw_video_path)
-        # If a new path was created by conversion, the original temp_raw_video_path is no longer needed
-        # and should be explicitly removed if it's no longer tracked or if it's tracked separately
-        if playable_path != temp_raw_video_path and os.path.exists(temp_raw_video_path):
-            if temp_raw_video_path in _temp_preview_files_to_delete:
-                _temp_preview_files_to_delete.remove(temp_raw_video_path)
             try: os.remove(temp_raw_video_path)
             except Exception: pass
         return playable_path
-    else: # Assume image or unknown treated as image for preview
-        # Convert image bytes to JPEG and save as temp file
         return _temp_file(convert_to_jpeg_bytes(raw_bytes, base_h=1024), suffix=".jpg")
 def _fetch_with_retries_bytes(src: str, timeout: int = 15, max_retries: int = 3):
@@ -432,25 +516,24 @@ def _fetch_with_retries_bytes(src: str, timeout: int = 15, max_retries: int = 3)
         attempt += 1
         try:
             if is_remote(src):
-                r = requests.get(src, timeout=timeout, stream=True)
-                if r.status_code == 200:
                     return r.content
-                if r.status_code == 429: # Rate limit
-                    ra = r.headers.get("Retry-After")
-                    try: delay = float(ra) if ra is not None else delay
-                    except Exception: pass
-                r.raise_for_status()
             else:
                 with open(src, "rb") as fh:
                     return fh.read()
-        except requests.exceptions.RequestException:
-            if attempt >= max_retries: raise
             time.sleep(delay)
             delay *= 2
         except FileNotFoundError:
             raise
-        except Exception:
-            if attempt >= max_retries: raise
             time.sleep(delay)
             delay *= 2
@@ -467,8 +550,8 @@ def _save_local_playable_preview(src: str) -> Optional[str]:
             is_img, is_vid = determine_media_type(src)
             if is_vid:
                 return _convert_video_for_preview_if_needed(src)
-            return src # Local image, return as is
-        return None # Local path does not exist
     # Remote source
     try:
@@ -492,6 +575,8 @@ def create_demo():
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False, format="mp4")
                 preview_status = gr.Textbox(label="Preview status", interactive=False, lines=1, value="", visible=True)
             with gr.Column(scale=2):
                 url_input = gr.Textbox(label="Image / Video URL", placeholder="https://...", lines=1)
@@ -505,77 +590,97 @@ def create_demo():
                 progress_md = gr.Markdown("Idle")
                 output_md = gr.Markdown("")
-                # State to track overall processing status (idle, busy, done, error)
                 status_state = gr.State("idle")
-                # State to hold the current path of the file being used for preview (whether from URL input or worker)
-                preview_path_state = gr.State("")
-        # Function to handle URL input change and update preview
-        def load_preview(url: str):
             """
-            Loads a preview for the given URL and updates the preview components.
-            Returns (image_update, video_update, status_message, new_preview_path_for_state).
             """
             if not url:
-                return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=""), ""
             try:
-                local_playable_path = _save_local_playable_preview(url)
                 if not local_playable_path:
-                    return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="Preview load failed: could not fetch resource or make playable."), ""
-                # Determine if it's an image or video for display
                 is_img_preview = False
                 try:
                     Image.open(local_playable_path).verify()
                     is_img_preview = True
                 except Exception:
-                    pass # Not an image, treat as video
                 if is_img_preview:
-                    return gr.update(value=local_playable_path, visible=True), gr.update(value=None, visible=False), gr.update(value="Image preview loaded."), local_playable_path
-                else: # Assume video (Gradio will render if playable)
-                    return gr.update(value=None, visible=False), gr.update(value=local_playable_path, visible=True), gr.update(value="Video preview loaded."), local_playable_path
             except Exception as e:
-                return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=f"Preview load failed: {e}"), ""
-        # Bind load_preview to the URL input change event
         url_input.change(
-            fn=load_preview,
             inputs=[url_input],
-            outputs=[preview_image, preview_video, preview_status, preview_path_state]
         )
-        # Function to clear all inputs and outputs
-        def clear_all(current_preview_path: str):
-            """Clears all inputs/outputs and cleans up the currently displayed preview file."""
-            if current_preview_path and os.path.exists(current_preview_path) and current_preview_path in _temp_preview_files_to_delete:
-                try:
-                    os.remove(current_preview_path)
-                    _temp_preview_files_to_delete.remove(current_preview_path)
-                except Exception as e:
-                    print(f"Error cleaning up on clear: {e}")
-            return "", None, None, "idle", "Idle", "", "" # url_input, preview_image, preview_video, status_state, progress_md, output_md, preview_path_state
-        clear_btn.click(
-            fn=clear_all,
-            inputs=[preview_path_state], # Pass current preview path for cleanup
-            outputs=[url_input, preview_image, preview_video, status_state, progress_md, output_md, preview_path_state]
-        )
-        # Main worker function for analysis
         def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
             """
             Performs the media analysis.
-            Returns (status, markdown_output, new_preview_path_for_state).
             """
-            temp_media_file_for_analysis = None # Temporary file for analysis (video-only for voxtral)
-            generated_preview_path = "" # Path for the Gradio preview components
             try:
                 if not url:
-                    return "error", "**Error:** No URL provided.", ""
                 progress(0.01, desc="Starting media processing")
                 progress(0.02, desc="Checking URL / content‑type")
@@ -590,16 +695,13 @@ def create_demo():
                     progress(0.05, desc="Downloading video for analysis")
                     raw_bytes = fetch_bytes(url, timeout=120, progress=progress)
                     if not raw_bytes:
-                        return "error", "Failed to download video bytes.", ""
-                    # Create a temporary file for analysis (Mistral API needs a path for video upload)
                     temp_media_file_for_analysis = _temp_file(raw_bytes, suffix=ext_from_src(url) or ".mp4")
-                    progress(0.15, desc="Preparing video preview")
-                    generated_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)
                     progress(0.25, desc="Running full‑video analysis")
-                    result = analyze_video_cohesive(client, temp_media_file_for_analysis, prompt, progress=progress)
                 # --- Image Processing Path ---
                 elif is_img:
@@ -607,17 +709,17 @@ def create_demo():
                     raw_bytes = fetch_bytes(url, progress=progress)
                     progress(0.15, desc="Preparing image preview")
-                    generated_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)
                     progress(0.20, desc="Running image analysis")
-                    result = analyze_image_structured(client, raw_bytes, prompt, progress=progress)
                 # --- Unknown Media Type (Fallback) ---
                 else:
                     progress(0.07, desc="Downloading unknown media for type determination")
                     raw_bytes = fetch_bytes(url, timeout=120, progress=progress)
-                    # Try to open as image first
                     is_definitely_img = False
                     try:
                         Image.open(BytesIO(raw_bytes)).verify()
@@ -627,35 +729,34 @@ def create_demo():
                     if is_definitely_img:
                         progress(0.15, desc="Preparing image preview (fallback)")
-                        generated_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)
                         progress(0.20, desc="Running image analysis (fallback)")
-                        result = analyze_image_structured(client, raw_bytes, prompt, progress=progress)
                     else: # Treat as video fallback
                         progress(0.15, desc="Preparing video preview (fallback)")
                         temp_media_file_for_analysis = _temp_file(raw_bytes, suffix=ext_from_src(url) or ".mp4")
-                        generated_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)
                         progress(0.25, desc="Running video analysis (fallback)")
-                        result = analyze_video_cohesive(client, temp_media_file_for_analysis, prompt, progress=progress)
-                status = "done" if not (isinstance(result, str) and result.lower().startswith("error")) else "error"
-                return status, result if isinstance(result, str) else str(result), generated_preview_path
             except Exception as exc:
-                return "error", f"Unexpected worker error: {exc}", ""
             finally:
-                # Clean up the file used for analysis, if it was a temporary file
                 if temp_media_file_for_analysis and os.path.exists(temp_media_file_for_analysis):
                     if temp_media_file_for_analysis in _temp_preview_files_to_delete:
-                        _temp_preview_files_to_delete.remove(temp_media_file_for_analysis) # Remove from list if also added there
                     try: os.remove(temp_media_file_for_analysis)
                     except Exception as e: print(f"Error cleaning up analysis temp file {temp_media_file_for_analysis}: {e}")
-        # Bind worker function to submit button click
         submit_btn.click(
             fn=worker,
             inputs=[url_input, custom_prompt, api_key],
-            outputs=[status_state, output_md, preview_path_state], # Worker updates preview_path_state
             show_progress="full",
             show_progress_on=progress_md,
         )
@@ -668,47 +769,42 @@ def create_demo():
             return {"idle": "Idle", "busy": "Processing…", "done": "Completed", "error": "Error — see output"}.get(s, s)
         status_state.change(fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md])
-        # Function to react to changes in preview_path_state and update the UI
-        def apply_preview_change(new_path: str, old_path: str):
-            """
-            Handles updating the preview_image/preview_video components and cleaning up old files.
-            `old_path` is implicitly passed by Gradio for State components.
-            """
-            # Clean up the OLD preview file if it was a temporary file managed by us
-            if old_path and os.path.exists(old_path) and old_path in _temp_preview_files_to_delete:
                 try:
-                    os.remove(old_path)
-                    _temp_preview_files_to_delete.remove(old_path) # Remove from tracking list
                 except Exception as e:
-                    print(f"Error cleaning up old preview file {old_path}: {e}")
-            # If new_path is empty, clear both components and status
-            if not new_path:
-                return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value="")
-            # Determine if new_path is an image or video and update components
-            try:
-                is_img_preview = False
-                try:
-                    Image.open(new_path).verify()
-                    is_img_preview = True
-                except Exception:
-                    pass # Not an image, treat as video
-                if is_img_preview:
-                    return gr.update(value=new_path, visible=True), gr.update(value=None, visible=False), gr.update(value="Preview updated.")
-                else: # Assume video (Gradio will render if playable)
-                    return gr.update(value=None, visible=False), gr.update(value=new_path, visible=True), gr.update(value="Preview updated.")
-            except Exception as e:
-                print(f"Error applying new preview from path {new_path}: {e}")
-                return gr.update(value=None, visible=False), gr.update(value=None, visible=False), gr.update(value=f"Preview failed for path: {e}")
-        # Register the change event for preview_path_state
-        # Gradio will automatically pass the new value as the first argument and the old value as the second.
-        preview_path_state.change(
-            fn=apply_preview_change,
-            inputs=[preview_path_state], # `preview_path_state` will be `new_path`. `old_path` is passed implicitly.
-            outputs=[preview_image, preview_video, preview_status]
         )
     return demo

 def _cleanup_all_temp_preview_files():
     """Removes all temporary files created for previews upon application exit."""
+    for f_path in list(_temp_preview_files_to_delete): # Iterate over a copy to allow modification
         if os.path.exists(f_path):
             try:
                 os.remove(f_path)
 def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
     """Return *b* as a base64 ``data:`` URI labelled with the MIME type *mime*."""
     payload = base64.b64encode(b).decode("utf-8")
     return f"data:{mime};base64,{payload}"
+def _ffprobe_streams(path: str) -> Optional[dict]:
+    """Probes video codecs via ffprobe; returns dict with streams info or None on failure."""
+    if not FFMPEG_BIN:
+        return None
+    ffprobe = FFMPEG_BIN.replace("ffmpeg", "ffprobe") if "ffmpeg" in FFMPEG_BIN else "ffprobe"
+    if not shutil.which(ffprobe):
+        ffprobe = "ffprobe" # Try system PATH
+    cmd = [
+        ffprobe, "-v", "error", "-print_format", "json", "-show_streams", "-show_format", path
+    ]
+    try:
+        out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL)
+        return json.loads(out)
+    except Exception:
+        return None
 def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15, progress=None) -> List[bytes]:
+    """
+    Extracts frames as bytes for model input. These temp files are immediately deleted.
+    """
     frames: List[bytes] = []
     if not FFMPEG_BIN or not os.path.exists(media_path):
         return frames
     if progress is not None:
+        progress(0.05, desc="Preparing frame extraction for model...")
+    duration = 0.0
+    info = _ffprobe_streams(media_path)
+    if info and "format" in info and "duration" in info["format"]:
+        try:
+            duration = float(info["format"]["duration"])
+        except ValueError:
+            pass
+    timestamps: List[float] = []
+    if duration > 0 and sample_count > 0:
+        # Sample evenly across the video
+        step = duration / (sample_count + 1)
+        timestamps = [step * (i + 1) for i in range(sample_count)]
+    else:
+        # Fallback to fixed timestamps
+        timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count]
     for i, t in enumerate(timestamps):
+        fd, tmp = tempfile.mkstemp(suffix=f"_{i}_model.jpg")
         os.close(fd)
+        # This temp file is for immediate read and deletion, not persistent tracking
         cmd = [
             FFMPEG_BIN,
             "-nostdin",
             "-frames:v",
             "1",
             "-q:v",
+            "2", # High quality JPEG
             tmp,
         ]
         try:
             if progress is not None:
+                progress(0.1 + (i / max(1, sample_count)) * 0.2, desc=f"Extracting frame {i+1}/{sample_count} for model...")
             subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                 with open(tmp, "rb") as f:
         except Exception:
             pass
         finally:
+            try: os.remove(tmp)
             except Exception: pass
     if progress is not None:
+        progress(0.45, desc=f"Extracted {len(frames)} frames for model")
     return frames
+def extract_and_save_frames_for_gallery(media_path: str, sample_count: int = 5, timeout_extract: int = 15, base_h: int = 128, progress=None) -> List[str]:
+    """
+    Extracts frames from a video, converts them to small JPEGs, saves them as temp files
+    (tracked for cleanup), and returns a list of paths to these temporary files for gallery display.
+    """
+    frame_paths: List[str] = []
+    if not FFMPEG_BIN or not os.path.exists(media_path):
+        return frame_paths
+    duration = 0.0
+    info = _ffprobe_streams(media_path)
+    if info and "format" in info and "duration" in info["format"]:
+        try:
+            duration = float(info["format"]["duration"])
+        except ValueError:
+            pass
+    timestamps: List[float] = []
+    if duration > 0 and sample_count > 0:
+        step = duration / (sample_count + 1)
+        timestamps = [step * (i + 1) for i in range(sample_count)]
+    else:
+        timestamps = [0.5, 1.0, 2.0, 3.0, 4.0][:sample_count] # Fallback to fixed times
+    for i, t in enumerate(timestamps):
+        if progress is not None:
+            progress(0.1 + (i / max(1, sample_count)) * 0.2, desc=f"Extracting frame {i+1}/{sample_count} for gallery...")
+        # Extract to a temp PNG first for best quality, then process with PIL
+        fd_raw, tmp_png_path = tempfile.mkstemp(suffix=".png")
+        os.close(fd_raw)
+        # Command to extract frame to PNG
+        cmd_extract = [
+            FFMPEG_BIN, "-nostdin", "-y", "-ss", str(t), "-i", media_path,
+            "-frames:v", "1", "-pix_fmt", "rgb24", tmp_png_path,
+        ]
+        try:
+            subprocess.run(cmd_extract, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
+            if os.path.exists(tmp_png_path) and os.path.getsize(tmp_png_path) > 0:
+                # Convert extracted PNG to a smaller JPEG and save as new temp file
+                jpeg_bytes = convert_to_jpeg_bytes(open(tmp_png_path, "rb").read(), base_h=base_h)
+                temp_jpeg_path = _temp_file(jpeg_bytes, suffix=f"_gallery_{i}.jpg") # _temp_file tracks this for cleanup
+                frame_paths.append(temp_jpeg_path)
+        except Exception as e:
+            print(f"Error processing frame {i+1} for gallery: {e}")
+        finally:
+            if os.path.exists(tmp_png_path):
+                try: os.remove(tmp_png_path)
+                except Exception: pass
+    if progress is not None:
+        progress(0.45, desc=f"Extracted {len(frame_paths)} frames for gallery")
+    return frame_paths
 def chat_complete(client, model: str, messages, timeout: int = 120, progress=None) -> str:
     try:
         if progress is not None:
     except Exception as e:
         return f"Error analyzing image: {e}"
+def analyze_video_cohesive(client, video_path: str, prompt: str, progress=None) -> Tuple[str, List[str]]:
+    """
+    Analyzes video, either by uploading or by extracting frames.
+    Returns analysis result (str) and a list of paths to gallery frames (List[str]).
+    """
+    gallery_frame_paths: List[str] = []
     try:
         if progress is not None:
             progress(0.3, desc="Uploading video for full analysis...")
             {"role": "system", "content": SYSTEM_INSTRUCTION},
             {"role": "user", "content": extra_msg + "\n\n" + prompt},
         ]
+        result = chat_complete(client, VIDEO_MODEL, messages, progress=progress)
+        # If successful upload, still extract frames for gallery display
+        gallery_frame_paths = extract_and_save_frames_for_gallery(video_path, sample_count=6, base_h=128, progress=progress)
+        return result, gallery_frame_paths
     except Exception as e:
         if progress is not None:
             progress(0.35, desc="Upload failed, extracting frames as fallback...")
+        # Extract frames for model input (bytes)
+        frames_for_model_bytes = extract_best_frames_bytes(video_path, sample_count=6, progress=progress)
+        # Extract and save frames for gallery display (paths)
+        gallery_frame_paths = extract_and_save_frames_for_gallery(video_path, sample_count=6, base_h=128, progress=progress)
+        if not frames_for_model_bytes:
+            return f"Error: could not upload video and no frames could be extracted. ({e})", []
         image_entries = []
+        for i, fb in enumerate(frames_for_model_bytes, start=1):
             try:
                 if progress is not None:
+                    progress(0.4 + (i / len(frames_for_model_bytes)) * 0.2, desc=f"Preparing frame {i}/{len(frames_for_model_bytes)} for model...")
                 j = convert_to_jpeg_bytes(fb, base_h=720)
                 image_entries.append(
                     {
             {"role": "system", "content": SYSTEM_INSTRUCTION},
             {"role": "user", "content": content},
         ]
+        result = chat_complete(client, PIXTRAL_MODEL, messages, progress=progress)
+        return result, gallery_frame_paths
 # --- FFmpeg Helpers for Preview ---
 def _convert_video_for_preview_if_needed(path: str) -> str:
     """
     Returns a path that the Gradio video component can play.
     if not FFMPEG_BIN or not os.path.exists(path):
         return path # Cannot convert, return original
     if path.lower().endswith((".mp4", ".m4v", ".mov")):
         info = _ffprobe_streams(path)
         if info:
             if video_streams and any(s.get("codec_name") in ("h264", "h265", "avc1") for s in video_streams):
                 return path # Already playable
     out_path = _temp_file(b"", suffix=".mp4") # Create an empty temp file and add to cleanup list
     cmd = [
         FFMPEG_BIN, "-y", "-i", path,
             _temp_preview_files_to_delete.remove(out_path)
         try: os.remove(out_path)
         except Exception: pass
+        return path
 # --- Preview Generation Logic ---
 def _get_playable_preview_path_from_raw(src_url: str, raw_bytes: bytes) -> str:
     is_img, is_vid = determine_media_type(src_url)
     if is_vid:
         temp_raw_video_path = _temp_file(raw_bytes, suffix=ext_from_src(src_url) or ".mp4")
         playable_path = _convert_video_for_preview_if_needed(temp_raw_video_path)
+        # If conversion created a *new* temp path, and the original raw video path
+        # is no longer needed (and different), remove the raw path's tracking.
+        if playable_path != temp_raw_video_path and temp_raw_video_path in _temp_preview_files_to_delete:
+            _temp_preview_files_to_delete.remove(temp_raw_video_path)
             try: os.remove(temp_raw_video_path)
             except Exception: pass
         return playable_path
+    else:
         return _temp_file(convert_to_jpeg_bytes(raw_bytes, base_h=1024), suffix=".jpg")
 def _fetch_with_retries_bytes(src: str, timeout: int = 15, max_retries: int = 3):
         attempt += 1
         try:
             if is_remote(src):
+                with requests.get(src, timeout=timeout, stream=True) as r:
+                    r.raise_for_status()
                     return r.content
             else:
                 with open(src, "rb") as fh:
                     return fh.read()
+        except requests.exceptions.RequestException as e:
+            if attempt >= max_retries:
+                raise RuntimeError(f"Failed to fetch {src} after {max_retries} attempts: {e}")
+            print(f"Retrying fetch for {src} ({attempt}/{max_retries}). Delaying {delay:.1f}s...")
             time.sleep(delay)
             delay *= 2
         except FileNotFoundError:
             raise
+        except Exception as e:
+            if attempt >= max_retries:
+                raise RuntimeError(f"Failed to fetch {src} after {max_retries} attempts due to unexpected error: {e}")
+            print(f"Retrying fetch for {src} ({attempt}/{max_retries}). Delaying {delay:.1f}s...")
             time.sleep(delay)
             delay *= 2
             is_img, is_vid = determine_media_type(src)
             if is_vid:
                 return _convert_video_for_preview_if_needed(src)
+            return src # Local image, return as is (assuming Gradio can display it)
+        return None
     # Remote source
     try:
             with gr.Column(scale=1):
                 preview_image = gr.Image(label="Preview Image", type="filepath", elem_classes="preview_media", visible=False)
                 preview_video = gr.Video(label="Preview Video", elem_classes="preview_media", visible=False, format="mp4")
+                # New gallery for screenshots, visible=False by default
+                screenshot_gallery = gr.Gallery(label="Extracted Screenshots", columns=5, rows=1, height="auto", object_fit="contain", visible=False)
                 preview_status = gr.Textbox(label="Preview status", interactive=False, lines=1, value="", visible=True)
             with gr.Column(scale=2):
                 url_input = gr.Textbox(label="Image / Video URL", placeholder="https://...", lines=1)
                 progress_md = gr.Markdown("Idle")
                 output_md = gr.Markdown("")
+                # State to track overall processing status
                 status_state = gr.State("idle")
+                # State to hold the current path of the main preview (image/video)
+                main_preview_path_state = gr.State("")
+                # State to hold the list of screenshot paths for the gallery
+                screenshot_paths_state = gr.State([])
+        def clear_all_files_and_ui():
             """
+            Cleans up all tracked temporary files and resets all relevant UI components.
+            This function is meant to be called at the start of any new processing
+            or when the user explicitly clicks "Clear".
             """
+            for f_path in list(_temp_preview_files_to_delete):
+                if os.path.exists(f_path):
+                    try:
+                        os.remove(f_path)
+                    except Exception as e:
+                        print(f"Error during proactive cleanup of {f_path}: {e}")
+            _temp_preview_files_to_delete.clear()
+            return "", \
+                   gr.update(value=None, visible=False), \
+                   gr.update(value=None, visible=False), \
+                   gr.update(value=[], visible=False), \
+                   "idle", "Idle", "", "", "", [], gr.update(value="", visible=True) # url_input, preview_image, preview_video, screenshot_gallery, status_state, progress_md, output_md, main_preview_path_state, screenshot_paths_state, preview_status
+        clear_btn.click(
+            fn=clear_all_files_and_ui,
+            inputs=[],
+            outputs=[url_input, preview_image, preview_video, screenshot_gallery, status_state, progress_md, output_md, main_preview_path_state, screenshot_paths_state, preview_status]
+        )
+        # Function to handle URL input change and update main preview
+        def load_main_preview_and_clear_old(url: str):
+            # First, clear all existing temporary files and reset UI components
+            # This ensures a clean slate before loading new content
+            _, img_update_clear, video_update_clear, gallery_update_clear, _, _, _, \
+            main_path_clear, screenshot_paths_clear, status_update_clear = clear_all_files_and_ui() # Call the cleanup function
             if not url:
+                return img_update_clear, video_update_clear, gallery_update_clear, \
+                       gr.update(value="", visible=True), main_path_clear, screenshot_paths_clear
             try:
+                local_playable_path = _save_local_playable_preview(url) # This adds to _temp_preview_files_to_delete
                 if not local_playable_path:
+                    return img_update_clear, video_update_clear, gallery_update_clear, \
+                           gr.update(value="Preview load failed: could not fetch resource or make playable.", visible=True), \
+                           "", []
                 is_img_preview = False
                 try:
                     Image.open(local_playable_path).verify()
                     is_img_preview = True
                 except Exception:
+                    pass
                 if is_img_preview:
+                    return gr.update(value=local_playable_path, visible=True), gr.update(value=None, visible=False), \
+                           gr.update(value=[], visible=False), gr.update(value="Image preview loaded.", visible=True), \
+                           local_playable_path, []
+                else: # Assume video
+                    return gr.update(value=None, visible=False), gr.update(value=local_playable_path, visible=True), \
+                           gr.update(value=[], visible=False), gr.update(value="Video preview loaded.", visible=True), \
+                           local_playable_path, []
             except Exception as e:
+                return gr.update(value=None, visible=False), gr.update(value=None, visible=False), \
+                       gr.update(value=[], visible=False), gr.update(value=f"Preview load failed: {e}", visible=True), \
+                       "", []
         url_input.change(
+            fn=load_main_preview_and_clear_old,
             inputs=[url_input],
+            outputs=[preview_image, preview_video, screenshot_gallery, preview_status, main_preview_path_state, screenshot_paths_state]
         )
         def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
             """
             Performs the media analysis.
+            Returns (status, markdown_output, main_preview_path_for_state, screenshot_paths_for_state).
             """
+            temp_media_file_for_analysis = None
+            generated_main_preview_path = "" # This should reflect the preview that was loaded by load_main_preview_and_clear_old
+            generated_screenshot_paths: List[str] = [] # List of paths for gallery
+            result_text = ""
             try:
                 if not url:
+                    return "error", "**Error:** No URL provided.", "", []
                 progress(0.01, desc="Starting media processing")
                 progress(0.02, desc="Checking URL / content‑type")
                     progress(0.05, desc="Downloading video for analysis")
                     raw_bytes = fetch_bytes(url, timeout=120, progress=progress)
                     if not raw_bytes:
+                        return "error", "Failed to download video bytes.", "", []
                     temp_media_file_for_analysis = _temp_file(raw_bytes, suffix=ext_from_src(url) or ".mp4")
+                    generated_main_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes) # This generates the main video preview path
                     progress(0.25, desc="Running full‑video analysis")
+                    result_text, generated_screenshot_paths = analyze_video_cohesive(client, temp_media_file_for_analysis, prompt, progress=progress)
                 # --- Image Processing Path ---
                 elif is_img:
                     raw_bytes = fetch_bytes(url, progress=progress)
                     progress(0.15, desc="Preparing image preview")
+                    generated_main_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes) # This generates the main image preview path
                     progress(0.20, desc="Running image analysis")
+                    result_text = analyze_image_structured(client, raw_bytes, prompt, progress=progress)
+                    # No screenshots for images
                 # --- Unknown Media Type (Fallback) ---
                 else:
                     progress(0.07, desc="Downloading unknown media for type determination")
                     raw_bytes = fetch_bytes(url, timeout=120, progress=progress)
                     is_definitely_img = False
                     try:
                         Image.open(BytesIO(raw_bytes)).verify()
                     if is_definitely_img:
                         progress(0.15, desc="Preparing image preview (fallback)")
+                        generated_main_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)
                         progress(0.20, desc="Running image analysis (fallback)")
+                        result_text = analyze_image_structured(client, raw_bytes, prompt, progress=progress)
                     else: # Treat as video fallback
                         progress(0.15, desc="Preparing video preview (fallback)")
                         temp_media_file_for_analysis = _temp_file(raw_bytes, suffix=ext_from_src(url) or ".mp4")
+                        generated_main_preview_path = _get_playable_preview_path_from_raw(url, raw_bytes)
                         progress(0.25, desc="Running video analysis (fallback)")
+                        result_text, generated_screenshot_paths = analyze_video_cohesive(client, temp_media_file_for_analysis, prompt, progress=progress)
+                status = "done" if not (isinstance(result_text, str) and result_text.lower().startswith("error")) else "error"
+                return status, result_text, generated_main_preview_path, generated_screenshot_paths
             except Exception as exc:
+                return "error", f"Unexpected worker error: {exc}", "", []
             finally:
                 if temp_media_file_for_analysis and os.path.exists(temp_media_file_for_analysis):
                     if temp_media_file_for_analysis in _temp_preview_files_to_delete:
+                        _temp_preview_files_to_delete.remove(temp_media_file_for_analysis)
                     try: os.remove(temp_media_file_for_analysis)
                     except Exception as e: print(f"Error cleaning up analysis temp file {temp_media_file_for_analysis}: {e}")
+        # Worker output changed to include screenshot_paths_state
         submit_btn.click(
             fn=worker,
             inputs=[url_input, custom_prompt, api_key],
+            outputs=[status_state, output_md, main_preview_path_state, screenshot_paths_state],
             show_progress="full",
             show_progress_on=progress_md,
         )
             return {"idle": "Idle", "busy": "Processing…", "done": "Completed", "error": "Error — see output"}.get(s, s)
         status_state.change(fn=status_to_progress_text, inputs=[status_state], outputs=[progress_md])
+        # This function updates the UI components based on the state values.
+        # It should *not* perform cleanup, as that's handled by clear_all_files_and_ui or load_main_preview_and_clear_old.
+        def _update_preview_components(current_main_preview_path: str, current_screenshot_paths: List[str]):
+            img_update = gr.update(value=None, visible=False)
+            video_update = gr.update(value=None, visible=False)
+            if current_main_preview_path:
                 try:
+                    is_img_preview = False
+                    try:
+                        Image.open(current_main_preview_path).verify()
+                        is_img_preview = True
+                    except Exception:
+                        pass # Not an image, treat as video
+                    if is_img_preview:
+                        img_update = gr.update(value=current_main_preview_path, visible=True)
+                    else:
+                        video_update = gr.update(value=current_main_preview_path, visible=True)
                 except Exception as e:
+                    print(f"Error setting main preview from path {current_main_preview_path}: {e}")
+            # Gallery is visible only if there are paths
+            gallery_update = gr.update(value=current_screenshot_paths, visible=bool(current_screenshot_paths))
+            return img_update, video_update, gallery_update
+        # Register changes to the states to update the UI components
+        main_preview_path_state.change(
+            fn=_update_preview_components,
+            inputs=[main_preview_path_state, screenshot_paths_state],
+            outputs=[preview_image, preview_video, screenshot_gallery]
+        )
+        screenshot_paths_state.change(
+            fn=_update_preview_components,
+            inputs=[main_preview_path_state, screenshot_paths_state],
+            outputs=[preview_image, preview_video, screenshot_gallery]
         )
     return demo