Image-To-Flux-Prompt

Running

App Files Files Community

Hug0endob committed on Dec 19, 2025

Commit

903c7b4

verified ·

1 Parent(s): 43ea394

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -68

app.py CHANGED Viewed

@@ -1,14 +1,18 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-import os, shutil, subprocess, tempfile, base64, json
 from io import BytesIO
 from typing import List, Tuple
 import requests
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
-# ------------------- Backend (unchanged) -------------------
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
@@ -29,7 +33,6 @@ SYSTEM_INSTRUCTION = (
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
-# Optional import – fallback to a dummy client if the library is missing
 try:
     from mistralai import Mistral
 except Exception:
@@ -118,11 +121,7 @@ def convert_to_jpeg_bytes(img_bytes: bytes, base_h: int = 480) -> bytes:
 def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
     """Return ``b`` as a base64 ``data:`` URL string.

     The result has the form ``data:<mime>;base64,<payload>``, where the
     payload is the standard base64 encoding of ``b`` decoded as UTF-8 text.
     """
     header = f"data:{mime};base64,"
     payload = base64.b64encode(b).decode("utf-8")
     return header + payload
-def extract_best_frames_bytes(
-    media_path: str,
-    sample_count: int = 5,
-    timeout_extract: int = 15,
-) -> List[bytes]:
     frames: List[bytes] = []
     if not FFMPEG_BIN or not os.path.exists(media_path):
         return frames
@@ -145,22 +144,15 @@ def extract_best_frames_bytes(
             tmp,
         ]
         try:
-            subprocess.run(
-                cmd,
-                stdout=subprocess.DEVNULL,
-                stderr=subprocess.DEVNULL,
-                timeout=timeout_extract,
-            )
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                 with open(tmp, "rb") as f:
                     frames.append(f.read())
         except Exception:
             pass
         finally:
-            try:
-                os.remove(tmp)
-            except Exception:
-                pass
     return frames
 def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
@@ -170,37 +162,19 @@ def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
         else:
             api_key = getattr(client, "api_key", "") or DEFAULT_KEY
             url = "https://api.mistral.ai/v1/chat/completions"
-            headers = (
-                {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-                if api_key
-                else {"Content-Type": "application/json"}
-            )
-            r = requests.post(
-                url,
-                json={"model": model, "messages": messages},
-                headers=headers,
-                timeout=timeout,
-            )
             r.raise_for_status()
             res = r.json()
         choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
         if not choices:
             return str(res)
         first = choices[0]
-        msg = (
-            first.message
-            if hasattr(first, "message")
-            else (first.get("message") if isinstance(first, dict) else first)
-        )
-        content = (
-            msg.get("content")
-            if isinstance(msg, dict)
-            else getattr(msg, "content", None)
-        )
         return content.strip() if isinstance(content, str) else str(content)
     except Exception as e:
         return f"Error during model call: {e}"
 def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120) -> str:
     fname = filename or os.path.basename(path)
     try:
@@ -228,12 +202,10 @@ def determine_media_type(src: str) -> Tuple[bool, bool]:
     is_image = False
     is_video = False
     ext = ext_from_src(src)
     if ext in IMAGE_EXTS:
         is_image = True
     if ext in VIDEO_EXTS:
         is_video = True
     if is_remote(src):
         head = safe_head(src)
         if head:
@@ -249,13 +221,10 @@ def analyze_image_structured(client, img_bytes: bytes, prompt: str) -> str:
     data_url = b64_bytes(jpeg, mime="image/jpeg")
     messages = [
         {"role": "system", "content": SYSTEM_INSTRUCTION},
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": data_url},
-            ],
-        },
     ]
     return chat_complete(client, PIXTRAL_MODEL, messages)
@@ -288,21 +257,20 @@ def analyze_video_cohesive(client, video_path: str, prompt: str) -> str:
                 )
             except Exception:
                 continue
-        content = [
-            {"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}
-        ] + image_entries
         messages = [
             {"role": "system", "content": SYSTEM_INSTRUCTION},
             {"role": "user", "content": content},
         ]
         return chat_complete(client, PIXTRAL_MODEL, messages)
-def process_media(src: str, custom_prompt: str, api_key: str, progress) -> str:
     client = get_client(api_key)
     prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
     if not src:
         return "No URL or path provided."
-    progress(0.05, desc="Determining media type")
     is_image, is_video = determine_media_type(src)
     if is_image:
@@ -310,7 +278,8 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress) -> str:
             raw = fetch_bytes(src)
         except Exception as e:
             return f"Error fetching image: {e}"
-        progress(0.2, desc="Analyzing image")
         try:
             return analyze_image_structured(client, raw, prompt)
         except UnidentifiedImageError:
@@ -325,23 +294,21 @@ def process_media(src: str, custom_prompt: str, api_key: str, progress) -> str:
             return f"Error fetching video: {e}"
         tmp_path = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
         try:
-            progress(0.2, desc="Analyzing video")
             return analyze_video_cohesive(client, tmp_path, prompt)
         finally:
-            try:
-                os.remove(tmp_path)
-            except Exception:
-                pass
-    # Fallback: treat as image
     try:
         raw = fetch_bytes(src)
-        progress(0.2, desc="Treating as image")
         return analyze_image_structured(client, raw, prompt)
     except Exception as e:
         return f"Unable to determine media type or fetch file: {e}"
-# ------------------- Gradio UI (fixed) -------------------
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
@@ -396,8 +363,7 @@ def create_demo():
             except Exception:
                 return empty_img, empty_vid
-        url_input.change(fn=load_preview, inputs=[url_input],
-                         outputs=[preview_image, preview_video])
         def clear_all():
             """Build the four reset values returned when everything is cleared.

             The tuple holds an empty string, two ``gr.update`` payloads that blank
             the value and hide the component, and the string ``"idle"``.
             NOTE(review): which components receive these values is wired outside
             this view -- confirm against the caller.
             """
             first_hidden = gr.update(value=None, visible=False)
             second_hidden = gr.update(value=None, visible=False)
             return "", first_hidden, second_hidden, "idle"
@@ -407,11 +373,10 @@ def create_demo():
             return "busy"
         submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state])
-        def worker(url: str, prompt: str, key: str, progress):
             return process_media(url or "", prompt or "", key or "", progress)
-        submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key],
-                        outputs=[output_md], queue=True).then(
             fn=lambda res: ("error", "**Error:** no result returned.") if not res else
                            ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
             inputs=[output_md],

 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
+import os
+import shutil
+import subprocess
+import tempfile
+import base64
+import json
 from io import BytesIO
 from typing import List, Tuple
 import requests
 from PIL import Image, ImageFile, UnidentifiedImageError
 import gradio as gr
 DEFAULT_KEY = os.getenv("MISTRAL_API_KEY", "")
 PIXTRAL_MODEL = "pixtral-12b-2409"
 VIDEO_MODEL = "voxtral-mini-latest"
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 Image.MAX_IMAGE_PIXELS = 10000 * 10000
 try:
     from mistralai import Mistral
 except Exception:
 def b64_bytes(b: bytes, mime: str = "image/jpeg") -> str:
     """Return ``b`` as a base64 ``data:`` URL string.

     The result has the form ``data:<mime>;base64,<payload>``, where the
     payload is the standard base64 encoding of ``b`` decoded as UTF-8 text.
     """
     header = f"data:{mime};base64,"
     payload = base64.b64encode(b).decode("utf-8")
     return header + payload
+def extract_best_frames_bytes(media_path: str, sample_count: int = 5, timeout_extract: int = 15) -> List[bytes]:
     frames: List[bytes] = []
     if not FFMPEG_BIN or not os.path.exists(media_path):
         return frames
             tmp,
         ]
         try:
+            subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=timeout_extract)
             if os.path.exists(tmp) and os.path.getsize(tmp) > 0:
                 with open(tmp, "rb") as f:
                     frames.append(f.read())
         except Exception:
             pass
         finally:
+            try: os.remove(tmp)
+            except Exception: pass
     return frames
 def chat_complete(client, model: str, messages, timeout: int = 120) -> str:
         else:
             api_key = getattr(client, "api_key", "") or DEFAULT_KEY
             url = "https://api.mistral.ai/v1/chat/completions"
+            headers = ({"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"} if api_key else {"Content-Type": "application/json"})
+            r = requests.post(url, json={"model": model, "messages": messages}, headers=headers, timeout=timeout)
             r.raise_for_status()
             res = r.json()
         choices = getattr(res, "choices", None) or (res.get("choices") if isinstance(res, dict) else [])
         if not choices:
             return str(res)
         first = choices[0]
+        msg = (first.message if hasattr(first, "message") else (first.get("message") if isinstance(first, dict) else first))
+        content = (msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None))
         return content.strip() if isinstance(content, str) else str(content)
     except Exception as e:
         return f"Error during model call: {e}"
 def upload_file_to_mistral(client, path: str, filename: str | None = None, purpose: str = "batch", timeout: int = 120) -> str:
     fname = filename or os.path.basename(path)
     try:
     is_image = False
     is_video = False
     ext = ext_from_src(src)
     if ext in IMAGE_EXTS:
         is_image = True
     if ext in VIDEO_EXTS:
         is_video = True
     if is_remote(src):
         head = safe_head(src)
         if head:
     data_url = b64_bytes(jpeg, mime="image/jpeg")
     messages = [
         {"role": "system", "content": SYSTEM_INSTRUCTION},
+        {"role": "user", "content": [
+            {"type": "text", "text": prompt},
+            {"type": "image_url", "image_url": data_url},
+        ]},
     ]
     return chat_complete(client, PIXTRAL_MODEL, messages)
                 )
             except Exception:
                 continue
+        content = [{"type": "text", "text": prompt + "\n\nPlease consolidate observations across these frames into a single cohesive narrative."}] + image_entries
         messages = [
             {"role": "system", "content": SYSTEM_INSTRUCTION},
             {"role": "user", "content": content},
         ]
         return chat_complete(client, PIXTRAL_MODEL, messages)
+def process_media(src: str, custom_prompt: str, api_key: str, progress=None) -> str:
     client = get_client(api_key)
     prompt = (custom_prompt or "").strip() or "Please provide a detailed visual review."
     if not src:
         return "No URL or path provided."
+    if progress is not None:
+        progress(0.05, desc="Determining media type")
     is_image, is_video = determine_media_type(src)
     if is_image:
             raw = fetch_bytes(src)
         except Exception as e:
             return f"Error fetching image: {e}"
+        if progress is not None:
+            progress(0.2, desc="Analyzing image")
         try:
             return analyze_image_structured(client, raw, prompt)
         except UnidentifiedImageError:
             return f"Error fetching video: {e}"
         tmp_path = save_bytes_to_temp(raw, suffix=ext_from_src(src) or ".mp4")
         try:
+            if progress is not None:
+                progress(0.2, desc="Analyzing video")
             return analyze_video_cohesive(client, tmp_path, prompt)
         finally:
+            try: os.remove(tmp_path)
+            except Exception: pass
     try:
         raw = fetch_bytes(src)
+        if progress is not None:
+            progress(0.2, desc="Treating as image")
         return analyze_image_structured(client, raw, prompt)
     except Exception as e:
         return f"Unable to determine media type or fetch file: {e}"
 css = ".preview_media img, .preview_media video { max-width: 100%; height: auto; border-radius:6px; }"
 def _btn_label_for_status(status: str) -> str:
             except Exception:
                 return empty_img, empty_vid
+        url_input.change(fn=load_preview, inputs=[url_input], outputs=[preview_image, preview_video])
         def clear_all():
             """Build the four reset values returned when everything is cleared.

             The tuple holds an empty string, two ``gr.update`` payloads that blank
             the value and hide the component, and the string ``"idle"``.
             NOTE(review): which components receive these values is wired outside
             this view -- confirm against the caller.
             """
             first_hidden = gr.update(value=None, visible=False)
             second_hidden = gr.update(value=None, visible=False)
             return "", first_hidden, second_hidden, "idle"
             return "busy"
         submit_btn.click(fn=start_busy, inputs=[], outputs=[status_state])
+        def worker(url: str, prompt: str, key: str, progress=gr.Progress()):
             return process_media(url or "", prompt or "", key or "", progress)
+        submit_btn.click(fn=worker, inputs=[url_input, custom_prompt, api_key], outputs=[output_md], queue=True).then(
             fn=lambda res: ("error", "**Error:** no result returned.") if not res else
                            ("error", f"**Error:** {res}") if isinstance(res, str) and res.lower().startswith("error") else ("done", res),
             inputs=[output_md],