Image-To-Flux-Prompt

Running

App Files Files Community

Hug0endob committed on Dec 14, 2025

Commit

0fef3e3

verified ·

1 Parent(s): 019adc8

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -115

app.py CHANGED Viewed

@@ -1,7 +1,4 @@
 #!/usr/bin/env python3
-"""
-Flux - multimodal visual analyzer (Mistral + Gradio)
-"""
 import os
 import subprocess
@@ -31,17 +28,21 @@ SYSTEM_INSTRUCTION = (
 IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
 VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
 def get_client(key: str = None):
     api_key = (key or "").strip() or DEFAULT_KEY
     return Mistral(api_key=api_key)
 def is_remote(src: str) -> bool:
     return bool(src) and src.startswith(("http://", "https://"))
 def ext_from_src(src: str) -> str:
     _, ext = os.path.splitext((src or "").split("?")[0])
     return ext.lower()
 def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> bytes:
     if is_remote(src):
         with requests.get(src, timeout=timeout, stream=True) as r:
@@ -65,12 +66,9 @@ def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> byte
     with open(src, "rb") as f:
         return f.read()
 def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
-    try:
-        img = Image.open(BytesIO(media_bytes))
-    except UnidentifiedImageError:
-        raise
-    # handle animated GIFs by taking first frame
     try:
         img.seek(0)
     except Exception:
@@ -84,9 +82,11 @@ def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
     img.save(buf, format="JPEG", quality=85)
     return buf.getvalue()
 def b64_jpeg(img_bytes: bytes) -> str:
     return base64.b64encode(img_bytes).decode("utf-8")
 def save_bytes_to_temp(b: bytes, suffix: str):
     fd, path = tempfile.mkstemp(suffix=suffix)
     os.close(fd)
@@ -94,38 +94,32 @@ def save_bytes_to_temp(b: bytes, suffix: str):
         f.write(b)
     return path
-def choose_model_for_src(src: str):
-    ext = ext_from_src(src)
-    if ext in VIDEO_EXTS:
-        return DEFAULT_VIDEO_MODEL
-    if ext in IMAGE_EXTS:
-        return DEFAULT_IMAGE_MODEL
-    return DEFAULT_VIDEO_MODEL if is_remote(src) else DEFAULT_IMAGE_MODEL
 def build_messages_for_image(prompt: str, b64_jpg: str):
-    # Use a clear textual message with data URL; Mistral SDK supports structured image objects,
-    # but this textual form is broadly compatible.
     content = (
         f"{prompt}\n\nImage (data URI follows):\n\ndata:image/jpeg;base64,{b64_jpg}\n\n"
-        "Instruction: Analyze only visible, provided pixels. Do not assume unseen frames."
     )
     return [
         {"role": "system", "content": SYSTEM_INSTRUCTION},
         {"role": "user", "content": content},
     ]
 def build_messages_for_text(prompt: str, extra_text: str):
     return [
         {"role": "system", "content": SYSTEM_INSTRUCTION},
         {"role": "user", "content": f"{prompt}\n\n{extra_text}"},
     ]
 def extract_delta(chunk):
     if not chunk:
         return None
     data = getattr(chunk, "data", None) or getattr(chunk, "response", None) or getattr(chunk, "delta", None)
     if not data:
         return None
     try:
         content = data.choices[0].delta.content
         if content is None:
@@ -133,15 +127,6 @@ def extract_delta(chunk):
         return str(content)
     except Exception:
         pass
-    try:
-        c = data.choices[0].delta
-        if isinstance(c, dict):
-            txt = c.get("content") or c.get("text")
-            if txt is None:
-                return None
-            return str(txt)
-    except Exception:
-        pass
     try:
         msg = data.choices[0].message
         if isinstance(msg, dict):
@@ -158,6 +143,105 @@ def extract_delta(chunk):
     except Exception:
         return None
 def generate_final_text(src: str, custom_prompt: str, api_key: str):
     client = get_client(api_key)
     prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
@@ -212,7 +296,7 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
         except Exception as e:
             parts.append(f"[Model error: {e}]")
-    # Image (or frame)
     if is_image:
         try:
             raw = fetch_bytes(src)
@@ -221,102 +305,74 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
         except Exception as e:
             return f"Error processing image: {e}"
         msgs = build_messages_for_image(prompt, b64)
-        stream_and_collect(choose_model_for_src(src), msgs)
         return "".join(parts).strip()
-    # Remote video: send URL and explicit instruction to not hallucinate unseen frames
     if is_remote(src):
-        extra = (
-            f"Remote video URL: {src}\n\n"
-            "IMPORTANT: The model cannot access the video stream. Analyze only metadata, thumbnails, or "
-            "user-provided transcript/description. Do not invent frames or events."
-        )
-        msgs = build_messages_for_text(prompt, extra)
-        stream_and_collect(choose_model_for_src(src), msgs)
-        return "".join(parts).strip()
-    # Local video: attempt frame sampling with ffmpeg and send the clearest frame
     tmp_media = None
     try:
         media_bytes = fetch_bytes(src)
         _, ext = os.path.splitext(src) if src else ("", ".mp4")
         ext = ext or ".mp4"
         tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
-        ffmpeg = shutil.which("ffmpeg")
-        if ffmpeg:
-            # Try to probe duration and extract up to N frames evenly spaced
-            sample_count = 5
-            tmp_frames = []
-            try:
-                # get duration in seconds
-                probe_cmd = [ffmpeg, "-v", "error", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", tmp_media]
-                proc = subprocess.Popen(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                out, err = proc.communicate(timeout=10)
-                duration = None
-                try:
-                    duration = float(out.strip().split(b"\n")[0]) if out else None
-                except Exception:
-                    duration = None
-                # choose timestamps
-                timestamps = []
-                if duration and duration > 0:
-                    for i in range(1, sample_count + 1):
-                        t = (duration * i) / (sample_count + 1)
-                        timestamps.append(t)
-                else:
-                    # fallback fixed offsets
-                    timestamps = [0.5, 1.0, 2.0][:sample_count]
-                # extract frames
-                for i, t in enumerate(timestamps):
-                    fd, tmp_frame = tempfile.mkstemp(suffix=f"_{i}.jpg")
-                    os.close(fd)
-                    cmd = [
-                        ffmpeg, "-nostdin", "-y", "-i", tmp_media,
-                        "-ss", str(t),
-                        "-frames:v", "1",
-                        "-q:v", "2",
-                        tmp_frame
-                    ]
-                    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-                    try:
-                        out, err = proc.communicate(timeout=15)
-                    except subprocess.TimeoutExpired:
-                        try:
-                            proc.kill()
-                        except Exception:
-                            pass
-                        out, err = proc.communicate()
-                    if proc.returncode == 0 and os.path.exists(tmp_frame) and os.path.getsize(tmp_frame) > 0:
-                        tmp_frames.append(tmp_frame)
-                    else:
-                        try:
-                            if os.path.exists(tmp_frame):
-                                os.remove(tmp_frame)
-                        except Exception:
-                            pass
-                # pick best frame by size (simple heuristic) or first
-                chosen = None
-                if tmp_frames:
-                    chosen = max(tmp_frames, key=lambda p: os.path.getsize(p) if os.path.exists(p) else 0)
-                    with open(chosen, "rb") as f:
-                        frame_bytes = f.read()
-                    try:
-                        jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
-                        b64 = b64_jpeg(jpg)
-                        msgs = build_messages_for_image(prompt, b64)
-                        stream_and_collect(choose_model_for_src(src), msgs)
-                        return "".join(parts).strip()
-                    finally:
-                        for fpath in tmp_frames:
-                            try:
-                                if os.path.exists(fpath):
-                                    os.remove(fpath)
-                            except Exception:
-                                pass
-                # no frames extracted
-            except Exception:
-                pass
-        return "Unable to process the provided file. Provide a direct image/frame URL or a remote video URL."
     finally:
         try:
             if tmp_media and os.path.exists(tmp_media):
@@ -324,7 +380,8 @@ def generate_final_text(src: str, custom_prompt: str, api_key: str):
         except Exception:
             pass
-# UI
 css = """
 .preview_column { min-width: 380px; }
 .preview_media img, .preview_media video { max-width: 100%; height: auto; }
@@ -345,6 +402,7 @@ def load_preview(url: str):
     except Exception:
         return None, None, "Preview failed"
 with gr.Blocks(title="Flux", css=css) as demo:
     with gr.Row():
         with gr.Column(scale=1, elem_classes="preview_column"):

 #!/usr/bin/env python3
 import os
 import subprocess
 IMAGE_EXTS = (".jpg", ".jpeg", ".png", ".webp", ".gif")
 VIDEO_EXTS = (".mp4", ".mov", ".webm", ".mkv", ".avi", ".flv")
 def get_client(key: str = None):
     """Build a ``Mistral`` client for the given API key.

     A missing, empty or whitespace-only *key* falls back to ``DEFAULT_KEY``.
     """
     candidate = key.strip() if key else ""
     if not candidate:
         candidate = DEFAULT_KEY
     return Mistral(api_key=candidate)
 def is_remote(src: str) -> bool:
     """Return True when *src* is an ``http://`` or ``https://`` URL.

     The scheme is matched case-insensitively, so ``HTTPS://host/x`` counts as
     remote. Empty, ``None`` and non-string values return False instead of
     raising.
     """
     if not isinstance(src, str):
         return False
     # Only the scheme prefix matters; lowercase a short head, not the whole string.
     return src[:8].lower().startswith(("http://", "https://"))
 def ext_from_src(src: str) -> str:
     """Return the lower-cased file extension of a path or URL.

     For anything containing ``://`` only the URL *path* is inspected, so the
     query string, the ``#fragment`` and the host name (``https://example.com``
     has no extension) never leak into the result. Other inputs keep the
     historical behaviour of dropping everything after the first ``?``.

     Returns:
         The extension including its leading dot (e.g. ``".png"``), or ``""``
         when there is none or *src* is empty/``None``.
     """
     text = src or ""
     if "://" in text:
         # Function-scope import keeps this block self-contained.
         from urllib.parse import urlsplit

         try:
             text = urlsplit(text).path
         except ValueError:
             # Malformed URL (e.g. bad IPv6 literal): fall back to the plain split.
             text = text.split("?")[0]
     else:
         text = text.split("?")[0]
     _, ext = os.path.splitext(text)
     return ext.lower()
 def fetch_bytes(src: str, stream_threshold=STREAM_THRESHOLD, timeout=60) -> bytes:
     if is_remote(src):
         with requests.get(src, timeout=timeout, stream=True) as r:
     with open(src, "rb") as f:
         return f.read()
 def convert_to_jpeg_bytes(media_bytes: bytes, base_h=480) -> bytes:
+    img = Image.open(BytesIO(media_bytes))
     try:
         img.seek(0)
     except Exception:
     img.save(buf, format="JPEG", quality=85)
     return buf.getvalue()
 def b64_jpeg(img_bytes: bytes) -> str:
     """Return *img_bytes* as a Base64-encoded text string."""
     encoded = base64.b64encode(img_bytes)
     return str(encoded, "utf-8")
 def save_bytes_to_temp(b: bytes, suffix: str):
     fd, path = tempfile.mkstemp(suffix=suffix)
     os.close(fd)
         f.write(b)
     return path
 def build_messages_for_image(prompt: str, b64_jpg: str):
     """Return the system + user chat messages for an image request.

     The user message is plain text: the prompt, a ``data:image/jpeg;base64``
     URI built from *b64_jpg*, and a closing instruction line.
     """
     # NOTE(review): the image travels as a data URI inside ordinary text;
     # confirm the target model decodes it rather than needing a structured
     # image content chunk.
     user_text = (
         f"{prompt}\n\nImage (data URI follows):\n\ndata:image/jpeg;base64,{b64_jpg}\n\n"
         "Instruction: Analyze only visible, provided pixels."
     )
     messages = []
     messages.append({"role": "system", "content": SYSTEM_INSTRUCTION})
     messages.append({"role": "user", "content": user_text})
     return messages
 def build_messages_for_text(prompt: str, extra_text: str):
     """Return the system + user chat messages for a text-only request.

     The user message is *prompt* and *extra_text* separated by a blank line.
     """
     system_msg = {"role": "system", "content": SYSTEM_INSTRUCTION}
     user_msg = {"role": "user", "content": f"{prompt}\n\n{extra_text}"}
     return [system_msg, user_msg]
 def extract_delta(chunk):
     if not chunk:
         return None
     data = getattr(chunk, "data", None) or getattr(chunk, "response", None) or getattr(chunk, "delta", None)
     if not data:
         return None
+    # try common shapes
     try:
         content = data.choices[0].delta.content
         if content is None:
         return str(content)
     except Exception:
         pass
     try:
         msg = data.choices[0].message
         if isinstance(msg, dict):
     except Exception:
         return None
def extract_best_frame_bytes(media_path: str, sample_count: int = 5, timeout_probe: int = 10, timeout_extract: int = 15):
    """Sample frames from a video with ffmpeg and return the largest as JPEG bytes.

    The duration is probed with ``ffprobe`` (the ``-show_entries`` / ``-of``
    options belong to ffprobe, not ffmpeg). When it is known, *sample_count*
    timestamps are spread evenly across the clip; otherwise up to three fixed
    offsets (0.5 s, 1 s, 2 s) are used. Each timestamp is extracted to a JPEG
    and the biggest file wins, as a cheap proxy for the most detailed frame.

    Args:
        media_path: Path of the local media file.
        sample_count: Maximum number of frames to sample.
        timeout_probe: Seconds allowed for the duration probe.
        timeout_extract: Seconds allowed for each frame extraction.

    Returns:
        The chosen frame's bytes, or ``None`` when ffmpeg is not installed,
        *media_path* does not exist, or no frame could be extracted.
    """
    ffmpeg = shutil.which("ffmpeg")
    if not ffmpeg or not media_path or not os.path.exists(media_path):
        return None

    # A failed or timed-out probe is not fatal: fall back to the fixed offsets.
    duration = None
    ffprobe = shutil.which("ffprobe")
    if ffprobe:
        probe_cmd = [ffprobe, "-v", "error", "-show_entries", "format=duration",
                     "-of", "default=noprint_wrappers=1:nokey=1", media_path]
        try:
            probe = subprocess.run(
                probe_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                timeout=timeout_probe,
            )
            duration = float(probe.stdout.strip().split(b"\n")[0])
        except (OSError, subprocess.SubprocessError, ValueError, IndexError):
            duration = None

    wanted = max(sample_count, 0)
    if duration is not None and 0 < duration < float("inf"):
        timestamps = [(duration * i) / (wanted + 1) for i in range(1, wanted + 1)]
    else:
        timestamps = [0.5, 1.0, 2.0][:wanted]

    # One scratch directory holds every frame and is removed on exit,
    # including on error paths.
    with tempfile.TemporaryDirectory() as frame_dir:
        best_path = None
        best_size = 0
        for index, stamp in enumerate(timestamps):
            frame_path = os.path.join(frame_dir, f"frame_{index}.jpg")
            # -ss before -i seeks on the input, so ffmpeg does not decode
            # everything up to the timestamp for each sample.
            cmd = [
                ffmpeg, "-nostdin", "-y",
                "-ss", str(stamp),
                "-i", media_path,
                "-frames:v", "1",
                "-q:v", "2",
                frame_path,
            ]
            try:
                done = subprocess.run(
                    cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                    timeout=timeout_extract,
                )
            except (OSError, subprocess.SubprocessError):
                # subprocess.run kills the child on timeout; skip this sample.
                continue
            if done.returncode != 0:
                continue
            try:
                size = os.path.getsize(frame_path)
            except OSError:
                continue
            # Strict ">" keeps the earliest frame on ties and ignores empty files.
            if size > best_size:
                best_path, best_size = frame_path, size

        if best_path is None:
            return None
        with open(best_path, "rb") as fh:
            return fh.read()
+def upload_file_to_mistral(client, path, filename=None, purpose="batch"):
+    fname = filename or os.path.basename(path)
+    # Try SDK upload
+    try:
+        with open(path, "rb") as fh:
+            res = client.files.upload(file={"file_name": fname, "content": fh}, purpose=purpose)
+        # try to extract id
+        fid = getattr(res, "id", None) or (res.get("id") if isinstance(res, dict) else None)
+        if not fid:
+            try:
+                fid = res["data"][0]["id"]
+            except Exception:
+                pass
+        if not fid:
+            raise RuntimeError(f"No file id returned: {res}")
+        return fid
+    except Exception:
+        # Fallback to HTTP upload
+        api_key = client.api_key if hasattr(client, "api_key") else os.getenv("MISTRAL_API_KEY", "")
+        url = "https://api.mistral.ai/v1/files"
+        headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
+        with open(path, "rb") as fh:
+            files = {"file": (fname, fh)}
+            data = {"purpose": purpose}
+            r = requests.post(url, headers=headers, files=files, data=data, timeout=120)
+            r.raise_for_status()
+            jr = r.json()
+            fid = jr.get("id") or jr.get("data", [{}])[0].get("id")
+            if not fid:
+                raise RuntimeError(f"Upload failed to return id: {jr}")
+            return fid
 def generate_final_text(src: str, custom_prompt: str, api_key: str):
     client = get_client(api_key)
     prompt = (custom_prompt.strip() if custom_prompt and custom_prompt.strip() else "Please provide a detailed visual review.")
         except Exception as e:
             parts.append(f"[Model error: {e}]")
+    # Image path: convert and send
     if is_image:
         try:
             raw = fetch_bytes(src)
         except Exception as e:
             return f"Error processing image: {e}"
         msgs = build_messages_for_image(prompt, b64)
+        stream_and_collect(DEFAULT_IMAGE_MODEL, msgs)
         return "".join(parts).strip()
+    # Remote video: download, upload to Mistral Files, reference file id in chat
     if is_remote(src):
+        try:
+            media_bytes = fetch_bytes(src, timeout=120)
+        except Exception as e:
+            return f"Error downloading remote media: {e}"
+        ext = ext_from_src(src) or ".mp4"
+        tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
+        try:
+            try:
+                file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src.split("?")[0]))
+            except Exception as e:
+                # If upload fails, fallback to sending a representative frame
+                frame_bytes = extract_best_frame_bytes(tmp_media)
+                if not frame_bytes:
+                    return f"Error uploading to Mistral and no frame fallback available: {e}"
+                try:
+                    jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
+                except UnidentifiedImageError:
+                    jpg = frame_bytes
+                b64 = b64_jpeg(jpg)
+                msgs = build_messages_for_image(prompt, b64)
+                stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
+                return "".join(parts).strip()
+            extra = (
+                f"Remote video uploaded to Mistral Files with id: {file_id}\n\n"
+                "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
+            )
+            msgs = build_messages_for_text(prompt, extra)
+            stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
+            return "".join(parts).strip()
+        finally:
+            try:
+                if tmp_media and os.path.exists(tmp_media):
+                    os.remove(tmp_media)
+            except Exception:
+                pass
+    # Local video: try upload to Mistral; otherwise fallback to frames
     tmp_media = None
     try:
         media_bytes = fetch_bytes(src)
         _, ext = os.path.splitext(src) if src else ("", ".mp4")
         ext = ext or ".mp4"
         tmp_media = save_bytes_to_temp(media_bytes, suffix=ext)
+        try:
+            file_id = upload_file_to_mistral(client, tmp_media, filename=os.path.basename(src))
+            extra = (
+                f"Local video uploaded to Mistral Files with id: {file_id}\n\n"
+                "Instruction: Analyze the video contents using the uploaded file id. Do not invent frames not present."
+            )
+            msgs = build_messages_for_text(prompt, extra)
+            stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
+            return "".join(parts).strip()
+        except Exception:
+            # fallback to extracting a best frame
+            frame_bytes = extract_best_frame_bytes(tmp_media)
+            if not frame_bytes:
+                return "Unable to process the provided file. Provide a direct image/frame URL or a remote video URL."
+            jpg = convert_to_jpeg_bytes(frame_bytes, base_h=480)
+            b64 = b64_jpeg(jpg)
+            msgs = build_messages_for_image(prompt, b64)
+            stream_and_collect(DEFAULT_VIDEO_MODEL, msgs)
+            return "".join(parts).strip()
     finally:
         try:
             if tmp_media and os.path.exists(tmp_media):
         except Exception:
             pass
+# --- Minimal Gradio UI ---
 css = """
 .preview_column { min-width: 380px; }
 .preview_media img, .preview_media video { max-width: 100%; height: auto; }
     except Exception:
         return None, None, "Preview failed"
 with gr.Blocks(title="Flux", css=css) as demo:
     with gr.Row():
         with gr.Column(scale=1, elem_classes="preview_column"):