Spaces:
Build error
Build error
File size: 7,535 Bytes
cd5ca02 b3b505f cd5ca02 e4bf697 a8bd35c 7766a5c cd5ca02 b3b505f e4bf697 b3b505f e4bf697 b3b505f 71b45b9 cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f a8bd35c cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f cd5ca02 b3b505f 7766a5c b3b505f 7766a5c b3b505f 7766a5c b3b505f cd5ca02 b3b505f a8bd35c b3b505f a8bd35c 7766a5c a8bd35c 7766a5c b3b505f a8bd35c 028a367 b3b505f a8bd35c b3b505f cd5ca02 b3b505f 7766a5c b3b505f cd5ca02 b3b505f 851e8b5 b3b505f 71b45b9 b3b505f 71b45b9 a8bd35c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 | #!/usr/bin/env python3
import os, io, re, sys, subprocess, hashlib, pathlib, time
from typing import Optional
import requests
from PIL import Image, ImageSequence, UnidentifiedImageError # Import UnidentifiedImageError
import gradio as gr
MODEL_DIR = pathlib.Path("model")
MODEL_DIR.mkdir(exist_ok=True, parents=True)
# Public mradermacher GGUF links (no tokens)
PRIMARY_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_s.gguf"
FALLBACK_URL = "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-q4_k_m.gguf"
PRIMARY_NAME = MODEL_DIR / "llama-joycaption-q4_k_s.gguf"
FALLBACK_NAME = MODEL_DIR / "llama-joycaption-q4_k_m.gguf"
# Generation params
MAX_TOKENS = 128
TEMPERATURE = 0.2
TOP_P = 0.95
STOP = ["\n"]
def download_file(url: str, dest: pathlib.Path, timeout=120):
    """Stream *url* to *dest* with a simple percentage progress indicator.

    The payload is written to a temporary ``<dest>.part`` file and renamed
    into place only after the stream completes.  This fixes a latent bug in
    the original: an interrupted download left a truncated file at *dest*,
    and the ``dest.exists()`` fast-path then made every later run treat the
    corrupt file as a finished model.

    Args:
        url: HTTP(S) URL to fetch.
        dest: Final path for the downloaded file; skipped if it already exists.
        timeout: Per-request timeout in seconds passed to requests.

    Raises:
        requests.HTTPError: on a non-2xx response.
    """
    if dest.exists():
        # Only completed downloads are renamed to `dest`, so existing == done.
        return
    tmp = dest.with_suffix(dest.suffix + ".part")
    print("Downloading", url)
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            total = int(r.headers.get("content-length", 0) or 0)
            done = 0
            with open(tmp, "wb") as f:
                for chunk in r.iter_content(8192):
                    if not chunk:
                        continue
                    f.write(chunk)
                    done += len(chunk)
                    if total:
                        pct = done * 100 // total
                        print(f"\r{dest.name}: {pct}% ", end="", flush=True)
        # Rename marks the download as complete (atomic on POSIX).
        tmp.replace(dest)
    except BaseException:
        # Never leave a partial file behind; re-raise the original error.
        tmp.unlink(missing_ok=True)
        raise
    print()
def mp4_to_gif(mp4_bytes: bytes) -> bytes:
    """Convert MP4 bytes to GIF bytes using the public ezgif.com service.

    Uploads the video, scrapes the response HTML for the generated GIF's
    URL, normalizes relative URLs, and downloads the GIF.

    Raises:
        RuntimeError: if no GIF URL can be found in the response page.
        requests.HTTPError: on a non-2xx response from either request.
    """
    upload = requests.post(
        "https://s.ezgif.com/video-to-gif",
        files={"new-file": ("video.mp4", mp4_bytes, "video/mp4")},
        data={"file": "video.mp4"},
        timeout=120,
    )
    upload.raise_for_status()
    html = upload.text
    # Two scrape patterns: a rendered <img> tag, then a bare /tmp/ GIF link.
    match = re.search(r'<img[^>]+src="([^"]+\.gif)"', html)
    if match is None:
        match = re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', html)
    if match is None:
        raise RuntimeError("GIF URL not found")
    gif_url = match.group(1)
    # Normalize scheme-relative and site-relative URLs to absolute https.
    if gif_url.startswith("//"):
        gif_url = "https:" + gif_url
    elif gif_url.startswith("/"):
        gif_url = "https://s.ezgif.com" + gif_url
    download = requests.get(gif_url, timeout=60)
    download.raise_for_status()
    return download.content
def load_first_frame(raw: bytes):
    """Decode *raw* image bytes and return the first frame as an RGB PIL image.

    For animated inputs (e.g. GIF) only the first frame is kept.  Every PIL
    failure is normalized to ValueError so callers handle one exception type.

    Raises:
        ValueError: if the bytes cannot be identified, decoded, or converted.
    """
    try:
        frame = Image.open(io.BytesIO(raw))
        if getattr(frame, "is_animated", False):
            # Corrupted animations can fail mid-iteration; surface that clearly.
            # NOTE: this ValueError is re-caught by the outer handler below and
            # rewrapped, matching the original control flow.
            try:
                frame = next(ImageSequence.Iterator(frame))
            except Exception as exc:
                raise ValueError(f"Could not extract first frame from animated image: {exc}")
        if frame.mode != "RGB":
            frame = frame.convert("RGB")
        return frame
    except UnidentifiedImageError:
        raise ValueError("Could not identify image format or image is corrupted.")
    except Exception as exc:
        # Any other PIL failure (e.g. I/O errors while parsing).
        raise ValueError(f"Failed to load or process image with PIL: {exc}")
def rebuild_llama_cpp():
    """Force a from-source reinstall of llama-cpp-python.

    Sets PIP_NO_BINARY so pip must compile the wheel locally, after first
    upgrading pip itself and the build toolchain (cmake/wheel/setuptools).

    Raises:
        subprocess.CalledProcessError: if any pip invocation fails.
    """
    env = dict(os.environ)
    env["PIP_NO_BINARY"] = "llama-cpp-python"
    base_cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    # Same three pip invocations as before, in the same order.
    for packages in (["pip"], ["cmake", "wheel", "setuptools"], ["llama-cpp-python"]):
        subprocess.check_call(base_cmd + packages, env=env)
_llama = None
def ensure_model():
    """Download a GGUF model and load it into the module-global ``_llama``.

    Tries the primary quantization first, then the fallback.  If neither
    loads, rebuilds llama-cpp-python from source once and retries the
    primary model.  No-op when a model is already loaded.

    Fix over the original: a file that downloaded but failed to load is now
    deleted before moving on.  Previously the corrupt/partial file was kept,
    and because ``download_file`` skips existing files, every retry (the
    fallback loop and the post-rebuild retry) reloaded the same bad file.

    Raises:
        RuntimeError: if the rebuild fails or the model still cannot load.
    """
    global _llama
    if _llama is not None:
        return  # already loaded
    # Try primary, then fallback.
    for url, path in ((PRIMARY_URL, PRIMARY_NAME), (FALLBACK_URL, FALLBACK_NAME)):
        try:
            download_file(url, path)
            import importlib
            llama_cpp = importlib.import_module("llama_cpp")
            print("Loading", path)
            _llama = llama_cpp.Llama(model_path=str(path), n_ctx=2048, n_gpu_layers=0, verbose=False)
            print("Loaded model:", path.name)
            return
        except Exception as e:
            print("Load failed for", path.name, ":", e)
            # A file that exists but failed to load is likely corrupt or
            # truncated; remove it so a retry re-downloads instead of
            # reusing it (download_file skips existing files).
            try:
                if path.exists():
                    path.unlink()
            except OSError:
                pass  # best effort; a retry will surface any real problem
    # Neither candidate loaded: rebuild llama-cpp-python from source once.
    try:
        print("Rebuilding llama-cpp-python from source...")
        rebuild_llama_cpp()
    except Exception as e:
        raise RuntimeError("Rebuild failed: " + str(e))
    # Retry the primary model against the freshly rebuilt bindings.
    try:
        import importlib
        download_file(PRIMARY_URL, PRIMARY_NAME)
        # reload picks up the newly installed extension module
        llama_cpp = importlib.reload(importlib.import_module("llama_cpp"))
        _llama = llama_cpp.Llama(model_path=str(PRIMARY_NAME), n_ctx=2048, n_gpu_layers=0, verbose=False)
        print("Loaded after rebuild.")
        return
    except Exception as e:
        raise RuntimeError("Load after rebuild failed: " + str(e))
def build_prompt(img_tag: str, user_prompt: str):
    """Assemble the minimal text prompt fed to the model.

    The image payload is wrapped in ``<image>...</image>`` pseudo-tags,
    followed by the user's request and an ``Answer:`` cue on its own line.
    """
    pieces = ("<image>", img_tag, "</image>\n", user_prompt, "\nAnswer:")
    return "".join(pieces)
def generate_caption_from_url(url: str, prompt: str="Describe the image."):
    """Fetch media from *url*, extract one frame, and caption it with the model.

    MP4 input is first converted to GIF via ezgif; the first frame is then
    resized to 512x512, PNG-encoded to base64 and embedded in the prompt.
    Every failure mode is returned as a human-readable string, never raised,
    so the Gradio UI always has something to display.
    """
    if not url:
        return "No URL provided."
    # --- download the raw media ------------------------------------------
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        raw = response.content
    except Exception as e:
        return "Download error: " + str(e)
    # --- decode to a single RGB frame ------------------------------------
    try:
        path_part = url.lower().split("?")[0]
        # Heuristic: URL extension, or an MP4 "ftyp" box near the file start.
        if path_part.endswith(".mp4") or raw[:16].lower().find(b"ftyp") != -1:
            try:
                raw = mp4_to_gif(raw)
            except Exception as e:
                return "MP4→GIF conversion failed: " + str(e)
        img = load_first_frame(raw)  # raises ValueError with a clear message
    except ValueError as e:
        return "Image processing error: " + str(e)
    except Exception as e:
        return "An unexpected image processing error occurred: " + str(e)
    # --- normalize size (best effort) ------------------------------------
    try:
        img = img.resize((512, 512), resample=Image.BICUBIC)
    except Exception as e:
        print(f"Warning: Image resizing failed: {e}. Attempting to proceed without resizing.")
        # Captioning may still work on the un-resized frame, so continue.
    # --- encode the frame as a base64 PNG placeholder tag -----------------
    import base64
    buf = io.BytesIO()
    try:
        img.save(buf, format="PNG")
    except Exception as e:
        return "Failed to encode image to base64: " + str(e)
    img_tag = base64.b64encode(buf.getvalue()).decode()
    prompt_text = build_prompt(img_tag, prompt or "Describe the image.")
    # --- run inference ----------------------------------------------------
    try:
        ensure_model()
        completion = _llama(prompt_text, max_tokens=MAX_TOKENS, temperature=TEMPERATURE, top_p=TOP_P, stop=STOP)
        text = completion.get("choices", [{}])[0].get("text", "")
        return text.strip()
    except Exception as e:
        return "Inference error: " + str(e)
# Gradio UI: a URL textbox and a prompt textbox in, a caption textbox out.
# The callable is generate_caption_from_url, which returns error strings
# instead of raising, so the output box always shows something.
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[gr.Textbox(label="Image / GIF / MP4 URL"), gr.Textbox(label="Prompt", value="Describe the image.")],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption (minimal GGUF, auto-rebuild)",
    description="No tokens required. Downloads a public GGUF and runs locally via llama-cpp."
)
# Bind on all interfaces at port 7860 (the conventional HF Spaces port).
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)