import os
import io
import time
import sys
import subprocess
import requests
from PIL import Image, ImageSequence
import gradio as gr
# llama-cpp-python import
try:
from llama_cpp import Llama
except Exception as e:
raise RuntimeError("llama-cpp-python import failed: " + str(e))
MODEL_DIR = "model"
MODEL_MAIN = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_m.gguf")
MODEL_FALLBACK = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_s.gguf")
# Candidate direct-download URLs (try in order)
CANDIDATES = [
# Primary Q4_K_M (Jasaga then mradermacher)
("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
MODEL_MAIN),
("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
MODEL_MAIN),
# Fallback Q4_K_S (mradermacher / Jasaga)
("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
MODEL_FALLBACK),
("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
MODEL_FALLBACK),
]
def download_curl(url: str, path: str) -> bool:
    """Download *url* to *path* using curl; return True on success, False otherwise.

    Creates the destination directory if missing. Uses -L to follow redirects,
    -C - to resume partial downloads, and -f so HTTP errors (404/403) make curl
    exit non-zero instead of saving the server's error page as the model file.
    On any failure the partial/invalid file is removed (best effort).
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)
    try:
        subprocess.check_call(["curl", "-fL", "-C", "-", "-o", path, url])
        return True
    except Exception:
        # Clean up whatever curl left behind; ignore cleanup failures.
        try:
            if os.path.exists(path):
                os.remove(path)
        except Exception:
            pass
        return False
def is_valid_gguf(path: str) -> bool:
    """Return True when *path* is readable and starts with the GGUF magic bytes."""
    try:
        with open(path, "rb") as fh:
            magic = fh.read(8)
    except Exception:
        # Missing file, permission error, etc. all count as "not valid".
        return False
    return magic[:4] == b"GGUF"
def ensure_models_downloaded():
    """Ensure a usable GGUF model file exists locally, downloading candidates if needed.

    Returns early if a valid main or fallback file is already present.
    Otherwise walks CANDIDATES in order; stops as soon as the main model is
    verified, but keeps trying remaining URLs after a fallback download so a
    main-model candidate later in the list can still win.
    """
    # Short-circuit when either model is already on disk and passes the header check.
    for existing, label in ((MODEL_MAIN, "main"), (MODEL_FALLBACK, "fallback")):
        if os.path.exists(existing) and is_valid_gguf(existing):
            sys.stderr.write(f"Found valid {label} model: {existing}\n")
            return
    sys.stderr.write("Model(s) missing or invalid; attempting downloads...\n")
    for url, dest in CANDIDATES:
        sys.stderr.write(f"Downloading {url} -> {dest}\n")
        if not download_curl(url, dest):
            sys.stderr.write(f"Download failed for {url}\n")
            continue
        if not is_valid_gguf(dest):
            # Server returned something that is not a GGUF (e.g. an HTML page).
            sys.stderr.write(f"Downloaded file at {dest} is not a valid GGUF (header mismatch). Removing.\n")
            try:
                os.remove(dest)
            except Exception:
                pass
            continue
        sys.stderr.write(f"Downloaded and verified GGUF at {dest}\n")
        if dest == MODEL_MAIN:
            return
        # Fallback verified: keep looping in case a main-model URL still succeeds.
    sys.stderr.write("Download attempts finished.\n")
def try_load_model(path: str, n_ctx: int = 2048, n_threads: int = 2):
    """Attempt to construct a Llama instance for *path*.

    Returns the loaded Llama object, or None when llama.cpp rejects the file
    (e.g. unsupported GGUF version or corruption); the error is logged to stderr.
    """
    sys.stderr.write(f"Initializing Llama with model {path}...\n")
    try:
        instance = Llama(model_path=path, n_ctx=n_ctx, n_threads=n_threads)
    except Exception as exc:
        sys.stderr.write(f"Failed to load model {path}: {exc}\n")
        return None
    sys.stderr.write("Model loaded successfully.\n")
    return instance
# Ensure at least one model file is present (download if needed)
ensure_models_downloaded()

def _select_model_path():
    """Return the preferred usable GGUF path (main first, then fallback), or None."""
    for candidate in (MODEL_MAIN, MODEL_FALLBACK):
        if os.path.exists(candidate) and is_valid_gguf(candidate):
            return candidate
    return None

# Prefer main, then fallback; retry the downloads once if neither is usable yet.
model_to_try = _select_model_path()
if model_to_try is None:
    ensure_models_downloaded()
    model_to_try = _select_model_path()
if model_to_try is None:
    raise FileNotFoundError("No valid GGUF model found. Place a compatible GGUF under model/ with filename\n"
                            "model/llama-joycaption-q4_k_m.gguf or model/llama-joycaption-q4_k_s.gguf.")

# Attempt to load the chosen model; a valid header can still fail to load
# (e.g. newer GGUF version), in which case fall back to the other file.
llm = try_load_model(model_to_try, n_ctx=2048, n_threads=2)
if llm is None and model_to_try == MODEL_MAIN:
    if os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
        sys.stderr.write("Primary model failed to load; attempting fallback model.\n")
        llm = try_load_model(MODEL_FALLBACK, n_ctx=2048, n_threads=2)
if llm is None:
    # Provide clear diagnostic and exit
    sys.stderr.write("\nERROR: All model load attempts failed. Likely causes:\n"
                     " - The GGUF uses a newer GGUF version not supported by the installed llama.cpp/llama-cpp-python.\n"
                     " - The file is corrupted despite the header check.\n\n"
                     "Recommended fixes:\n"
                     " - Install a newer llama.cpp/llama-cpp-python built from main/master (supports newer GGUF versions).\n"
                     " - Or place a known-compatible GGUF (Q4_K_S from mradermacher or older GGUF) at model/llama-joycaption-q4_k_m.gguf\n"
                     " - To inspect the header run: hexdump -n4 model/llama-joycaption-q4_k_m.gguf\n")
    raise RuntimeError("Model load failed for all candidates.")
def download_bytes(url: str, timeout: int = 30) -> bytes:
    """GET *url* and return the full response body as bytes.

    Raises requests.HTTPError for non-2xx responses and propagates
    connection/timeout errors to the caller.
    """
    response = requests.get(url, stream=True, timeout=timeout)
    with response as r:
        r.raise_for_status()
        body = r.content
    return body
def load_first_frame_from_bytes(raw: bytes):
    """Decode image bytes into an RGB PIL image.

    For animated formats (GIF/animated WebP) only the first frame is kept.
    """
    frame = Image.open(io.BytesIO(raw))
    if getattr(frame, "is_animated", False):
        # Grab the first frame of an animated image.
        frame = next(ImageSequence.Iterator(frame))
    return frame if frame.mode == "RGB" else frame.convert("RGB")
def make_prompt_for_image(image_path: str, user_prompt: str = "Describe the image."):
    """Build the text prompt embedding the image path for the model.

    JoyCaption-style multimodal GGUFs accept <img>{path}</img> markers.
    """
    parts = ["<img>", image_path, "</img>", "\nUser: ", user_prompt, "\nAssistant:"]
    return "".join(parts)
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    """Download an image from *url*, caption it with the local model, return the text.

    Never raises: every failure mode is reported as a human-readable string so
    Gradio can display it directly. The image is written to a temp JPEG because
    the prompt references it by path; the temp file is always cleaned up.
    """
    if not url:
        return "No URL provided."
    try:
        raw = download_bytes(url)
    except Exception as e:
        return f"Download error: {e}"
    try:
        img = load_first_frame_from_bytes(raw)
    except Exception as e:
        return f"Image processing error: {e}"
    tmp_dir = "/tmp/joycap"
    os.makedirs(tmp_dir, exist_ok=True)
    ts = int(time.time() * 1000)
    tmp_path = os.path.join(tmp_dir, f"{ts}.jpg")
    try:
        try:
            img.save(tmp_path, format="JPEG", quality=85)
        except Exception as e:
            # A failed save can still leave a partial file; outer finally removes it.
            return f"Failed to save temp image: {e}"
        prompt_full = make_prompt_for_image(tmp_path, prompt)
        try:
            # llama-cpp-python's Llama exposes create_completion (also aliased as
            # __call__); there is no `Llama.create`, which previously made every
            # request fail with "Inference error: AttributeError...".
            resp = llm.create_completion(
                prompt=prompt_full,
                max_tokens=256,
                temperature=0.2,
                top_p=0.95,
                stop=["User:", "Assistant:"],
            )
            text = resp.get("choices", [{}])[0].get("text", "").strip()
            return text or "No caption generated."
        except Exception as e:
            return f"Inference error: {e}"
    finally:
        # Best-effort cleanup of the temp file on every exit path past this point.
        try:
            os.remove(tmp_path)
        except Exception:
            pass
# Gradio UI: an image URL plus an optional prompt in, a caption string out.
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[
        gr.Textbox(label="Image URL", placeholder="https://example.com/photo.jpg"),
        gr.Textbox(label="Prompt (optional)", value="Describe the image."),
    ],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption GGUF (Q4_K)",
    description="Runs a quantized JoyCaption GGUF locally via llama.cpp (no external API).",
)
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 — the conventional setup for HF Spaces containers.
    iface.launch(server_name="0.0.0.0", server_port=7860)