import os
import io
import time
import sys
import subprocess
import requests
from PIL import Image, ImageSequence
import gradio as gr
# llama-cpp-python import.
# Any failure is re-raised as a RuntimeError with a recognisable message; the
# original exception is chained as the explicit cause so the traceback shows
# what actually went wrong.
try:
    from llama_cpp import Llama
except Exception as e:
    raise RuntimeError("llama-cpp-python import failed: " + str(e)) from e
# Directory (relative to the working directory) that holds model files.
MODEL_DIR = "model"

# Path the rest of this module loads the model from.
EXPECTED_TARGET = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_m.gguf")

# Destination shared by both Q4_K_S fallback candidates.
_Q4_K_S_TARGET = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_s.gguf")

# Candidate direct-download URLs as (url, destination) pairs, tried in order.
CANDIDATES = [
    # Jasaga7818 copy (often a direct GGUF)
    (
        "https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
        EXPECTED_TARGET,
    ),
    # mradermacher (alternate host)
    (
        "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
        EXPECTED_TARGET,
    ),
    # Fallback to Q4_K_S, first from Jasaga7818 and then from mradermacher
    (
        "https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
        _Q4_K_S_TARGET,
    ),
    (
        "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
        _Q4_K_S_TARGET,
    ),
]
def download_curl(url: str, path: str) -> bool:
    """Download *url* to *path* with the ``curl`` executable.

    Args:
        url: Source URL; redirects are followed (``-L``).
        path: Destination file. Its parent directory is created if needed.

    Returns:
        True when curl exits successfully. False when curl is missing, exits
        non-zero, or the process cannot be started; in that case any file left
        at *path* is removed.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") raises.
    if parent:
        os.makedirs(parent, exist_ok=True)
    try:
        # --fail makes HTTP errors (404, 403, ...) exit non-zero instead of
        # saving the server's error page as if it were the model.
        # -C - asks curl to resume if a partial file is already present.
        subprocess.check_call(
            ["curl", "--fail", "-L", "-C", "-", "-o", path, url]
        )
        return True
    except (subprocess.SubprocessError, OSError):
        # Remove whatever was written so a truncated file is never mistaken
        # for a finished download later on.
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError:
            pass
        return False
def is_valid_gguf(path: str) -> bool:
    """Return True when the file at *path* begins with the GGUF magic bytes.

    GGUF files start with the ASCII bytes "GGUF" (0x47 0x47 0x55 0x46) at
    offset 0. A download may instead be an HTML page or a redirect body, so
    only the header is inspected. Any error while opening or reading the file
    counts as "not valid".
    """
    magic = b"GGUF"
    try:
        with open(path, "rb") as handle:
            header = handle.read(8)
    except Exception:
        return False
    return header[: len(magic)] == magic
def _discard_file(path: str) -> None:
    """Best-effort removal of *path*, including a dangling symlink."""
    try:
        # lexists (unlike exists) is also true for a symlink whose target is gone.
        if os.path.lexists(path):
            os.remove(path)
    except OSError:
        pass


def _publish_as_expected(dest: str) -> str:
    """Make the verified file *dest* reachable at EXPECTED_TARGET.

    Returns the path that should actually be loaded: EXPECTED_TARGET when
    *dest* already is that file or a symlink/copy could be created, otherwise
    *dest* itself.
    """
    if os.path.abspath(dest) == os.path.abspath(EXPECTED_TARGET):
        return EXPECTED_TARGET

    link_name = os.path.basename(dest)
    try:
        if os.path.lexists(EXPECTED_TARGET):
            os.remove(EXPECTED_TARGET)
        # The link target is a bare filename, so it resolves relative to the
        # directory that contains EXPECTED_TARGET.
        os.symlink(link_name, EXPECTED_TARGET)
        sys.stderr.write(f"Created symlink {EXPECTED_TARGET} -> {link_name}\n")
        return EXPECTED_TARGET
    except (OSError, NotImplementedError):
        pass

    # Fallback: copy
    try:
        import shutil

        shutil.copyfile(dest, EXPECTED_TARGET)
        sys.stderr.write(f"Copied {dest} to {EXPECTED_TARGET}\n")
        return EXPECTED_TARGET
    except OSError:
        # Do not leave a partial copy behind: it would carry a valid header
        # and pass the header check on the next start.
        _discard_file(EXPECTED_TARGET)
        sys.stderr.write("Warning: failed to symlink or copy candidate to expected filename.\n")
        return dest


def ensure_model() -> str:
    """Return the path of a usable GGUF model, downloading one if required.

    If EXPECTED_TARGET already exists with a GGUF header it is used as is.
    Otherwise each (url, destination) pair in CANDIDATES is tried in order;
    the first download whose header checks out is exposed at EXPECTED_TARGET
    (symlink, falling back to a copy).

    Returns:
        EXPECTED_TARGET, or the downloaded candidate's own path when it could
        be neither linked nor copied to EXPECTED_TARGET.

    Raises:
        FileNotFoundError: no candidate produced a valid GGUF file.
    """
    # is_valid_gguf already returns False for a missing file.
    if is_valid_gguf(EXPECTED_TARGET):
        sys.stderr.write(f"Model already present and valid at {EXPECTED_TARGET}\n")
        return EXPECTED_TARGET

    sys.stderr.write("Model not found locally or invalid, attempting download (several GB)...\n")
    for url, dest in CANDIDATES:
        sys.stderr.write(f"Attempting download: {url} -> {dest}\n")
        # curl is asked to resume (-C -); a leftover file that is not a GGUF
        # must go first or the new bytes would be appended to garbage.
        if os.path.lexists(dest) and not is_valid_gguf(dest):
            _discard_file(dest)

        if not download_curl(url, dest):
            sys.stderr.write("Download failed for candidate; trying next.\n")
            continue

        sys.stderr.write(f"Downloaded candidate to {dest}; verifying header...\n")
        if not is_valid_gguf(dest):
            sys.stderr.write("Downloaded file is not a valid GGUF (header mismatch). Removing and trying next.\n")
            _discard_file(dest)
            continue

        model_path = _publish_as_expected(dest)
        sys.stderr.write("Model verified as GGUF and ready.\n")
        return model_path

    raise FileNotFoundError("Failed to download a valid GGUF model from candidates. Check URLs and repo availability.")
# Resolve the model file (downloading it if needed) at import time, so a GGUF
# is on disk before the Llama instance is initialized.
MODEL_PATH = ensure_model()
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(
        f"Model not found at {MODEL_PATH} after download attempt."
    )
def download_bytes(url: str, timeout: int = 30, max_bytes: int = 64 * 1024 * 1024) -> bytes:
    """Fetch *url* over HTTP and return the response body.

    The URL comes from the user, so the body is read in chunks and capped at
    *max_bytes* instead of being loaded into memory unconditionally.

    Args:
        url: Address to fetch.
        timeout: Passed to ``requests.get`` as its timeout, in seconds.
        max_bytes: Largest body that will be accepted.

    Returns:
        The response body as bytes.

    Raises:
        requests.HTTPError: the server answered with an error status.
        ValueError: the body is, or declares itself to be, larger than
            *max_bytes*.
    """
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()

        # Reject early when the server announces an oversized body.
        declared = r.headers.get("Content-Length")
        if declared is not None and declared.isdigit() and int(declared) > max_bytes:
            raise ValueError(f"Response too large: {declared} bytes (limit {max_bytes}).")

        body = bytearray()
        for chunk in r.iter_content(chunk_size=64 * 1024):
            body.extend(chunk)
            # The header may be absent or wrong, so enforce the cap while reading too.
            if len(body) > max_bytes:
                raise ValueError(f"Response too large: more than {max_bytes} bytes.")
        return bytes(body)
def load_first_frame_from_bytes(raw: bytes):
    """Decode *raw* image bytes and return the image in RGB mode.

    For an image that reports itself as animated, only the first frame
    produced by ``ImageSequence.Iterator`` is kept.
    """
    image = Image.open(io.BytesIO(raw))
    animated = getattr(image, "is_animated", False)
    if animated:
        frames = ImageSequence.Iterator(image)
        image = next(frames)
    return image if image.mode == "RGB" else image.convert("RGB")
def make_prompt_for_image(image_path: str, user_prompt: str = "Describe the image."):
    """Build the text prompt sent to the model for one image.

    The prompt has three lines: the image path wrapped in ``<img>`` tags, the
    user's request prefixed with ``User:``, and an open ``Assistant:`` turn.

    NOTE(review): the original comment says JoyCaption-style multimodal GGUFs
    accept ``<img>{path}</img>``; confirm the loaded model/runtime really
    resolves the path to image data.
    """
    image_line = f"<img>{image_path}</img>"
    user_line = f"User: {user_prompt}"
    return "\n".join((image_line, user_line, "Assistant:"))
# Initialize the model once at import time (low-resource options).
sys.stderr.write("Loading GGUF model (this can take 30–120s)...\n")
# n_threads is sized for the Space CPU; increase it if more cores are available.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=2,
)
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    """Download the image at *url* and return a caption generated by the model.

    Args:
        url: Image address. Empty, missing, or whitespace-only values are
            rejected.
        prompt: Instruction for the model; a blank value falls back to
            "Describe the image.".

    Returns:
        The caption text, or a human-readable message describing which step
        failed (download, decoding, temp-file write, inference). Errors are
        returned as text rather than raised so the UI can display them.
    """
    import tempfile  # local import: only needed for the scratch JPEG

    if isinstance(url, str):
        url = url.strip()
    if not url:
        return "No URL provided."
    # The prompt box is optional in the UI, so tolerate it being cleared.
    if not prompt or not str(prompt).strip():
        prompt = "Describe the image."

    try:
        raw = download_bytes(url)
    except Exception as e:
        return f"Download error: {e}"
    try:
        img = load_first_frame_from_bytes(raw)
    except Exception as e:
        return f"Image processing error: {e}"

    tmp_dir = "/tmp/joycap"
    tmp_path = None
    try:
        try:
            os.makedirs(tmp_dir, exist_ok=True)
            # mkstemp picks a unique name, so concurrent requests cannot
            # overwrite each other's image the way a timestamp name could.
            fd, tmp_path = tempfile.mkstemp(suffix=".jpg", dir=tmp_dir)
            os.close(fd)
            img.save(tmp_path, format="JPEG", quality=85)
        except Exception as e:
            return f"Failed to save temp image: {e}"

        # NOTE(review): the image reaches the model only as a file path inside
        # <img> tags. llama-cpp-python's documented multimodal route is a chat
        # handler with a CLIP projector file; confirm this prompt form is
        # actually honoured by the loaded model.
        prompt_full = make_prompt_for_image(tmp_path, prompt)
        try:
            # Llama exposes create_completion (also reachable by calling the
            # object); it has no method named "create".
            resp = llm.create_completion(
                prompt=prompt_full,
                max_tokens=256,
                temperature=0.2,
                top_p=0.95,
                stop=["User:", "Assistant:"],
            )
            text = resp.get("choices", [{}])[0].get("text", "").strip()
            return text or "No caption generated."
        except Exception as e:
            return f"Inference error: {e}"
    finally:
        # Runs on every exit path above, including a failed save.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
# Gradio UI: two text inputs (image URL, optional prompt) and one text output.
_url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/photo.jpg")
_prompt_input = gr.Textbox(label="Prompt (optional)", value="Describe the image.")
_caption_output = gr.Textbox(label="Generated caption")

iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[_url_input, _prompt_input],
    outputs=_caption_output,
    title="JoyCaption GGUF (Q4_K)",
    description="Runs a quantized JoyCaption GGUF locally via llama.cpp (no external API).",
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    iface.launch(server_name="0.0.0.0", server_port=7860)
|