Spaces:
Build error
Build error
File size: 3,007 Bytes
7766a5c 028a367 49d3ba7 d125cdc f275d7c 7766a5c d125cdc 49d3ba7 d125cdc 49d3ba7 d125cdc 49d3ba7 d125cdc 71b45b9 7766a5c d125cdc 851e8b5 49d3ba7 028a367 851e8b5 7766a5c 851e8b5 49d3ba7 d125cdc 49d3ba7 d125cdc 49d3ba7 028a367 49d3ba7 7766a5c 49d3ba7 7766a5c 49d3ba7 028a367 49d3ba7 028a367 49d3ba7 028a367 49d3ba7 7766a5c 49d3ba7 71b45b9 49d3ba7 851e8b5 71b45b9 d125cdc 851e8b5 71b45b9 d125cdc 71b45b9 49d3ba7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 | import os
import io
import time
import sys
import requests
from PIL import Image, ImageSequence
import gradio as gr
# llama-cpp-python supplies the GGUF runtime used below; without it the app
# cannot do anything useful, so start-up is aborted with an explicit message.
try:
    from llama_cpp import Llama
except Exception as e:
    # The broad catch is kept from the original (a failing native library may
    # not surface as ImportError -- TODO confirm). ``from e`` records the
    # underlying failure as the explicit cause in the traceback.
    raise RuntimeError("llama-cpp-python import failed: " + str(e)) from e
# Location of the quantized GGUF weights, relative to the working directory.
MODEL_PATH = os.path.join("model", "llama-joycaption-q4_k_m.gguf")

# Fail fast at start-up instead of on the first request. isfile() is used
# rather than exists() so that a directory sitting at this path is rejected
# too -- it could never be loaded as a model.
if not os.path.isfile(MODEL_PATH):
    raise FileNotFoundError(f"Model not found at {MODEL_PATH}. Ensure start.sh downloaded the GGUF.")
def download_bytes(url: str, timeout: int = 30, max_bytes: int = 50 * 1024 * 1024) -> bytes:
    """Fetch *url* with ``requests`` and return the response body.

    Args:
        url: Address to download.
        timeout: Forwarded to ``requests.get`` as its ``timeout`` argument.
        max_bytes: Largest body that will be accepted. The URL comes from
            the user, so an unbounded read could exhaust memory.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.RequestException: On connection problems, or (through
            ``raise_for_status``) on an HTTP error status.
        ValueError: If the body is larger than ``max_bytes``.
    """
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        # Read incrementally so an oversized body is rejected as soon as the
        # limit is crossed instead of after it has been buffered completely.
        chunks = []
        received = 0
        for chunk in r.iter_content(chunk_size=64 * 1024):
            received += len(chunk)
            if received > max_bytes:
                raise ValueError(f"Response body exceeds {max_bytes} bytes")
            chunks.append(chunk)
    return b"".join(chunks)
def load_first_frame_from_bytes(raw: bytes):
    """Decode *raw* image bytes and return a single RGB image.

    For an image that reports itself as animated, only the first frame
    produced by ``ImageSequence.Iterator`` is kept. Any mode other than
    ``"RGB"`` is converted to ``"RGB"``.

    Args:
        raw: Encoded image data, in any format ``Image.open`` can read.

    Returns:
        The opened (and possibly converted) image object.
    """
    picture = Image.open(io.BytesIO(raw))

    animated = getattr(picture, "is_animated", False)
    if animated:
        frames = ImageSequence.Iterator(picture)
        picture = next(frames)

    return picture if picture.mode == "RGB" else picture.convert("RGB")
def make_prompt_for_image(image_path: str, user_prompt: str = "Describe the image."):
    """Build the text prompt that refers to an image file on disk.

    The result has three lines: an ``<img>...</img>`` tag wrapping
    *image_path*, a ``User:`` line carrying *user_prompt*, and a bare
    ``Assistant:`` line for the model to continue.

    NOTE(review): the original author's comment states that JoyCaption-style
    multimodal GGUFs accept ``<img>{path}</img>``; nothing in this function
    verifies that the runtime actually resolves the path -- confirm.
    """
    image_tag = f"<img>{image_path}</img>"
    turns = (image_tag, f"User: {user_prompt}", "Assistant:")
    return "\n".join(turns)
# Load the model once, at import time, so every request shares one instance.
# The settings are deliberately small (low-resource options).
# NOTE(review): no vision projector / chat handler is configured here, so it
# is unclear whether image content ever reaches the model -- confirm.
print("Loading GGUF model (this can take 30–120s)...", file=sys.stderr)
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=2,
)
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    """Download the image at *url* and return a caption produced by ``llm``.

    The function is the Gradio callback, so every failure is reported as a
    readable string (shown in the output box) rather than raised.

    Args:
        url: Address of the image. Empty, ``None`` or whitespace-only input
            yields ``"No URL provided."``.
        prompt: Instruction for the model. A blank value falls back to
            ``"Describe the image."`` because the UI labels it optional.

    Returns:
        The generated caption, ``"No caption generated."`` when the model
        returns no text, or a message prefixed with ``"Download error:"``,
        ``"Image processing error:"``, ``"Failed to save temp image:"`` or
        ``"Inference error:"``.
    """
    import tempfile  # local import: only this function needs it

    url = (url or "").strip()
    if not url:
        return "No URL provided."
    prompt = (prompt or "").strip() or "Describe the image."

    try:
        raw = download_bytes(url)
    except Exception as e:
        return f"Download error: {e}"

    try:
        img = load_first_frame_from_bytes(raw)
    except Exception as e:
        return f"Image processing error: {e}"

    tmp_dir = "/tmp/joycap"
    try:
        os.makedirs(tmp_dir, exist_ok=True)
        # mkstemp picks a unique name, so two requests arriving in the same
        # millisecond can no longer overwrite or delete each other's file.
        fd, tmp_path = tempfile.mkstemp(suffix=".jpg", dir=tmp_dir)
    except OSError as e:
        return f"Failed to save temp image: {e}"

    try:
        try:
            with os.fdopen(fd, "wb") as fh:
                img.save(fh, format="JPEG", quality=85)
        except Exception as e:
            return f"Failed to save temp image: {e}"

        prompt_full = make_prompt_for_image(tmp_path, prompt)
        try:
            # Llama exposes create_completion() (or __call__); there is no
            # create() method, so the original call always raised.
            resp = llm.create_completion(
                prompt=prompt_full,
                max_tokens=256,
                temperature=0.2,
                top_p=0.95,
                stop=["User:", "Assistant:"],
            )
            # Tolerate a missing or empty "choices" list.
            choices = resp.get("choices") or [{}]
            text = (choices[0].get("text") or "").strip()
            return text or "No caption generated."
        except Exception as e:
            return f"Inference error: {e}"
    finally:
        # Best-effort cleanup on every path, including a failed save.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
# UI components. The two inputs map positionally onto the parameters of
# generate_caption_from_url (url, prompt); the single output shows its
# returned string.
_url_input = gr.Textbox(label="Image URL", placeholder="https://example.com/photo.jpg")
_prompt_input = gr.Textbox(label="Prompt (optional)", value="Describe the image.")
_caption_output = gr.Textbox(label="Generated caption")

iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[_url_input, _prompt_input],
    outputs=_caption_output,
    title="JoyCaption GGUF (Q4_K_M)",
    description="Runs a quantized JoyCaption GGUF locally via llama.cpp (no external API).",
)

if __name__ == "__main__":
    # Listen on all interfaces, port 7860, when run as a script.
    iface.launch(server_name="0.0.0.0", server_port=7860)
|