# app.py — JoyCaption GGUF captioning Space (commit e4bf697)
# (Hugging Face file-viewer header removed; it is not part of the program.)
import os
import io
import time
import sys
import subprocess
import requests
from PIL import Image, ImageSequence
import gradio as gr
# llama-cpp-python import
# Fail fast with a clear message when the llama.cpp binding is missing or
# was built against an incompatible wheel — a later failure would be cryptic.
try:
    from llama_cpp import Llama
except Exception as e:
    raise RuntimeError("llama-cpp-python import failed: " + str(e))
# Local model directory and the canonical filename the rest of the code loads.
MODEL_DIR = "model"
EXPECTED_TARGET = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_m.gguf")

# Candidate direct-download URLs (try in order)
# Each entry is (url, destination path); ensure_model() walks the list and
# stops at the first download whose GGUF magic header checks out.
CANDIDATES = [
    # Jasaga7818 copy (often a direct GGUF)
    ("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
     EXPECTED_TARGET),
    # mradermacher (alternate host)
    ("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
     EXPECTED_TARGET),
    # Fallback to Q4_K_S (Jasaga)
    ("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
     os.path.join(MODEL_DIR, "llama-joycaption-q4_k_s.gguf")),
    # Same Q4_K_S fallback from the alternate host.
    ("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
     os.path.join(MODEL_DIR, "llama-joycaption-q4_k_s.gguf")),
]
def download_curl(url: str, path: str) -> bool:
    """Download ``url`` to ``path`` with curl; return True on success.

    curl is used (rather than requests) for resume support (``-C -``) and for
    progress output in the Space logs.  ``-f`` is essential: without it curl
    exits 0 on an HTTP 404 and saves the HTML error page as the "model".

    On any failure the partial file is removed so a later candidate URL is
    never resumed from another candidate's bytes.
    """
    parent = os.path.dirname(path)
    if parent:  # makedirs("") raises for a bare filename
        os.makedirs(parent, exist_ok=True)
    try:
        subprocess.check_call(["curl", "-fL", "-C", "-", "-o", path, url])
        return True
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: curl exited non-zero; OSError: curl not installed.
        try:
            os.remove(path)
        except OSError:
            pass
        return False
def is_valid_gguf(path: str) -> bool:
    """Return True when the file at ``path`` carries the GGUF magic header.

    Genuine GGUF files start with the ASCII bytes ``GGUF`` (0x47 0x47 0x55
    0x46) at offset 0; a botched download is often an HTML error page or a
    redirect body, which this header check rejects.
    """
    try:
        with open(path, "rb") as handle:
            header = handle.read(8)
    except Exception:
        # Missing or unreadable file counts as invalid.
        return False
    return header[:4] == b"GGUF"
def ensure_model() -> str:
    """Ensure a valid GGUF model exists locally and return its path.

    Walks CANDIDATES in order until a download passes the GGUF header check.
    When the valid file is not at EXPECTED_TARGET, a symlink (or, failing
    that, a copy) is created so the rest of the code can keep using the
    canonical filename.

    Raises FileNotFoundError when no candidate yields a valid model.
    """
    # If already present (and valid), use it.
    if os.path.exists(EXPECTED_TARGET) and is_valid_gguf(EXPECTED_TARGET):
        sys.stderr.write(f"Model already present and valid at {EXPECTED_TARGET}\n")
        return EXPECTED_TARGET
    sys.stderr.write("Model not found locally or invalid, attempting download (several GB)...\n")
    for url, dest in CANDIDATES:
        sys.stderr.write(f"Attempting download: {url} -> {dest}\n")
        if not download_curl(url, dest):
            sys.stderr.write("Download failed for candidate; trying next.\n")
            continue
        sys.stderr.write(f"Downloaded candidate to {dest}; verifying header...\n")
        if not is_valid_gguf(dest):
            sys.stderr.write("Downloaded file is not a valid GGUF (header mismatch). Removing and trying next.\n")
            try:
                os.remove(dest)
            except OSError:
                pass
            continue
        # If candidate wasn't the expected filename, create symlink so rest of
        # code can use EXPECTED_TARGET.
        if os.path.abspath(dest) != os.path.abspath(EXPECTED_TARGET):
            try:
                if os.path.exists(EXPECTED_TARGET):
                    os.remove(EXPECTED_TARGET)
                # Relative link target: both files live in MODEL_DIR.
                os.symlink(os.path.basename(dest), EXPECTED_TARGET)
                sys.stderr.write(f"Created symlink {EXPECTED_TARGET} -> {os.path.basename(dest)}\n")
            except OSError:
                # fallback: copy
                try:
                    import shutil
                    shutil.copyfile(dest, EXPECTED_TARGET)
                    sys.stderr.write(f"Copied {dest} to {EXPECTED_TARGET}\n")
                except OSError:
                    # Fix: the original returned EXPECTED_TARGET even when both
                    # the symlink and the copy failed, i.e. a path with no
                    # valid file behind it.  Fall back to the real location.
                    sys.stderr.write("Warning: failed to symlink or copy candidate to expected filename.\n")
                    sys.stderr.write("Model verified as GGUF and ready.\n")
                    return dest
        sys.stderr.write("Model verified as GGUF and ready.\n")
        return EXPECTED_TARGET
    raise FileNotFoundError("Failed to download a valid GGUF model from candidates. Check URLs and repo availability.")
# Ensure model exists and is a GGUF before importing/initializing Llama
# (ensure_model downloads/verifies; this re-check guards against a race or a
# symlink that points at a removed file).
MODEL_PATH = ensure_model()
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f"Model not found at {MODEL_PATH} after download attempt.")
def download_bytes(url: str, timeout: int = 30) -> bytes:
    """Fetch ``url`` and return the raw response body as bytes.

    Raises ``requests.HTTPError`` (via ``raise_for_status``) on HTTP errors;
    the caller turns exceptions into user-facing messages.

    Note: the original passed ``stream=True`` and then read ``r.content``,
    which buffers the entire body anyway — the misleading flag is dropped.
    """
    with requests.get(url, timeout=timeout) as r:
        r.raise_for_status()
        return r.content
def load_first_frame_from_bytes(raw: bytes):
    """Decode image bytes and return the first frame as an RGB PIL image.

    Animated inputs (e.g. GIF) are reduced to their first frame; anything not
    already RGB (palette, RGBA, grayscale) is converted so it can be saved
    as JPEG downstream.
    """
    frame = Image.open(io.BytesIO(raw))
    if getattr(frame, "is_animated", False):
        frame = next(ImageSequence.Iterator(frame))
    return frame if frame.mode == "RGB" else frame.convert("RGB")
def make_prompt_for_image(image_path: str, user_prompt: str = "Describe the image."):
    """Build the JoyCaption-style multimodal prompt for a saved image file."""
    # JoyCaption-style multimodal GGUFs accept an inline <img>{path}</img> tag.
    parts = ["<img>", image_path, "</img>", "\nUser: ", user_prompt, "\nAssistant:"]
    return "".join(parts)
# Initialize model (low-resource options)
print("Loading GGUF model (this can take 30–120s)...", file=sys.stderr)
# Adjust n_threads for the Space CPU; increase if you know you have more cores available.
# n_ctx=2048 keeps memory modest.  NOTE(review): a plain Llama() without a
# LLaVA/clip chat handler may not decode <img> tags — confirm this GGUF build
# supports the inline-image prompt used below.
llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=2)
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    """Download an image from ``url`` and caption it with the local model.

    Returns the generated caption, or a human-readable error string — the
    Gradio textbox displays whatever string comes back, so errors are
    reported in-band rather than raised.
    """
    if not url:
        return "No URL provided."
    try:
        raw = download_bytes(url)
    except Exception as e:
        return f"Download error: {e}"
    try:
        img = load_first_frame_from_bytes(raw)
    except Exception as e:
        return f"Image processing error: {e}"
    # Persist to a temp JPEG because the prompt embeds a file *path*.
    tmp_dir = "/tmp/joycap"
    os.makedirs(tmp_dir, exist_ok=True)
    ts = int(time.time() * 1000)  # millisecond timestamp as a unique-ish name
    tmp_path = os.path.join(tmp_dir, f"{ts}.jpg")
    try:
        img.save(tmp_path, format="JPEG", quality=85)
    except Exception as e:
        return f"Failed to save temp image: {e}"
    prompt_full = make_prompt_for_image(tmp_path, prompt)
    try:
        # Bug fix: llama_cpp.Llama has no ``create`` method — the completion
        # entry point is ``create_completion`` (calling ``llm(...)`` is an
        # alias).  The old code raised AttributeError on every request, which
        # surfaced as "Inference error: ...".
        resp = llm.create_completion(
            prompt=prompt_full,
            max_tokens=256,
            temperature=0.2,
            top_p=0.95,
            stop=["User:", "Assistant:"],
        )
        text = resp.get("choices", [{}])[0].get("text", "").strip()
        return text or "No caption generated."
    except Exception as e:
        return f"Inference error: {e}"
    finally:
        # Best-effort cleanup of the temp frame.
        try:
            os.remove(tmp_path)
        except Exception:
            pass
# Gradio UI: an image URL plus an optional prompt in, one caption string out.
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[
        gr.Textbox(label="Image URL", placeholder="https://example.com/photo.jpg"),
        gr.Textbox(label="Prompt (optional)", value="Describe the image."),
    ],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption GGUF (Q4_K)",
    description="Runs a quantized JoyCaption GGUF locally via llama.cpp (no external API).",
)

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard binding for a Hugging Face Space container.
    iface.launch(server_name="0.0.0.0", server_port=7860)