# Residue from the Hugging Face file viewer (not part of app.py):
# Hug0endob's picture | Update app.py | 58e5fdf verified | raw | history blame | 8.11 kB
import os
import io
import time
import sys
import subprocess
import requests
from PIL import Image, ImageSequence
import gradio as gr
# llama-cpp-python import. Fail fast with a single, clearly worded error if the
# binding cannot be imported.
try:
    from llama_cpp import Llama
except Exception as e:
    # Exception (not just ImportError) is caught so that any failure raised
    # while importing the binding is reported the same way. Chaining with
    # ``from e`` keeps the original traceback attached to the RuntimeError.
    raise RuntimeError("llama-cpp-python import failed: " + str(e)) from e
# Local directory and file names under which model weights are stored.
MODEL_DIR = "model"
MODEL_MAIN = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_m.gguf")
MODEL_FALLBACK = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_s.gguf")

# Direct-download URLs for the primary Q4_K_M file (Jasaga, then mradermacher).
_MAIN_URLS = (
    "https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
    "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
)

# Direct-download URLs for the fallback Q4_K_S file (mradermacher, then Jasaga).
_FALLBACK_URLS = (
    "https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
    "https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
)

# (url, destination) pairs, tried in order: every primary URL first, then the
# fallback URLs.
CANDIDATES = [(url, MODEL_MAIN) for url in _MAIN_URLS] + [
    (url, MODEL_FALLBACK) for url in _FALLBACK_URLS
]
def download_curl(url: str, path: str) -> bool:
    """Download *url* to *path* using the ``curl`` command-line tool.

    The parent directory of *path* is created when it is missing. curl is run
    with redirect following (``-L``), resume (``-C -``) and ``--fail``.

    Args:
        url: Address to fetch.
        path: Destination file path.

    Returns:
        True when curl exits successfully. False when curl exits with an
        error or cannot be started; in that case any file left at *path* is
        removed on a best-effort basis.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") would raise.
    if parent:
        os.makedirs(parent, exist_ok=True)
    try:
        # --fail makes curl exit non-zero on HTTP errors (e.g. 404) instead of
        # writing the server's error page to *path* and reporting success.
        subprocess.check_call(["curl", "--fail", "-L", "-C", "-", "-o", path, url])
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: curl ran and failed. OSError: curl could not be
        # executed at all (for example, it is not installed).
        try:
            os.remove(path)
        except OSError:
            pass
        return False
    return True
def is_valid_gguf(path: str) -> bool:
    """Report whether the file at *path* begins with the ``GGUF`` magic bytes.

    Only the first bytes of the file are inspected. Any error while opening or
    reading the file yields False instead of propagating.
    """
    try:
        with open(path, "rb") as stream:
            prefix = stream.read(8)
    except Exception:
        return False
    return prefix[:4] == b"GGUF"
def ensure_models_downloaded():
    """Make sure a header-valid GGUF model file exists locally.

    Returns immediately when MODEL_MAIN or MODEL_FALLBACK already passes
    ``is_valid_gguf``. Otherwise the (url, dest) pairs in CANDIDATES are tried
    in order and the function returns after the first download that passes the
    header check. Downloads that fail the check are deleted.

    Progress is written to stderr. Nothing is returned or raised when every
    attempt fails; callers are expected to re-check the model files.
    """
    # If main present and valid, done.
    if os.path.exists(MODEL_MAIN) and is_valid_gguf(MODEL_MAIN):
        sys.stderr.write(f"Found valid main model: {MODEL_MAIN}\n")
        return
    # If fallback present and valid, done.
    if os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
        sys.stderr.write(f"Found valid fallback model: {MODEL_FALLBACK}\n")
        return

    sys.stderr.write("Model(s) missing or invalid; attempting downloads...\n")
    for url, dest in CANDIDATES:
        sys.stderr.write(f"Downloading {url} -> {dest}\n")
        if not download_curl(url, dest):
            sys.stderr.write(f"Download failed for {url}\n")
            continue
        if is_valid_gguf(dest):
            sys.stderr.write(f"Downloaded and verified GGUF at {dest}\n")
            # Stop at the first verified file. CANDIDATES lists every main URL
            # before the fallback URLs, so reaching a fallback means main is
            # unavailable. Continuing would re-download onto the same
            # destination, and a failed transfer there would delete the file
            # that was just verified.
            return
        sys.stderr.write(f"Downloaded file at {dest} is not a valid GGUF (header mismatch). Removing.\n")
        try:
            os.remove(dest)
        except OSError:
            pass
    sys.stderr.write("Download attempts finished.\n")
def try_load_model(path: str, n_ctx: int = 2048, n_threads: int = 2):
    """Try to construct a ``Llama`` instance for the model file at *path*.

    Args:
        path: Model file passed to ``Llama`` as ``model_path``.
        n_ctx: Forwarded to ``Llama`` unchanged.
        n_threads: Forwarded to ``Llama`` unchanged.

    Returns:
        The constructed ``Llama`` object, or None when construction raises.
        Progress and failures are reported on stderr.
    """
    log = sys.stderr.write
    try:
        log(f"Initializing Llama with model {path}...\n")
        model = Llama(model_path=path, n_ctx=n_ctx, n_threads=n_threads)
        log("Model loaded successfully.\n")
    except Exception as err:
        log(f"Failed to load model {path}: {err}\n")
        return None
    return model
def _pick_valid_model():
    """Return MODEL_MAIN or MODEL_FALLBACK (in that order) if it exists and passes the GGUF header check, else None."""
    for candidate in (MODEL_MAIN, MODEL_FALLBACK):
        if os.path.exists(candidate) and is_valid_gguf(candidate):
            return candidate
    return None


# Ensure at least one model file is present (download if needed).
ensure_models_downloaded()

# Prefer main, then fallback. If neither is usable, run the download step one
# more time and look again before giving up.
model_to_try = _pick_valid_model()
if model_to_try is None:
    ensure_models_downloaded()
    model_to_try = _pick_valid_model()

if model_to_try is None:
    raise FileNotFoundError("No valid GGUF model found. Place a compatible GGUF under model/ with filename\n"
                            "model/llama-joycaption-q4_k_m.gguf or model/llama-joycaption-q4_k_s.gguf.")

# Load the chosen model. When the main model was chosen but does not load, try
# the fallback file, provided it is present and header-valid.
llm = try_load_model(model_to_try, n_ctx=2048, n_threads=2)
if llm is None and model_to_try == MODEL_MAIN:
    if os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
        sys.stderr.write("Primary model failed to load; attempting fallback model.\n")
        llm = try_load_model(MODEL_FALLBACK, n_ctx=2048, n_threads=2)

if llm is None:
    # Nothing could be loaded: print a diagnostic and abort start-up.
    sys.stderr.write("\nERROR: All model load attempts failed. Likely causes:\n"
                     " - The GGUF uses a newer GGUF version not supported by the installed llama.cpp/llama-cpp-python.\n"
                     " - The file is corrupted despite the header check.\n\n"
                     "Recommended fixes:\n"
                     " - Install a newer llama.cpp/llama-cpp-python built from main/master (supports newer GGUF versions).\n"
                     " - Or place a known-compatible GGUF (Q4_K_S from mradermacher or older GGUF) at model/llama-joycaption-q4_k_m.gguf\n"
                     " - To inspect the header run: hexdump -n4 model/llama-joycaption-q4_k_m.gguf\n")
    raise RuntimeError("Model load failed for all candidates.")
def download_bytes(url: str, timeout: int = 30) -> bytes:
    """Fetch *url* with ``requests.get`` and return the response body as bytes.

    Args:
        url: Address to fetch.
        timeout: Passed to ``requests.get`` as its ``timeout`` argument.

    Raises:
        Whatever ``requests.get`` or ``raise_for_status`` raises, including
        for HTTP error status codes.
    """
    response = requests.get(url, stream=True, timeout=timeout)
    with response:
        response.raise_for_status()
        payload = response.content
    return payload
def load_first_frame_from_bytes(raw: bytes):
    """Open *raw* as an image with Pillow and return it in RGB mode.

    When the opened image reports ``is_animated``, the first item yielded by
    ``ImageSequence.Iterator`` is used. Images that are not already in RGB
    mode are converted.
    """
    picture = Image.open(io.BytesIO(raw))
    animated = getattr(picture, "is_animated", False)
    if animated:
        frames = ImageSequence.Iterator(picture)
        picture = next(frames)
    return picture if picture.mode == "RGB" else picture.convert("RGB")
def make_prompt_for_image(image_path: str, user_prompt: str = "Describe the image."):
    """Build the text prompt: an ``<img>`` tag around *image_path*, then a User/Assistant turn.

    The returned string ends with ``Assistant:``.
    """
    # The original author's note says JoyCaption-style multimodal GGUFs accept
    # <img>{path}</img>. NOTE(review): not verifiable from this file - confirm.
    template = "<img>{}</img>\nUser: {}\nAssistant:"
    return template.format(image_path, user_prompt)
def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    """Download the image at *url* and return a caption from the loaded model.

    The image is fetched, reduced to a single RGB frame, written to a
    temporary JPEG under ``/tmp/joycap`` and referenced from the text prompt
    sent to the module-level ``llm``. The temporary file is removed before
    returning.

    Args:
        url: Image address. Blank or missing values are rejected.
        prompt: Instruction for the model. A blank value falls back to
            "Describe the image." because the UI labels the field optional.

    Returns:
        The stripped caption text, "No caption generated." when the model
        returns empty text, or a short human-readable error message. Failures
        in any step are reported as a returned string rather than raised.
    """
    import tempfile  # local import: the file's import block is left untouched

    url = (url or "").strip()
    if not url:
        return "No URL provided."
    if not (prompt or "").strip():
        prompt = "Describe the image."

    try:
        raw = download_bytes(url)
    except Exception as e:
        return f"Download error: {e}"
    try:
        img = load_first_frame_from_bytes(raw)
    except Exception as e:
        return f"Image processing error: {e}"

    tmp_dir = "/tmp/joycap"
    tmp_path = None
    try:
        try:
            os.makedirs(tmp_dir, exist_ok=True)
            # mkstemp guarantees a unique name; a millisecond timestamp can
            # collide when two requests arrive together.
            fd, tmp_path = tempfile.mkstemp(suffix=".jpg", dir=tmp_dir)
            os.close(fd)
            img.save(tmp_path, format="JPEG", quality=85)
        except Exception as e:
            return f"Failed to save temp image: {e}"

        prompt_full = make_prompt_for_image(tmp_path, prompt)
        try:
            # llama-cpp-python's Llama exposes create_completion (also
            # reachable by calling the object); it has no ``create`` method.
            resp = llm.create_completion(
                prompt=prompt_full,
                max_tokens=256,
                temperature=0.2,
                top_p=0.95,
                stop=["User:", "Assistant:"],
            )
            # ``or [{}]`` also covers an empty choices list.
            choices = resp.get("choices") or [{}]
            text = (choices[0].get("text") or "").strip()
            return text or "No caption generated."
        except Exception as e:
            return f"Inference error: {e}"
    finally:
        # Runs on every exit path once a temp file exists, including a failed
        # save that left a partial file behind.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
# UI components: two text inputs (image URL and prompt) and one text output.
_url_box = gr.Textbox(label="Image URL", placeholder="https://example.com/photo.jpg")
_prompt_box = gr.Textbox(label="Prompt (optional)", value="Describe the image.")
_caption_box = gr.Textbox(label="Generated caption")

# Gradio interface that routes the two inputs to generate_caption_from_url.
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[_url_box, _prompt_box],
    outputs=_caption_box,
    title="JoyCaption GGUF (Q4_K)",
    description="Runs a quantized JoyCaption GGUF locally via llama.cpp (no external API).",
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)