File size: 8,105 Bytes
7766a5c
028a367
49d3ba7
d125cdc
e76c937
e4bf697
7766a5c
 
 
d125cdc
49d3ba7
 
 
d125cdc
49d3ba7
e4bf697
58e5fdf
 
e4bf697
 
 
58e5fdf
e4bf697
58e5fdf
e4bf697
58e5fdf
 
e4bf697
58e5fdf
 
 
e4bf697
 
 
e76c937
 
 
 
 
e4bf697
 
e76c937
e4bf697
 
 
 
 
 
 
 
 
 
e76c937
 
58e5fdf
 
 
 
 
 
 
 
 
 
 
e4bf697
58e5fdf
 
 
 
 
 
 
 
 
 
 
e4bf697
58e5fdf
 
 
 
 
e4bf697
58e5fdf
e4bf697
58e5fdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71b45b9
7766a5c
d125cdc
 
 
851e8b5
49d3ba7
028a367
851e8b5
 
 
 
7766a5c
851e8b5
49d3ba7
d125cdc
49d3ba7
 
 
7766a5c
 
 
 
 
 
 
 
 
 
 
49d3ba7
 
 
 
7766a5c
49d3ba7
 
 
028a367
49d3ba7
028a367
49d3ba7
 
 
 
 
 
028a367
49d3ba7
 
7766a5c
 
49d3ba7
 
 
 
 
71b45b9
49d3ba7
851e8b5
71b45b9
d125cdc
851e8b5
71b45b9
 
e76c937
d125cdc
71b45b9
 
 
49d3ba7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import os
import io
import time
import sys
import subprocess
import requests
from PIL import Image, ImageSequence
import gradio as gr

# llama-cpp-python import
try:
    from llama_cpp import Llama
except Exception as e:
    raise RuntimeError("llama-cpp-python import failed: " + str(e))

# Local model storage layout: both quantizations land under model/ with
# fixed names so the loader below can check for them deterministically.
MODEL_DIR = "model"
MODEL_MAIN = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_m.gguf")      # preferred (Q4_K_M)
MODEL_FALLBACK = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_s.gguf")  # smaller fallback (Q4_K_S)

# Candidate direct-download URLs (tried in order by ensure_models_downloaded).
# Each entry is (source_url, destination_path); earlier entries are preferred.
CANDIDATES = [
    # Primary Q4_K_M (Jasaga then mradermacher)
    ("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
     MODEL_MAIN),
    ("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
     MODEL_MAIN),
    # Fallback Q4_K_S (mradermacher / Jasaga)
    ("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
     MODEL_FALLBACK),
    ("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
     MODEL_FALLBACK),
]

def download_curl(url: str, path: str) -> bool:
    """Download *url* to *path* with the system curl binary.

    Flags: ``-L`` follows redirects, ``-f`` makes curl fail on HTTP errors
    (without it a 404/5xx error page would be saved as the model file and
    only be caught later by the GGUF header check), and ``-C -`` resumes a
    partial download.

    Returns:
        True on success; False on any failure.  A partial or failed file
        is removed so callers never validate leftovers.
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)
    try:
        # argv list, shell=False: the URL is never interpreted by a shell.
        subprocess.check_call(["curl", "-L", "-f", "-C", "-", "-o", path, url])
        return True
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: curl ran and failed; OSError: curl missing.
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError:
            pass
        return False

def is_valid_gguf(path: str) -> bool:
    """Cheap sanity check: True iff *path* starts with the GGUF magic bytes."""
    try:
        with open(path, "rb") as fh:
            magic = fh.read(8)
        # Only the first four bytes matter; the file magic is b"GGUF".
        return magic[:4] == b"GGUF"
    except Exception:
        # Missing, unreadable, or otherwise broken file: treat as invalid.
        return False

def ensure_models_downloaded():
    """Ensure at least one valid GGUF model file exists locally.

    Tries each (url, dest) pair from CANDIDATES in order.  Returns as soon
    as the preferred main model is valid on disk; a successfully fetched
    fallback is kept while later main-model candidates are still attempted.
    Destinations that are already valid are skipped, so a fallback obtained
    early is never re-downloaded by a later candidate pointing at the same
    path (the original code would fetch it again).

    Never raises: callers re-check which files exist afterwards.  All
    progress/diagnostics go to stderr.
    """
    # If main present and valid, done.
    if os.path.exists(MODEL_MAIN) and is_valid_gguf(MODEL_MAIN):
        sys.stderr.write(f"Found valid main model: {MODEL_MAIN}\n")
        return
    # If fallback present and valid, done.
    if os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
        sys.stderr.write(f"Found valid fallback model: {MODEL_FALLBACK}\n")
        return

    sys.stderr.write("Model(s) missing or invalid; attempting downloads...\n")
    for url, dest in CANDIDATES:
        # Skip destinations that were already downloaded and verified.
        if os.path.exists(dest) and is_valid_gguf(dest):
            continue
        sys.stderr.write(f"Downloading {url} -> {dest}\n")
        if not download_curl(url, dest):
            sys.stderr.write(f"Download failed for {url}\n")
            continue
        if is_valid_gguf(dest):
            sys.stderr.write(f"Downloaded and verified GGUF at {dest}\n")
            if dest == MODEL_MAIN:
                return  # preferred model obtained — nothing left to do
            # dest is the fallback: keep looping in case a main URL works.
        else:
            sys.stderr.write(f"Downloaded file at {dest} is not a valid GGUF (header mismatch). Removing.\n")
            try:
                os.remove(dest)
            except OSError:
                pass

    sys.stderr.write("Download attempts finished.\n")

def try_load_model(path: str, n_ctx: int = 2048, n_threads: int = 2):
    """Instantiate a Llama model from *path*.

    Returns the loaded ``Llama`` instance, or None if construction fails
    for any reason (diagnostics are written to stderr either way).
    """
    try:
        sys.stderr.write(f"Initializing Llama with model {path}...\n")
        model = Llama(model_path=path, n_ctx=n_ctx, n_threads=n_threads)
    except Exception as exc:
        sys.stderr.write(f"Failed to load model {path}: {exc}\n")
        return None
    sys.stderr.write("Model loaded successfully.\n")
    return model

# Ensure at least one model file is present (download if needed)
ensure_models_downloaded()

# Prefer main, then fallback — pick the first file that exists AND passes
# the GGUF magic-byte check.
model_to_try = None
if os.path.exists(MODEL_MAIN) and is_valid_gguf(MODEL_MAIN):
    model_to_try = MODEL_MAIN
elif os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
    model_to_try = MODEL_FALLBACK
else:
    # attempt to download again and pick whatever exists
    ensure_models_downloaded()
    if os.path.exists(MODEL_MAIN) and is_valid_gguf(MODEL_MAIN):
        model_to_try = MODEL_MAIN
    elif os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
        model_to_try = MODEL_FALLBACK

# Hard failure at import time: the app is useless without a model, so fail
# loudly with instructions rather than limping along.
if model_to_try is None:
    raise FileNotFoundError("No valid GGUF model found. Place a compatible GGUF under model/ with filename\n"
                            "model/llama-joycaption-q4_k_m.gguf or model/llama-joycaption-q4_k_s.gguf.")

# Attempt to load chosen model; if load fails for magic/version, try fallback (if different)
llm = try_load_model(model_to_try, n_ctx=2048, n_threads=2)
if llm is None and model_to_try == MODEL_MAIN and os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
    sys.stderr.write("Primary model failed to load; attempting fallback model.\n")
    llm = try_load_model(MODEL_FALLBACK, n_ctx=2048, n_threads=2)

# A valid header but failed load usually means a GGUF version newer than the
# installed llama.cpp supports, or silent corruption — explain both.
if llm is None:
    # Provide clear diagnostic and exit
    sys.stderr.write("\nERROR: All model load attempts failed. Likely causes:\n"
                     " - The GGUF uses a newer GGUF version not supported by the installed llama.cpp/llama-cpp-python.\n"
                     " - The file is corrupted despite the header check.\n\n"
                     "Recommended fixes:\n"
                     " - Install a newer llama.cpp/llama-cpp-python built from main/master (supports newer GGUF versions).\n"
                     " - Or place a known-compatible GGUF (Q4_K_S from mradermacher or older GGUF) at model/llama-joycaption-q4_k_m.gguf\n"
                     " - To inspect the header run: hexdump -n4 model/llama-joycaption-q4_k_m.gguf\n")
    raise RuntimeError("Model load failed for all candidates.")

def download_bytes(url: str, timeout: int = 30) -> bytes:
    """Fetch *url* and return the full response body as bytes.

    The original used ``stream=True`` and then read ``r.content``, which
    buffers the entire body anyway — the streaming flag only misled readers.
    A plain GET has identical behavior.

    Raises:
        requests.HTTPError: on a non-2xx status (via raise_for_status).
        requests.RequestException: on connection/timeout failures.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.content

def load_first_frame_from_bytes(raw: bytes):
    """Decode *raw* image bytes and return the first frame as an RGB PIL image."""
    image = Image.open(io.BytesIO(raw))
    # Animated formats (GIF/APNG): caption only the first frame.
    if getattr(image, "is_animated", False):
        image = next(ImageSequence.Iterator(image))
    return image if image.mode == "RGB" else image.convert("RGB")

def make_prompt_for_image(image_path: str, user_prompt: str = "Describe the image."):
    """Build the multimodal prompt for llama.cpp.

    JoyCaption-style multimodal GGUFs accept an ``<img>{path}</img>`` tag
    followed by a User/Assistant turn.
    """
    parts = [f"<img>{image_path}</img>", f"User: {user_prompt}", "Assistant:"]
    return "\n".join(parts)

def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
    """Download an image by URL and caption it with the loaded GGUF model.

    Returns the generated caption string, or a human-readable error string
    (this is a Gradio handler, so errors are surfaced as output text rather
    than raised).

    Bug fix: the original called ``llm.create(...)``, which does not exist
    on llama-cpp-python's ``Llama`` — every request died with an
    AttributeError rendered as "Inference error".  The correct API is
    ``create_completion``.
    """
    if not url:
        return "No URL provided."
    try:
        raw = download_bytes(url)
    except Exception as e:
        return f"Download error: {e}"
    try:
        img = load_first_frame_from_bytes(raw)
    except Exception as e:
        return f"Image processing error: {e}"

    # The model reads the image from disk via the <img> tag, so persist a
    # temp JPEG keyed by a millisecond timestamp.
    tmp_dir = "/tmp/joycap"
    os.makedirs(tmp_dir, exist_ok=True)
    ts = int(time.time() * 1000)
    tmp_path = os.path.join(tmp_dir, f"{ts}.jpg")
    try:
        img.save(tmp_path, format="JPEG", quality=85)
    except Exception as e:
        return f"Failed to save temp image: {e}"

    prompt_full = make_prompt_for_image(tmp_path, prompt)
    try:
        # create_completion is the llama-cpp-python completion entry point.
        resp = llm.create_completion(
            prompt=prompt_full,
            max_tokens=256,
            temperature=0.2,
            top_p=0.95,
            stop=["User:", "Assistant:"],  # don't run into the next turn
        )
        text = resp.get("choices", [{}])[0].get("text", "").strip()
        return text or "No caption generated."
    except Exception as e:
        return f"Inference error: {e}"
    finally:
        # Best-effort cleanup of the temp frame regardless of outcome.
        try:
            os.remove(tmp_path)
        except Exception:
            pass

# Minimal Gradio UI: two text inputs (image URL + optional prompt), one
# text output with the generated caption.
iface = gr.Interface(
    fn=generate_caption_from_url,
    inputs=[
        gr.Textbox(label="Image URL", placeholder="https://example.com/photo.jpg"),
        gr.Textbox(label="Prompt (optional)", value="Describe the image."),
    ],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption GGUF (Q4_K)",
    description="Runs a quantized JoyCaption GGUF locally via llama.cpp (no external API).",
)

if __name__ == "__main__":
    # Bind to all interfaces on the conventional Gradio/Spaces port.
    iface.launch(server_name="0.0.0.0", server_port=7860)