Hug0endob commited on
Commit
cd5ca02
·
verified ·
1 Parent(s): 6051035

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +283 -156
app.py CHANGED
@@ -1,143 +1,87 @@
 
 
 
1
  import os
2
  import io
3
- import time
4
  import sys
 
 
 
5
  import subprocess
 
 
6
  import requests
7
  from PIL import Image, ImageSequence
8
  import gradio as gr
9
 
10
- # llama-cpp-python import
11
- try:
12
- from llama_cpp import Llama
13
- except Exception as e:
14
- raise RuntimeError("llama-cpp-python import failed: " + str(e))
15
-
16
- MODEL_DIR = "model"
17
- MODEL_MAIN = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_m.gguf")
18
- MODEL_FALLBACK = os.path.join(MODEL_DIR, "llama-joycaption-q4_k_s.gguf")
19
-
20
- # Candidate direct-download URLs (try in order)
21
- CANDIDATES = [
22
- # Primary Q4_K_M (Jasaga then mradermacher)
23
- ("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
24
- MODEL_MAIN),
25
- ("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_m.gguf",
26
- MODEL_MAIN),
27
- # Fallback Q4_K_S (mradermacher / Jasaga)
28
- ("https://huggingface.co/mradermacher/llama-joycaption-beta-one-hf-llava-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
29
- MODEL_FALLBACK),
30
- ("https://huggingface.co/Jasaga7818/llama-joycaption-beta-one-hf-llava-Q4_K_M-GGUF/resolve/main/llama-joycaption-beta-one-hf-llava-q4_k_s.gguf",
31
- MODEL_FALLBACK),
32
- ]
33
-
34
- def download_curl(url: str, path: str) -> bool:
35
- os.makedirs(os.path.dirname(path), exist_ok=True)
36
- try:
37
- subprocess.check_call(["curl", "-L", "-C", "-", "-o", path, url])
38
- return True
39
- except Exception:
40
- try:
41
- if os.path.exists(path):
42
- os.remove(path)
43
- except Exception:
44
- pass
45
- return False
46
 
47
- def is_valid_gguf(path: str) -> bool:
48
- try:
49
- with open(path, "rb") as f:
50
- head = f.read(8)
51
- return head.startswith(b"GGUF")
52
- except Exception:
53
- return False
54
-
55
- def ensure_models_downloaded():
56
- # If main present and valid, done.
57
- if os.path.exists(MODEL_MAIN) and is_valid_gguf(MODEL_MAIN):
58
- sys.stderr.write(f"Found valid main model: {MODEL_MAIN}\n")
59
- return
60
- # If fallback present and valid, done.
61
- if os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
62
- sys.stderr.write(f"Found valid fallback model: {MODEL_FALLBACK}\n")
63
- return
64
-
65
- sys.stderr.write("Model(s) missing or invalid; attempting downloads...\n")
66
- for url, dest in CANDIDATES:
67
- sys.stderr.write(f"Downloading {url} -> {dest}\n")
68
- ok = download_curl(url, dest)
69
- if not ok:
70
- sys.stderr.write(f"Download failed for {url}\n")
71
- continue
72
- if is_valid_gguf(dest):
73
- sys.stderr.write(f"Downloaded and verified GGUF at {dest}\n")
74
- # if we downloaded fallback but main missing, don't copy; we'll try to load fallback later
75
- if dest == MODEL_MAIN:
76
- return
77
- # if dest is fallback, still continue loop to attempt main first (if available)
78
- else:
79
- sys.stderr.write(f"Downloaded file at {dest} is not a valid GGUF (header mismatch). Removing.\n")
80
- try:
81
- os.remove(dest)
82
- except Exception:
83
- pass
84
 
85
- sys.stderr.write("Download attempts finished.\n")
 
 
 
 
 
 
86
 
87
- def try_load_model(path: str, n_ctx: int = 2048, n_threads: int = 2):
88
- try:
89
- sys.stderr.write(f"Initializing Llama with model {path}...\n")
90
- llm = Llama(model_path=path, n_ctx=n_ctx, n_threads=n_threads)
91
- sys.stderr.write("Model loaded successfully.\n")
92
- return llm
93
- except Exception as e:
94
- sys.stderr.write(f"Failed to load model {path}: {e}\n")
95
- return None
96
-
97
- # Ensure at least one model file is present (download if needed)
98
- ensure_models_downloaded()
99
-
100
- # Prefer main, then fallback
101
- model_to_try = None
102
- if os.path.exists(MODEL_MAIN) and is_valid_gguf(MODEL_MAIN):
103
- model_to_try = MODEL_MAIN
104
- elif os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
105
- model_to_try = MODEL_FALLBACK
106
- else:
107
- # attempt to download again and pick whatever exists
108
- ensure_models_downloaded()
109
- if os.path.exists(MODEL_MAIN) and is_valid_gguf(MODEL_MAIN):
110
- model_to_try = MODEL_MAIN
111
- elif os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
112
- model_to_try = MODEL_FALLBACK
113
-
114
- if model_to_try is None:
115
- raise FileNotFoundError("No valid GGUF model found. Place a compatible GGUF under model/ with filename\n"
116
- "model/llama-joycaption-q4_k_m.gguf or model/llama-joycaption-q4_k_s.gguf.")
117
-
118
- # Attempt to load chosen model; if load fails for magic/version, try fallback (if different)
119
- llm = try_load_model(model_to_try, n_ctx=2048, n_threads=2)
120
- if llm is None and model_to_try == MODEL_MAIN and os.path.exists(MODEL_FALLBACK) and is_valid_gguf(MODEL_FALLBACK):
121
- sys.stderr.write("Primary model failed to load; attempting fallback model.\n")
122
- llm = try_load_model(MODEL_FALLBACK, n_ctx=2048, n_threads=2)
123
-
124
- if llm is None:
125
- # Provide clear diagnostic and exit
126
- sys.stderr.write("\nERROR: All model load attempts failed. Likely causes:\n"
127
- " - The GGUF uses a newer GGUF version not supported by the installed llama.cpp/llama-cpp-python.\n"
128
- " - The file is corrupted despite the header check.\n\n"
129
- "Recommended fixes:\n"
130
- " - Install a newer llama.cpp/llama-cpp-python built from main/master (supports newer GGUF versions).\n"
131
- " - Or place a known-compatible GGUF (Q4_K_S from mradermacher or older GGUF) at model/llama-joycaption-q4_k_m.gguf\n"
132
- " - To inspect the header run: hexdump -n4 model/llama-joycaption-q4_k_m.gguf\n")
133
- raise RuntimeError("Model load failed for all candidates.")
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  def download_bytes(url: str, timeout: int = 30) -> bytes:
136
- with requests.get(url, stream=True, timeout=timeout) as r:
137
- r.raise_for_status()
138
- return r.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- def load_first_frame_from_bytes(raw: bytes):
 
141
  img = Image.open(io.BytesIO(raw))
142
  if getattr(img, "is_animated", False):
143
  img = next(ImageSequence.Iterator(img))
@@ -145,60 +89,243 @@ def load_first_frame_from_bytes(raw: bytes):
145
  img = img.convert("RGB")
146
  return img
147
 
148
- def make_prompt_for_image(image_path: str, user_prompt: str = "Describe the image."):
149
- # JoyCaption-style multimodal GGUFs accept <img>{path}</img>
150
- return f"<img>{image_path}</img>\nUser: {user_prompt}\nAssistant:"
151
 
152
- def generate_caption_from_url(url: str, prompt: str = "Describe the image."):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  if not url:
154
  return "No URL provided."
155
  try:
156
  raw = download_bytes(url)
157
  except Exception as e:
158
  return f"Download error: {e}"
 
 
159
  try:
 
 
 
 
 
160
  img = load_first_frame_from_bytes(raw)
161
  except Exception as e:
162
  return f"Image processing error: {e}"
163
 
164
- tmp_dir = "/tmp/joycap"
165
- os.makedirs(tmp_dir, exist_ok=True)
166
- ts = int(time.time() * 1000)
167
- tmp_path = os.path.join(tmp_dir, f"{ts}.jpg")
168
  try:
169
- img.save(tmp_path, format="JPEG", quality=85)
170
- except Exception as e:
171
- return f"Failed to save temp image: {e}"
172
 
173
- prompt_full = make_prompt_for_image(tmp_path, prompt)
174
  try:
175
- resp = llm.create(
176
- prompt=prompt_full,
177
- max_tokens=256,
178
- temperature=0.2,
179
- top_p=0.95,
180
- stop=["User:", "Assistant:"],
 
 
 
 
181
  )
182
- text = resp.get("choices", [{}])[0].get("text", "").strip()
183
- return text or "No caption generated."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  except Exception as e:
185
  return f"Inference error: {e}"
186
- finally:
187
- try:
188
- os.remove(tmp_path)
189
- except Exception:
190
- pass
191
 
192
- iface = gr.Interface(
 
 
 
 
193
  fn=generate_caption_from_url,
194
  inputs=[
195
- gr.Textbox(label="Image URL", placeholder="https://example.com/photo.jpg"),
196
  gr.Textbox(label="Prompt (optional)", value="Describe the image."),
197
  ],
198
  outputs=gr.Textbox(label="Generated caption"),
199
- title="JoyCaption GGUF (Q4_K)",
200
- description="Runs a quantized JoyCaption GGUF locally via llama.cpp (no external API).",
201
  )
202
 
 
 
 
 
 
203
  if __name__ == "__main__":
204
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
  import os
5
  import io
6
+ import re
7
  import sys
8
+ import time
9
+ import hashlib
10
+ import pathlib
11
  import subprocess
12
+ from typing import Optional
13
+
14
  import requests
15
  from PIL import Image, ImageSequence
16
  import gradio as gr
17
 
18
+ # If you still want to use HF AutoProcessor / LlavaForConditionalGeneration for decoding,
19
+ # keep transformers installed and uncomment the imports below. This file instead uses
20
+ # llama-cpp-python for model inference (GGUF).
21
+ from transformers import AutoProcessor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
# ----------------------------------------------------------------------
# Config: set model URLs and optional checksums
# ----------------------------------------------------------------------
# Local cache directory for downloaded GGUF model files (created eagerly at
# import time so later downloads can assume it exists).
MODEL_DIR = pathlib.Path("model")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# Replace these with your preferred GGUF files (mradermacher or TheBloke variants)
# NOTE(review): these exact repo paths are not verified here -- confirm the
# URLs resolve before deploying.
Q4_K_M_URL = (
    "https://huggingface.co/mradermacher/joycaption-llama/resolve/main/llama-joycaption-q4_k_m.gguf"
)
Q4_K_S_URL = (
    "https://huggingface.co/mradermacher/joycaption-llama/resolve/main/llama-joycaption-q4_k_s.gguf"
)

# Optional: set SHA256 checksums to validate downloads (replace with real values)
Q4_K_M_SHA256: Optional[str] = None
Q4_K_S_SHA256: Optional[str] = None

# Generation params (sampling settings passed to llama-cpp)
MAX_NEW_TOKENS = 128
TEMPERATURE = 0.2
TOP_P = 0.95
STOP_STRS = ["\n"]  # stop generation at the first newline of the completion

# HF processor/model name used previously for tokenization/chat template
HF_PROCESSOR_NAME = "fancyfeast/llama-joycaption-beta-one-hf-llava"
HF_TOKEN = os.getenv("HF_TOKEN")  # optional; only needed for gated/private repos

# ----------------------------------------------------------------------
# Utilities: downloads, checksum, mp4->gif, image load
# ----------------------------------------------------------------------
54
def download_bytes(url: str, timeout: int = 30) -> bytes:
    """Fetch *url* and return the full response body as bytes.

    Raises ``requests.HTTPError`` on non-2xx responses and other
    ``requests.RequestException`` subclasses on connection/timeout failures.
    """
    # The previous version passed stream=True and then read .content, which
    # buffers the whole body anyway; a plain GET is equivalent and simpler.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.content
58
+
59
+
60
def mp4_to_gif(mp4_bytes: bytes) -> bytes:
    """Upload an MP4 to ezgif.com, scrape the converted GIF URL, and return the GIF bytes.

    Raises RuntimeError when no GIF URL can be found in the response page,
    and requests exceptions on network/HTTP failures.
    """
    upload = requests.post(
        "https://s.ezgif.com/video-to-gif",
        files={"new-file": ("video.mp4", mp4_bytes, "video/mp4")},
        data={"file": "video.mp4"},
        timeout=120,
    )
    upload.raise_for_status()

    # Prefer the converted <img> tag; fall back to any /tmp/ GIF reference.
    match = re.search(r'<img[^>]+src="([^"]+\.gif)"', upload.text) or re.search(
        r'src="([^"]+?/tmp/[^"]+\.gif)"', upload.text
    )
    if match is None:
        raise RuntimeError("Failed to extract GIF URL from ezgif response")

    gif_url = match.group(1)
    # Normalise scheme-relative and site-relative URLs.
    if gif_url.startswith("//"):
        gif_url = "https:" + gif_url
    elif gif_url.startswith("/"):
        gif_url = "https://s.ezgif.com" + gif_url

    with requests.get(gif_url, timeout=60) as download:
        download.raise_for_status()
        return download.content
82
 
83
+
84
def load_first_frame_from_bytes(raw: bytes) -> Image.Image:
    """Decode *raw* image bytes and return the first frame as an RGB image."""
    frame = Image.open(io.BytesIO(raw))
    # Animated formats (GIF/APNG/WebP) expose is_animated; take frame 0.
    if getattr(frame, "is_animated", False):
        frame = next(ImageSequence.Iterator(frame))
    return frame.convert("RGB")
91
 
 
 
 
92
 
93
def sha256_of_file(path: pathlib.Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*, read in 64 KiB chunks."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        while True:
            chunk = fh.read(65536)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
99
+
100
+
101
def download_file(url: str, dest: pathlib.Path, expected_sha256: Optional[str] = None) -> None:
    """Download *url* to *dest*, streaming to disk with a progress display.

    If *dest* already exists it is reused: when *expected_sha256* is given the
    file is kept only if its digest matches (otherwise it is re-downloaded);
    when no checksum is given the cached file is trusted as-is.

    Raises ValueError when the downloaded file fails checksum validation, and
    requests exceptions on network/HTTP errors.
    """
    if dest.is_file():
        if not expected_sha256:
            # Bug fix: the previous version unconditionally deleted a cache
            # hit that had no checksum to verify, re-downloading a multi-GB
            # model on every start.  Trust the cached file instead.
            return
        try:
            if sha256_of_file(dest) == expected_sha256:
                return
        except Exception:
            pass
        # Stale or corrupted cache entry: remove and re-download.
        dest.unlink()

    print(f"Downloading model from {url} -> {dest}")
    with requests.get(url, stream=True, timeout=120) as r:
        r.raise_for_status()
        total = int(r.headers.get("content-length", 0) or 0)
        downloaded = 0
        with open(dest, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                if not chunk:
                    continue
                f.write(chunk)
                downloaded += len(chunk)
                if total:
                    # Integer percentage, rewritten in place on one line.
                    pct = downloaded * 100 // total
                    print(f"\r{dest.name}: {pct}% ", end="", flush=True)
    print()
    if expected_sha256:
        got = sha256_of_file(dest)
        if got != expected_sha256:
            raise ValueError(f"Checksum mismatch for {dest}: got {got}, expected {expected_sha256}")
130
+
131
+
132
+ # ----------------------------------------------------------------------
133
+ # llama-cpp loading + automated rebuild
134
+ # ----------------------------------------------------------------------
135
def rebuild_llama_cpp() -> None:
    """Reinstall llama-cpp-python from source (PIP_NO_BINARY forces a build).

    Raises subprocess.CalledProcessError if any pip invocation fails.
    """
    env = dict(os.environ, PIP_NO_BINARY="llama-cpp-python")
    base_cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    # Upgrade pip first, then the build toolchain, then rebuild the package.
    for packages in (["pip"], ["cmake", "wheel", "setuptools"], ["llama-cpp-python"]):
        subprocess.check_call(base_cmd + packages, env=env)
142
+
143
+
144
def try_load_gguf() -> "llama_cpp.Llama":
    """Download and load a JoyCaption GGUF with llama-cpp-python.

    Tries Q4_K_M first, then Q4_K_S.  If both fail to load, rebuilds
    llama-cpp-python from source and retries the primary model once.

    Returns the loaded ``llama_cpp.Llama`` instance; re-raises the last
    load/rebuild error when every attempt fails.

    Fixes over the previous version: removed an unused
    ``from pathlib import Path`` and a duplicated ``import importlib``;
    re-raise with bare ``raise`` to preserve the traceback.
    """
    import importlib

    candidates = [
        (Q4_K_M_URL, MODEL_DIR / "llama-joycaption-q4_k_m.gguf", Q4_K_M_SHA256),
        (Q4_K_S_URL, MODEL_DIR / "llama-joycaption-q4_k_s.gguf", Q4_K_S_SHA256),
    ]

    last_exc = None
    for url, path, sha in candidates:
        try:
            download_file(url, path, expected_sha256=sha)
            print(f"Attempting to load GGUF: {path}")
            # Lazy import so an import-time failure is handled like a load failure.
            llama_cpp = importlib.import_module("llama_cpp")
            Llama = getattr(llama_cpp, "Llama")
            # Minimal params; adjust n_ctx or gpu settings if available.
            lm = Llama(model_path=str(path), n_ctx=2048, n_gpu_layers=0, verbose=False)
            print("Model loaded successfully.")
            return lm
        except Exception as e:
            print(f"Loading {path.name} failed: {e}")
            last_exc = e

    # Both variants failed: rebuild llama-cpp-python, then retry the primary model.
    try:
        print("Both GGUF variants failed to load. Rebuilding llama-cpp-python from source...")
        rebuild_llama_cpp()
    except Exception as e:
        print(f"Rebuild failed: {e}")
        raise last_exc or e

    try:
        # NOTE(review): importlib.reload does not re-load an already-imported
        # C extension in-process; the rebuilt wheel may only take effect after
        # an interpreter restart -- confirm in deployment.
        llama_cpp = importlib.reload(importlib.import_module("llama_cpp"))
        Llama = getattr(llama_cpp, "Llama")
        url, path, sha = candidates[0]
        if not path.is_file():
            download_file(url, path, expected_sha256=sha)
        lm = Llama(model_path=str(path), n_ctx=2048, n_gpu_layers=0, verbose=False)
        print("Model loaded successfully after rebuild.")
        return lm
    except Exception as e:
        print(f"Load after rebuild failed: {e}")
        raise
197
+
198
+
199
# ----------------------------------------------------------------------
# Processor and model wrapper
# ----------------------------------------------------------------------
# We keep AutoProcessor to reuse the chat template behaviour you used previously.
# NOTE(review): this downloads the HF processor at import time, so it needs
# network access (and HF_TOKEN for gated repos) -- confirm for offline runs.
processor = AutoProcessor.from_pretrained(
    HF_PROCESSOR_NAME,
    trust_remote_code=True,
    num_additional_image_tokens=1,
    # Pass the auth token only when HF_TOKEN is set in the environment.
    **({} if not HF_TOKEN else {"token": HF_TOKEN}),
)
209
+
210
# Lazy model holder
class ModelWrapper:
    """Loads the GGUF model lazily on first use and wraps text generation."""

    def __init__(self):
        # llama-cpp Llama instance; created on demand by ensure_model().
        self.llm = None

    def ensure_model(self):
        """Load the GGUF model if it has not been loaded yet."""
        if self.llm is None:
            self.llm = try_load_gguf()

    def generate(self, prompt: str, max_new_tokens: int = MAX_NEW_TOKENS):
        """Run a completion on *prompt* and return the generated text."""
        self.ensure_model()
        # llama-cpp-python call style: model(prompt, max_tokens=..., temperature=..., ...)
        response = self.llm(
            prompt,
            max_tokens=max_new_tokens,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            stop=STOP_STRS,
        )
        # Completions land in response["choices"][0]["text"].
        choices = response.get("choices", [{}])
        return choices[0].get("text", "")

MODEL = ModelWrapper()
228
+
229
+ # ----------------------------------------------------------------------
230
+ # Inference: convert URL->image, build prompt via processor chat template, run llama-cpp
231
+ # ----------------------------------------------------------------------
232
def generate_caption_from_url(url: str, prompt: str = "Describe the image.") -> str:
    """Fetch media from *url*, build a chat prompt, and return a caption.

    Accepts direct links to images, GIFs, or MP4s (MP4s are converted to GIF
    via ezgif first).  All failures are returned as human-readable strings
    rather than raised, so the Gradio UI always receives text.

    Fix over the previous version: removed a dead ``import torch`` inside the
    decode branch (the code only uses duck-typed ``.cpu()`` detection).
    """
    if not url:
        return "No URL provided."
    try:
        raw = download_bytes(url)
    except Exception as e:
        return f"Download error: {e}"

    lower = url.lower().split("?")[0]
    try:
        # MP4 detection: .mp4 extension, or an ISO-BMFF "ftyp" box near the start.
        if lower.endswith(".mp4") or b"ftyp" in raw[:16].lower():
            try:
                raw = mp4_to_gif(raw)
            except Exception as e:
                return f"MP4→GIF conversion failed: {e}"
        img = load_first_frame_from_bytes(raw)
    except Exception as e:
        return f"Image processing error: {e}"

    # Resize to a conservative size (512) expected by many VLMs.
    # NOTE(review): this squashes non-square images; consider aspect-preserving
    # letterboxing if caption quality suffers.
    try:
        img = img.resize((512, 512), resample=Image.BICUBIC)
    except Exception:
        pass

    try:
        # Produce a conversation so the processor inserts the image token correctly.
        conversation = [
            {"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt}]}
        ]
        inputs = processor.apply_chat_template(
            conversation,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
            images=img,
        )

        # Decode input_ids back into a plain text prompt for llama-cpp; fall
        # back to a simple template when the tokenizer round-trip is unavailable.
        text_prompt = None
        if hasattr(processor, "tokenizer") and getattr(inputs, "input_ids", None) is not None:
            try:
                input_ids = inputs["input_ids"][0]
                # Tensors expose .cpu(); plain sequences convert directly.
                if hasattr(input_ids, "cpu"):
                    ids = input_ids.cpu().numpy().tolist()
                else:
                    ids = list(input_ids)
                text_prompt = processor.tokenizer.decode(ids, skip_special_tokens=True)
            except Exception:
                text_prompt = None

        if not text_prompt:
            # Fallback: simple textual template with a tag where the image is referenced.
            text_prompt = f"<img> [image here] </img>\n{prompt}\nAnswer:"

        # Debug print for the Space logs (truncated, newlines escaped).
        print("Prompt to model (truncated):", text_prompt[:512].replace("\n", "\\n"))

        out_text = MODEL.generate(text_prompt, max_new_tokens=MAX_NEW_TOKENS)
        # Postprocess: strip surrounding whitespace.
        return out_text.strip()
    except Exception as e:
        return f"Inference error: {e}"
 
 
 
 
 
300
 
301
+
302
# ----------------------------------------------------------------------
# Gradio UI (URL + prompt -> text)
# ----------------------------------------------------------------------
gradio_kwargs = dict(
    fn=generate_caption_from_url,
    inputs=[
        gr.Textbox(label="Image / GIF / MP4 URL", placeholder="https://example.com/photo.jpg"),
        gr.Textbox(label="Prompt (optional)", value="Describe the image."),
    ],
    outputs=gr.Textbox(label="Generated caption"),
    title="JoyCaption - URL input (GGUF + auto-rebuild)",
    description="Paste a direct link to an image/GIF/MP4 (MP4 will be converted).",
)

# Older Gradio versions accept allow_flagging; newer ones raise TypeError,
# in which case the interface is built without it.
try:
    iface = gr.Interface(**gradio_kwargs, allow_flagging="never")
except TypeError:
    iface = gr.Interface(**gradio_kwargs)

if __name__ == "__main__":
    try:
        iface.launch(server_name="0.0.0.0", server_port=7860)
    finally:
        # Best-effort cleanup: close any lingering asyncio event loop so the
        # process can exit cleanly after the server shuts down.
        # NOTE(review): asyncio.get_event_loop() outside a running loop is
        # deprecated in recent Python versions -- verify this still behaves
        # as intended on the deployed runtime.
        try:
            import asyncio
            loop = asyncio.get_event_loop()
            if not loop.is_closed():
                loop.close()
        except Exception:
            pass