Spaces:

Hug0endob
/

Joycaption-basic

Build error

App Files Files Community

Hug0endob committed on Dec 14, 2025

Commit

09c7c56

verified ·

1 Parent(s): 7766a5c

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -11

app.py CHANGED Viewed

@@ -7,22 +7,28 @@ from PIL import Image, ImageSequence
 from transformers import AutoProcessor, LlavaForConditionalGeneration
 import gradio as gr
-MODEL_NAME = "fancyfeast/llama-joycaption-beta-one-hf-llava"  # public repo
-# Optional: read HF token from secrets if you set HF_TOKEN in Space (not required for public repo)
-HF_TOKEN = os.getenv("HF_TOKEN")
-# Load processor and model (CPU only)
 token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
 processor = AutoProcessor.from_pretrained(MODEL_NAME, **token_arg)
 llava_model = LlavaForConditionalGeneration.from_pretrained(
     MODEL_NAME,
     device_map="cpu",
     torch_dtype=torch.bfloat16,
-    **token_arg
 )
 llava_model.eval()
 def download_bytes(url: str, timeout: int = 30) -> bytes:
     resp = requests.get(url, stream=True, timeout=timeout)
     resp.raise_for_status()
@@ -39,7 +45,6 @@ def mp4_to_gif(mp4_bytes: bytes) -> bytes:
     resp.raise_for_status()
     match = re.search(r'<img[^>]+src="([^"]+\.gif)"', resp.text)
     if not match:
-        # try to extract via other img tags
         match = re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', resp.text)
     if not match:
         raise RuntimeError("Failed to extract GIF URL from ezgif response")
@@ -60,6 +65,9 @@ def load_first_frame_from_bytes(raw: bytes) -> Image.Image:
         img = img.convert("RGB")
     return img
 def generate_caption_from_url(url: str, prompt: str = "Describe the image.") -> str:
     if not url:
         return "No URL provided."
@@ -70,7 +78,8 @@ def generate_caption_from_url(url: str, prompt: str = "Describe the image.") ->
     lower = url.lower().split("?")[0]
     try:
-        if lower.endswith(".mp4") or b"ftyp" in raw[:16].lower():
             try:
                 raw = mp4_to_gif(raw)
             except Exception as e:
@@ -89,17 +98,25 @@ def generate_caption_from_url(url: str, prompt: str = "Describe the image.") ->
     except Exception as e:
         return f"Inference error: {e}"
-iface = gr.Interface(
     fn=generate_caption_from_url,
     inputs=[
         gr.Textbox(label="Image / GIF / MP4 URL", placeholder="https://example.com/photo.jpg"),
         gr.Textbox(label="Prompt (optional)", value="Describe the image."),
     ],
     outputs=gr.Textbox(label="Generated caption"),
-    title="JoyCaption (public fancyfeast) - URL input",
     description="Paste a direct link to an image, GIF, or MP4. MP4 files are converted to GIF via ezgif.com; the first frame is captioned.",
-    allow_flagging="never",
 )
 if __name__ == "__main__":
     iface.launch()

 from transformers import AutoProcessor, LlavaForConditionalGeneration
 import gradio as gr
+# ---------------------------
+# Config
+# ---------------------------
+MODEL_NAME = "fancyfeast/llama-joycaption-beta-one-hf-llava"
+HF_TOKEN = os.getenv("HF_TOKEN")  # optional secret in Space settings
+# ---------------------------
+# Load model & processor
+# ---------------------------
 token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
 processor = AutoProcessor.from_pretrained(MODEL_NAME, **token_arg)
 llava_model = LlavaForConditionalGeneration.from_pretrained(
     MODEL_NAME,
     device_map="cpu",
     torch_dtype=torch.bfloat16,
+    **token_arg,
 )
 llava_model.eval()
+# ---------------------------
+# Helpers
+# ---------------------------
 def download_bytes(url: str, timeout: int = 30) -> bytes:
     resp = requests.get(url, stream=True, timeout=timeout)
     resp.raise_for_status()
     resp.raise_for_status()
     match = re.search(r'<img[^>]+src="([^"]+\.gif)"', resp.text)
     if not match:
         match = re.search(r'src="([^"]+?/tmp/[^"]+\.gif)"', resp.text)
     if not match:
         raise RuntimeError("Failed to extract GIF URL from ezgif response")
         img = img.convert("RGB")
     return img
+# ---------------------------
+# Main inference
+# ---------------------------
 def generate_caption_from_url(url: str, prompt: str = "Describe the image.") -> str:
     if not url:
         return "No URL provided."
     lower = url.lower().split("?")[0]
     try:
+        # crude MP4 detection by extension or ftyp box signature
+        if lower.endswith(".mp4") or raw[:16].lower().find(b"ftyp") != -1:
             try:
                 raw = mp4_to_gif(raw)
             except Exception as e:
     except Exception as e:
         return f"Inference error: {e}"
+# ---------------------------
+# Gradio UI (compatible init)
+# ---------------------------
+# Use try/except to support Gradio versions that don't accept allow_flagging
+gradio_kwargs = dict(
     fn=generate_caption_from_url,
     inputs=[
         gr.Textbox(label="Image / GIF / MP4 URL", placeholder="https://example.com/photo.jpg"),
         gr.Textbox(label="Prompt (optional)", value="Describe the image."),
     ],
     outputs=gr.Textbox(label="Generated caption"),
+    title="JoyCaption (fancyfeast) - URL input",
     description="Paste a direct link to an image, GIF, or MP4. MP4 files are converted to GIF via ezgif.com; the first frame is captioned.",
 )
+try:
+    iface = gr.Interface(**gradio_kwargs, allow_flagging="never")
+except TypeError:
+    iface = gr.Interface(**gradio_kwargs)
 if __name__ == "__main__":
     iface.launch()