Spaces:

Hug0endob
/

Joycaption-basic

Build error

App Files Files Community

Hug0endob commited on Dec 14, 2025

Commit

71b45b9

verified ·

1 Parent(s): b89f943

Create app.py

Browse files

Files changed (1) hide show

app.py +58 -0

app.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import torch
+from transformers import AutoProcessor, LlavaForConditionalGeneration
+import gradio as gr
+from PIL import Image
+import requests
+# -------------------------------------------------
+# Model identifier (replace if you fork or use a different checkpoint)
+# -------------------------------------------------
+MODEL_NAME = "fpgaminer/joycaption-llama3.1-8b"   # 8‑B checkpoint fits comfortably on CPU
+# -------------------------------------------------
+# Load processor and model (CPU only)
+# -------------------------------------------------
+processor = AutoProcessor.from_pretrained(MODEL_NAME)
+# `device_map="cpu"` forces everything onto the CPU
+llava_model = LlavaForConditionalGeneration.from_pretrained(
+    MODEL_NAME,
+    device_map="cpu",
+    torch_dtype=torch.bfloat16,   # native dtype for this model
+)
+llava_model.eval()
+# -------------------------------------------------
+# Inference function used by Gradio
+# -------------------------------------------------
+def generate_caption(image: Image.Image, prompt: str = "Describe the image.") -> str:
+    # Prepare inputs for the model
+    inputs = processor(images=image, text=prompt, return_tensors="pt")
+    inputs = {k: v.to(llava_model.device) for k, v in inputs.items()}
+    # Generate up to 64 new tokens (adjust if you want longer captions)
+    with torch.no_grad():
+        output_ids = llava_model.generate(**inputs, max_new_tokens=64)
+    # Decode to plain text
+    caption = processor.decode(output_ids[0], skip_special_tokens=True)
+    return caption
+# -------------------------------------------------
+# Gradio UI
+# -------------------------------------------------
+iface = gr.Interface(
+    fn=generate_caption,
+    inputs=[
+        gr.Image(type="pil", label="Upload an image"),
+        gr.Textbox(label="Prompt (optional)", value="Describe the image.")
+    ],
+    outputs=gr.Textbox(label="Generated caption"),
+    title="JoyCaption (CPU‑only) Demo",
+    description="Upload an image and let the JoyCaption model generate a caption. Runs entirely on the free CPU tier.",
+    allow_flagging="never"
+)
+if __name__ == "__main__":
+    iface.launch()