Upload 2 files
- app.py +161 -0
- requirements.txt +8 -0
app.py
ADDED
@@ -0,0 +1,161 @@
# app.py
"""
Image Captioning demo with Gradio + Hugging Face transformers.

Environment variables:
    MODEL_ID               - huggingface model id (default: Salesforce/blip-image-captioning-base)
    TRUST_REMOTE_CODE      - "true"/"false" to allow custom repo code (default: false)
    HUGGINGFACE_HUB_TOKEN  - optional, if your model is private

Run:
    python app.py
"""
import os
import logging
from typing import Optional

import torch
from PIL import Image
from transformers import pipeline
import gradio as gr

# ----------------------------
# Configuration & logging
# ----------------------------
MODEL_ID = os.environ.get("MODEL_ID", "Salesforce/blip-image-captioning-base")
TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "false").lower() in ("1", "true", "yes")
HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")  # optional (for private models)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("image-caption-gradio")

# ----------------------------
# Device helper
# ----------------------------
def get_pipeline_device() -> int:
    """
    Return device index for transformers pipeline:
    0 (GPU) if available, else -1 (CPU)
    """
    return 0 if torch.cuda.is_available() else -1


# ----------------------------
# Load pipeline (global)
# ----------------------------
caption_pipe = None
_load_error: Optional[str] = None

def load_caption_pipeline():
    """
    Load the image-to-text pipeline once (global).
    Uses HUGGINGFACE_HUB_TOKEN if set for private models.
    """
    global caption_pipe, _load_error
    if caption_pipe is not None or _load_error:
        return

    device = get_pipeline_device()
    logger.info("Loading model '%s' (trust_remote_code=%s) on device %s", MODEL_ID, TRUST_REMOTE_CODE, device)

    try:
        # If HUB_TOKEN is provided, transformers/huggingface_hub will pick it up from env.
        caption_pipe = pipeline(
            "image-to-text",
            model=MODEL_ID,
            device=device,
            trust_remote_code=TRUST_REMOTE_CODE,
        )
        logger.info("Model loaded successfully.")
    except Exception as e:
        _load_error = str(e)
        logger.exception("Failed to load model: %s", e)


# Preload model at startup (best-effort)
load_caption_pipeline()


# ----------------------------
# Inference function used by Gradio
# ----------------------------
def caption_image(img: Image.Image) -> str:
    """
    Run the captioning pipeline on a PIL image and return the caption text.
    """
    if _load_error:
        # If loading failed earlier, return the error for the UI
        return f"Error loading model: {_load_error}"

    if caption_pipe is None:
        # Try loading lazily if not loaded yet
        load_caption_pipeline()
        if caption_pipe is None:
            return "Model not loaded. Try again in a moment."

    try:
        outputs = caption_pipe(img)
        # pipeline usually returns a list of dicts with 'generated_text'
        if isinstance(outputs, list) and outputs:
            caption = outputs[0].get("generated_text") or outputs[0].get("caption") or str(outputs[0])
        else:
            caption = str(outputs)
        return caption.strip()
    except Exception as e:
        logger.exception("Captioning error: %s", e)
        return f"Captioning failed: {e}"


# ----------------------------
# Gradio UI
# ----------------------------
title = "Image Captioning"
description = (
    "Upload an image and the model will generate a short descriptive caption. "
    "Model: <b>{}</b>. ".format(MODEL_ID)
)

examples = [
    # If you want, place example image paths here (local files in repo), or leave empty.
    # ["examples/cat.jpg"],
]

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    # Status row
    with gr.Row():
        model_info = gr.Textbox(label="Model", value=MODEL_ID, interactive=False)
        device_info = gr.Textbox(label="Device", value=("cuda" if torch.cuda.is_available() else "cpu"), interactive=False)
        status_info = gr.Textbox(label="Model status", value=("loaded" if caption_pipe is not None and not _load_error else f"error: {_load_error}" if _load_error else "loading"), interactive=False)

    gr.Markdown("## Upload image")
    with gr.Row():
        image_input = gr.Image(type="pil", label="Image", tool="editor")
        with gr.Column():
            run_btn = gr.Button("Generate Caption")
            clear_btn = gr.Button("Clear")
            gr.Markdown("**Tips:** use clear photos; try different crops in the editor for better captions.")

    output = gr.Textbox(label="Caption", interactive=False)

    # Example images (optional)
    if examples:
        gr.Examples(examples=examples, inputs=image_input, label="Examples")

    # Actions
    run_btn.click(fn=caption_image, inputs=image_input, outputs=output)
    clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, output])

    gr.Markdown("---")
    gr.Markdown("**Notes**: If the model is private, set the `HUGGINGFACE_HUB_TOKEN` environment variable. "
                "For large models you may need a GPU and more memory.")

# ----------------------------
# Launch
# ----------------------------
if __name__ == "__main__":
    # Respect PORT env var (used by Hugging Face Spaces)
    port = int(os.environ.get("PORT", 7860))
    demo.launch(server_name="0.0.0.0", server_port=port, share=False)
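
As a quick check of the captioning path outside the Gradio UI, the sketch below mirrors how caption_image parses the pipeline output. It is a minimal sketch, not part of the uploaded files: the image path examples/cat.jpg is hypothetical (matching the commented-out example in app.py), and the first run will download the default model.

# smoke_test.py - minimal, CPU-only check of the image-to-text pipeline (sketch).
import os

from PIL import Image
from transformers import pipeline

MODEL_ID = os.environ.get("MODEL_ID", "Salesforce/blip-image-captioning-base")

# device=-1 forces CPU; fine for a one-off smoke test.
pipe = pipeline("image-to-text", model=MODEL_ID, device=-1)

# Hypothetical local test image; replace with any photo on disk.
outputs = pipe(Image.open("examples/cat.jpg"))

# The pipeline typically returns [{"generated_text": "..."}]; parse it the same way app.py does.
if isinstance(outputs, list) and outputs:
    caption = outputs[0].get("generated_text") or str(outputs[0])
else:
    caption = str(outputs)
print(caption.strip())
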
requirements.txt
ADDED
@@ -0,0 +1,8 @@
gradio>=3.40.0,<4.0  # pinned below 4.0: app.py uses gr.Image(tool="editor"), removed in Gradio 4
transformers>=4.30.0
torch>=2.0.0
pillow>=9.0.0
# Optional extras (uncomment if needed by the model)
# accelerate>=0.20.3
# diffusers>=0.11.0
# safetensors>=0.3.0
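
The demo is configured entirely through environment variables, so it can be pointed at a different captioning model without editing app.py. A minimal launcher sketch follows (not part of the upload; the larger BLIP model id is only an illustration):

# run_custom_model.py - launch the demo against a different model via env vars (sketch).
import os

# app.py reads these at import time, so they must be set before importing it.
os.environ.setdefault("MODEL_ID", "Salesforce/blip-image-captioning-large")
os.environ.setdefault("TRUST_REMOTE_CODE", "false")
# os.environ.setdefault("HUGGINGFACE_HUB_TOKEN", "hf_...")  # only needed for private models

import app  # importing builds the pipeline and the Gradio Blocks named `demo`

app.demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))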