Spaces:

moslem
/

IMG

Sleeping

App Files Files Community

moslem committed on Oct 16, 2025

Commit

2ce6e8c

verified ·

1 Parent(s): 3d4fa97

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -161

app.py CHANGED Viewed

@@ -1,161 +1,137 @@
-# app.py
-"""
-Image Captioning demo with Gradio + Hugging Face transformers.
-Environment variables:
-  MODEL_ID                - huggingface model id (default: Salesforce/blip-image-captioning-base)
-  TRUST_REMOTE_CODE       - "true"/"false" to allow custom repo code (default: false)
-  HUGGINGFACE_HUB_TOKEN   - optional, if your model is private
-Run:
-  python app.py
-"""
-import os
-import logging
-from typing import Optional
-import torch
-from PIL import Image
-from transformers import pipeline
-import gradio as gr
-# ----------------------------
-# Configuration & logging
-# ----------------------------
-MODEL_ID = os.environ.get("MODEL_ID", "Salesforce/blip-image-captioning-base")
-TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "false").lower() in ("1", "true", "yes")
-HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")  # optional (for private models)
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("image-caption-gradio")
-# ----------------------------
-# Device helper
-# ----------------------------
-def get_pipeline_device() -> int:
-    """
-    Return device index for transformers pipeline:
-      0 (GPU) if available, else -1 (CPU)
-    """
-    return 0 if torch.cuda.is_available() else -1
-# ----------------------------
-# Load pipeline (global)
-# ----------------------------
-caption_pipe = None
-_load_error: Optional[str] = None
-def load_caption_pipeline():
-    """
-    Load the image-to-text pipeline once (global).
-    Uses HUGGINGFACE_HUB_TOKEN if set for private models.
-    """
-    global caption_pipe, _load_error
-    if caption_pipe is not None or _load_error:
-        return
-    device = get_pipeline_device()
-    logger.info("Loading model '%s' (trust_remote_code=%s) on device %s", MODEL_ID, TRUST_REMOTE_CODE, device)
-    try:
-        # If HUB_TOKEN is provided, transformers/huggingface_hub will pick it up from env.
-        caption_pipe = pipeline(
-            "image-to-text",
-            model=MODEL_ID,
-            device=device,
-            trust_remote_code=TRUST_REMOTE_CODE,
-        )
-        logger.info("Model loaded successfully.")
-    except Exception as e:
-        _load_error = str(e)
-        logger.exception("Failed to load model: %s", e)
-# Preload model at startup (best-effort)
-load_caption_pipeline()
-# ----------------------------
-# Inference function used by Gradio
-# ----------------------------
-def caption_image(img: Image.Image) -> str:
-    """
-    Run the captioning pipeline on a PIL image and return the caption text.
-    """
-    if _load_error:
-        # If loading failed earlier, return the error for the UI
-        return f"Error loading model: {_load_error}"
-    if caption_pipe is None:
-        # Try loading lazily if not loaded yet
-        load_caption_pipeline()
-        if caption_pipe is None:
-            return f"Model not loaded. Try again in a moment."
-    try:
-        outputs = caption_pipe(img)
-        # pipeline usually returns a list of dicts with 'generated_text'
-        if isinstance(outputs, list) and outputs:
-            caption = outputs[0].get("generated_text") or outputs[0].get("caption") or str(outputs[0])
-        else:
-            caption = str(outputs)
-        return caption.strip()
-    except Exception as e:
-        logger.exception("Captioning error: %s", e)
-        return f"Captioning failed: {e}"
-# ----------------------------
-# Gradio UI
-# ----------------------------
-title = "Image Captioning"
-description = (
-    "Upload an image and the model will generate a short descriptive caption. "
-    "Model: <b>{}</b>. ".format(MODEL_ID)
-)
-examples = [
-    # If you want, place example image paths here (local files in repo), or leave empty.
-    # ["examples/cat.jpg"],
-]
-with gr.Blocks(title=title) as demo:
-    gr.Markdown(f"# {title}")
-    gr.Markdown(description)
-    # Status row
-    with gr.Row():
-        model_info = gr.Textbox(label="Model", value=MODEL_ID, interactive=False)
-        device_info = gr.Textbox(label="Device", value=("cuda" if torch.cuda.is_available() else "cpu"), interactive=False)
-        status_info = gr.Textbox(label="Model status", value=("loaded" if caption_pipe is not None and not _load_error else f"error: {_load_error}" if _load_error else "loading"), interactive=False)
-    gr.Markdown("## Upload image")
-    with gr.Row():
-        image_input = gr.Image(type="pil", label="Image", tool="editor")
-        with gr.Column():
-            run_btn = gr.Button("Generate Caption")
-            clear_btn = gr.Button("Clear")
-            gr.Markdown("**Tips:** use clear photos; try different crops in the editor for better captions.")
-    output = gr.Textbox(label="Caption", interactive=False)
-    # Example images (optional)
-    if examples:
-        gr.Examples(examples=examples, inputs=image_input, label="Examples")
-    # Actions
-    run_btn.click(fn=caption_image, inputs=image_input, outputs=output)
-    clear_btn.click(fn=lambda: (None, ""), inputs=None, outputs=[image_input, output])
-    gr.Markdown("---")
-    gr.Markdown("**Notes**: If the model is private, set `HUGGINGFACE_HUB_TOKEN` environment variable. "
-                "For large models you may need GPU and more memory.")
-# ----------------------------
-# Launch
-# ----------------------------
-if __name__ == "__main__":
-    # Respect PORT env var (used by Hugging Face Spaces)
-    port = int(os.environ.get("PORT", 7860))
-    demo.launch(server_name="0.0.0.0", server_port=port, share=False)

+# app.py
+"""
+Image Captioning demo with Gradio + Hugging Face transformers.
+Environment variables:
+  MODEL_ID                - huggingface model id (default: Salesforce/blip-image-captioning-base)
+  TRUST_REMOTE_CODE       - "true"/"false" to allow custom repo code (default: false)
+  HUGGINGFACE_HUB_TOKEN   - optional, if your model is private
+"""
+import os
+import logging
+from typing import Optional
+import torch
+from PIL import Image
+from transformers import pipeline
+import gradio as gr
+# ----------------------------
+# Configuration & logging
+# ----------------------------
+MODEL_ID = os.environ.get("MODEL_ID", "Salesforce/blip-image-captioning-base")
+TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "false").lower() in ("1", "true", "yes")
+HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")  # optional (for private models)
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("image-caption-gradio")
+logger.info("Gradio version: %s", gr.__version__)
+# ----------------------------
+# Device helper
+# ----------------------------
+def get_pipeline_device() -> int:
+    """Return device index for transformers pipeline: 0 (GPU) or -1 (CPU)."""
+    return 0 if torch.cuda.is_available() else -1
+# ----------------------------
+# Load pipeline (global)
+# ----------------------------
+caption_pipe = None
+_load_error: Optional[str] = None
+def load_caption_pipeline():
+    """Load the image-to-text pipeline once and cache it in ``caption_pipe``.
+    Does nothing when a pipeline is already loaded or an earlier attempt failed.
+    A failure is logged and its text stored in ``_load_error`` instead of raising,
+    so importing the module stays best-effort.
+    """
+    global caption_pipe, _load_error
+    if caption_pipe is not None or _load_error:
+        return
+    device = get_pipeline_device()
+    logger.info("Loading model '%s' (trust_remote_code=%s) on device %s", MODEL_ID, TRUST_REMOTE_CODE, device)
+    # Forward the token explicitly so the HUGGINGFACE_HUB_TOKEN setting documented
+    # in the module docstring actually takes effect for private models.
+    # NOTE(review): assumes the installed transformers accepts `token=` — confirm.
+    auth_kwargs = {"token": HUB_TOKEN} if HUB_TOKEN else {}
+    try:
+        caption_pipe = pipeline(
+            "image-to-text",
+            model=MODEL_ID,
+            device=device,
+            trust_remote_code=TRUST_REMOTE_CODE,
+            **auth_kwargs,
+        )
+        logger.info("Model loaded successfully.")
+    except Exception as e:
+        # Remember the failure so later calls report it instead of retrying the load.
+        _load_error = str(e)
+        logger.exception("Failed to load model: %s", e)
+# Preload model at startup (best-effort)
+load_caption_pipeline()
+# ----------------------------
+# Inference function used by Gradio
+# ----------------------------
+def caption_image(img: Optional[Image.Image]) -> str:
+    """Run the captioning pipeline on a PIL image and return the caption text.
+    Never raises: a failed model load, a missing image and inference errors are
+    all returned as a human-readable message for the caption textbox.
+    """
+    if _load_error:
+        return f"Error loading model: {_load_error}"
+    if img is None:
+        # Nothing uploaded yet: answer directly instead of letting the pipeline fail on None.
+        return "Please upload an image first."
+    if caption_pipe is None:
+        load_caption_pipeline()
+        if caption_pipe is None:
+            # A failed load records its reason in _load_error; surface it when present.
+            if _load_error:
+                return f"Error loading model: {_load_error}"
+            return "Model not loaded yet. Please try again in a moment."
+    try:
+        outputs = caption_pipe(img)
+        # Non-empty list: read the first item's text, falling back to its string form.
+        if isinstance(outputs, list) and outputs:
+            caption = outputs[0].get("generated_text") or outputs[0].get("caption") or str(outputs[0])
+        else:
+            caption = str(outputs)
+        return caption.strip()
+    except Exception as e:
+        logger.exception("Captioning error: %s", e)
+        return f"Captioning failed: {e}"
+# ----------------------------
+# Gradio UI
+# ----------------------------
+title = "Image Captioning"
+description = (
+    "Upload an image and the model will generate a short descriptive caption.\n"
+    f"Model: {MODEL_ID}"
+)
+# Build the UI: a read-only status row, an image upload with action buttons, and the caption output.
+with gr.Blocks(title=title) as demo:
+    gr.Markdown(f"# {title}")
+    gr.Markdown(description)
+    with gr.Row():
+        model_info = gr.Textbox(label="Model", value=MODEL_ID, interactive=False)
+        device_info = gr.Textbox(label="Device", value=("cuda" if torch.cuda.is_available() else "cpu"), interactive=False)
+        # Evaluated once while the UI is built; it is not refreshed by a later lazy load.
+        status_info = gr.Textbox(label="Model status", value=("loaded" if caption_pipe is not None and not _load_error else f"error: {_load_error}" if _load_error else "loading"), interactive=False)
+    gr.Markdown("## Upload image")
+    with gr.Row():
+        # NOTE: removed 'tool' kw to support more Gradio versions
+        image_input = gr.Image(type="pil", label="Image")
+        with gr.Column():
+            run_btn = gr.Button("Generate Caption")
+            clear_btn = gr.Button("Clear")
+    output = gr.Textbox(label="Caption", interactive=False)
+    run_btn.click(fn=caption_image, inputs=image_input, outputs=output)
+    # clear button: resets output and clears image (Gradio sometimes resets image by returning None)
+    def _clear():
+        """Return the cleared values for (image_input, output)."""
+        return None, ""
+    clear_btn.click(fn=_clear, inputs=None, outputs=[image_input, output])
+    gr.Markdown("---")
+    gr.Markdown("Notes: If the model is private, set HUGGINGFACE_HUB_TOKEN. Large models need more memory/GPU.")
+# ----------------------------
+# Launch
+# ----------------------------
+if __name__ == "__main__":
+    port = int(os.environ.get("PORT", 7860))
+    demo.launch(server_name="0.0.0.0", server_port=port, share=False)