Spaces:

moslem
/

IMG

Sleeping

App Files Files Community

moslem committed on Oct 16, 2025

Commit

0b8d5a4

verified ·

1 Parent(s): 2ce6e8c

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -128

app.py CHANGED Viewed

@@ -1,137 +1,52 @@
-# app.py
-"""
-Image Captioning demo with Gradio + Hugging Face transformers.
-Environment variables:
-  MODEL_ID                - huggingface model id (default: Salesforce/blip-image-captioning-base)
-  TRUST_REMOTE_CODE       - "true"/"false" to allow custom repo code (default: false)
-  HUGGINGFACE_HUB_TOKEN   - optional, if your model is private
-"""
-import os
-import logging
-from typing import Optional
-import torch
-from PIL import Image
-from transformers import pipeline
 import gradio as gr
-# ----------------------------
-# Configuration & logging
-# ----------------------------
-MODEL_ID = os.environ.get("MODEL_ID", "Salesforce/blip-image-captioning-base")
-TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "false").lower() in ("1", "true", "yes")
-HUB_TOKEN = os.environ.get("HUGGINGFACE_HUB_TOKEN")  # optional (for private models)
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("image-caption-gradio")
-logger.info("Gradio version: %s", gr.__version__)
-# ----------------------------
-# Device helper
-# ----------------------------
-def get_pipeline_device() -> int:
-    """Return device index for transformers pipeline: 0 (GPU) or -1 (CPU)."""
-    return 0 if torch.cuda.is_available() else -1
-# ----------------------------
-# Load pipeline (global)
-# ----------------------------
-caption_pipe = None
-_load_error: Optional[str] = None
-def load_caption_pipeline():
-    """Load the image-to-text pipeline once (global)."""
-    global caption_pipe, _load_error
-    if caption_pipe is not None or _load_error:
-        return
-    device = get_pipeline_device()
-    logger.info("Loading model '%s' (trust_remote_code=%s) on device %s", MODEL_ID, TRUST_REMOTE_CODE, device)
-    try:
-        caption_pipe = pipeline(
-            "image-to-text",
-            model=MODEL_ID,
-            device=device,
-            trust_remote_code=TRUST_REMOTE_CODE,
-        )
-        logger.info("Model loaded successfully.")
-    except Exception as e:
-        _load_error = str(e)
-        logger.exception("Failed to load model: %s", e)
-# Preload model at startup (best-effort)
-load_caption_pipeline()
-# ----------------------------
-# Inference function used by Gradio
-# ----------------------------
-def caption_image(img: Image.Image) -> str:
-    """Run the captioning pipeline on a PIL image and return the caption text."""
     if _load_error:
-        return f"Error loading model: {_load_error}"
-    if caption_pipe is None:
-        load_caption_pipeline()
-        if caption_pipe is None:
-            return "Model not loaded yet. Please try again in a moment."
-    try:
-        outputs = caption_pipe(img)
-        if isinstance(outputs, list) and outputs:
-            caption = outputs[0].get("generated_text") or outputs[0].get("caption") or str(outputs[0])
-        else:
-            caption = str(outputs)
-        return caption.strip()
-    except Exception as e:
-        logger.exception("Captioning error: %s", e)
-        return f"Captioning failed: {e}"
-# ----------------------------
-# Gradio UI
-# ----------------------------
-title = "Image Captioning"
-description = (
-    "Upload an image and the model will generate a short descriptive caption.\n"
-    f"Model: {MODEL_ID}"
 )
-with gr.Blocks(title=title) as demo:
-    gr.Markdown(f"# {title}")
-    gr.Markdown(description)
-    with gr.Row():
-        model_info = gr.Textbox(label="Model", value=MODEL_ID, interactive=False)
-        device_info = gr.Textbox(label="Device", value=("cuda" if torch.cuda.is_available() else "cpu"), interactive=False)
-        status_info = gr.Textbox(label="Model status", value=("loaded" if caption_pipe is not None and not _load_error else f\"error: {_load_error}\" if _load_error else "loading"), interactive=False)
-    gr.Markdown("## Upload image")
     with gr.Row():
-        # NOTE: removed 'tool' kw to support more Gradio versions
-        image_input = gr.Image(type="pil", label="Image")
-        with gr.Column():
-            run_btn = gr.Button("Generate Caption")
-            clear_btn = gr.Button("Clear")
-    output = gr.Textbox(label="Caption", interactive=False)
-    run_btn.click(fn=caption_image, inputs=image_input, outputs=output)
-    # clear button: resets output and clears image (Gradio sometimes resets image by returning None)
-    def _clear():
-        return None, ""
-    clear_btn.click(fn=_clear, inputs=None, outputs=[image_input, output])
-    gr.Markdown("---")
-    gr.Markdown("Notes: If the model is private, set HUGGINGFACE_HUB_TOKEN. Large models need more memory/GPU.")
-# ----------------------------
-# Launch
-# ----------------------------
-if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))
-    demo.launch(server_name="0.0.0.0", server_port=port, share=False)

 import gradio as gr
+import torch
+from transformers import BlipProcessor, BlipForConditionalGeneration
+MODEL_NAME = "Salesforce/blip-image-captioning-base"
+# --- Load the model
+try:
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    caption_processor = BlipProcessor.from_pretrained(MODEL_NAME)
+    caption_model = BlipForConditionalGeneration.from_pretrained(MODEL_NAME)
+    caption_model.to(device)
+    _load_error = None
+except Exception as e:
+    caption_processor = None
+    caption_model = None
+    _load_error = str(e)
+# --- Captioning function
+def caption_image(image):
     if _load_error:
+        return f"❌ Model load error: {_load_error}"
+    if image is None:
+        return "⚠️ لطفاً یک تصویر آپلود کنید."
+    inputs = caption_processor(image, return_tensors="pt").to(device)
+    out = caption_model.generate(**inputs, max_new_tokens=30)
+    caption = caption_processor.decode(out[0], skip_special_tokens=True)
+    return caption
+# --- Gradio user interface
+status_text = (
+    "✅ Model loaded successfully"
+    if caption_model is not None and not _load_error
+    else f"❌ Error: {_load_error}"
+    if _load_error
+    else "⏳ Loading model..."
 )
+with gr.Blocks(title="Image Captioning App") as demo:
+    gr.Markdown("## 🖼️ Image Captioning with BLIP\nUpload an image and get an automatic caption.")
+    gr.Markdown(f"**Status:** {status_text}")
     with gr.Row():
+        image_input = gr.Image(type="pil", label="Upload Image")
+        caption_output = gr.Textbox(label="Generated Caption", interactive=False)
+    generate_btn = gr.Button("Generate Caption")
+    generate_btn.click(fn=caption_image, inputs=image_input, outputs=caption_output)
+demo.launch()