The image captioning project has been committed.

Browse files

Files changed (14) hide show

.gitattributes +3 -0
README.md +37 -0
app.py +75 -0
data/image1.jpg +3 -0
data/image10.jpg +3 -0
data/image2.png +3 -0
data/image3.jpg +3 -0
data/image4.jpg +3 -0
data/image5.jpg +3 -0
data/image6.png +3 -0
data/image7.png +3 -0
data/image8.jpeg +3 -0
data/image9.jpeg +3 -0
requirements.txt +7 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,3 @@

+data/*.jpg filter=lfs diff=lfs merge=lfs -text
+data/*.jpeg filter=lfs diff=lfs merge=lfs -text
+data/*.png filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,37 @@

+# Image Captioning with BLIP
+This project uses the Salesforce BLIP model to generate captions for images. It provides a simple web interface built with Gradio to upload an image and view the generated caption.
+## Setup
+1.  **Clone the repository:**
+    ```bash
+    git clone https://huggingface.co/spaces/electro-sb/image_captioning
+    cd image_captioning
+    ```
+2.  **Install dependencies:**
+    ```bash
+    pip install -r requirements.txt
+    ```
+3.  **Set up your Hugging Face token:**
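+    Create a `.env` file in the root of the project and add your Hugging Face API key (note: `app.py` as committed never loads `.env` or reads `HF_API_KEY`, so this step currently has no effect on the app):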
+    ```
+    HF_API_KEY=<your-hugging-face-api-key>
+    ```
+4.  **Run the application:**
+    ```bash
+    python app.py
+    ```
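+    The application will be available at `http://localhost:7860`. Because `app.py` launches with `share=True`, Gradio will also try to create a temporary public share link and print its URL to the console.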
+## Usage
+1.  Open your web browser and navigate to `http://localhost:7860`.
+2.  Upload an image using the provided interface.
+3.  Click the "Caption" button to generate a caption for the image.
+4.  The generated caption will be displayed in the "Caption" textbox.

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from transformers import pipeline, AutoTokenizer
+import io
+import base64
+from PIL import Image
+import gradio as gr
+model = "Salesforce/blip-image-captioning-large"
+tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True)
+pipe = pipeline(task="image-to-text",
+                model=model,
+                tokenizer=tokenizer)
+def image_to_base64(image: Image) -> str:
+    """
+    Convert an image to a base64 string.
+    """
+    bytearray= io.BytesIO()
+    image.save(bytearray, format="PNG")
+    return str(base64.b64encode(bytearray.getvalue()).decode('utf-8'))
+def caption_image(image):
+    result = pipe(
+        image_to_base64(image),
+        #Temperature=0.7,
+        # max_length=130,
+        # min_length=30,
+        #do_sample=True
+    )
+    return result[0]['generated_text'].upper()
+if __name__ == "__main__":
+    gr.close_all()
+    with gr.Blocks() as interface:
+        gr.Markdown("### Image Captioning using BLIP Large")
+        with gr.Row():
+            image_input = gr.Image(type="pil", label="Image")
+        with gr.Row():
+            caption_output = gr.Textbox(lines=2, label="Caption")
+        with gr.Row():
+            clear_button = gr.ClearButton()
+            caption_button = gr.Button("Caption", variant="primary")
+        with gr.Row():
+            example_images = gr.Examples(
+                examples=[
+                    "data/image1.jpg",
+                    "data/image2.png",
+                    "data/image3.jpg",
+                    "data/image4.jpg",
+                    "data/image5.jpg",
+                    "data/image6.png",
+                    "data/image7.png",
+                    "data/image8.jpeg",
+                    "data/image9.jpeg",
+                    "data/image10.jpg",
+                ],
+                inputs=[image_input],
+                label="Example Images"
+            )
+        caption_button.click(fn=caption_image,
+            inputs=[image_input],
+            outputs=[caption_output]
+        )
+        clear_button.click(fn=lambda: [None,""],
+                            inputs=[],
+                            outputs=[image_input, caption_output])
+    interface.launch(share=True, server_port=7860)