Add image captioning sample
Files changed:
- app.py +31 -22
- image_classification.py +11 -32
- image_to_text.py +19 -0
- requirements.txt +5 -1
- utils.py +33 -1
app.py
CHANGED

@@ -3,7 +3,9 @@ from functools import partial
 import gradio as gr
 from huggingface_hub import InferenceClient
 from image_classification import image_classification
+from image_to_text import image_to_text
 from text_to_image import text_to_image
+from utils import request_image


 class App:
@@ -18,7 +20,7 @@ class App:
             with gr.Tabs():
                 with gr.Tab("Text-to-image Generation"):
                     gr.Markdown("Generate an image from a text prompt.")
-                    text_to_image_prompt = gr.Textbox(label="Prompt"
+                    text_to_image_prompt = gr.Textbox(label="Prompt")
                     text_to_image_generate_button = gr.Button("Generate")
                     text_to_image_output = gr.Image(label="Image", type="pil")
                     text_to_image_generate_button.click(
@@ -26,32 +28,39 @@ class App:
                         inputs=text_to_image_prompt,
                         outputs=text_to_image_output
                     )
+                with gr.Tab("Image-to-text or Image Captioning"):
+                    gr.Markdown("Generate a text description of an image.")
+                    image_to_text_url_input = gr.Textbox(label="Image URL")
+                    image_to_text_image_request_button = gr.Button("Get Image")
+                    image_to_text_image_input = gr.Image(label="Image", type="pil")
+                    image_to_text_image_request_button.click(
+                        fn=request_image,
+                        inputs=image_to_text_url_input,
+                        outputs=image_to_text_image_input
+                    )
+                    image_to_text_output = gr.List(label="Captions", headers=["Caption"])
+                    image_to_text_button = gr.Button("Caption")
+                    image_to_text_button.click(
+                        fn=image_to_text,
+                        inputs=image_to_text_image_input,
+                        outputs=image_to_text_output
+                    )
                 with gr.Tab("Image Classification"):
                     gr.Markdown("Classify a recyclable item as one of: cardboard, glass, metal, paper, plastic, or other using [Trash-Net](https://huggingface.co/prithivMLmods/Trash-Net).")
-                    [seven removed lines illegible in the source]
-                    )
-                    image_classification_image_preview = gr.Image(label="Image Preview", type="pil")
-                    image_classification_upload_input = gr.Image(
-                        label="Or Upload Image",
-                        type="pil",
-                        scale=2
-                    )
-                    image_classification_button = gr.Button("Classify")
-                    image_classification_output = gr.Dataframe(
-                        label="Classification Results",
-                        headers=["Label", "Probability"],
-                        interactive=False
+                    image_classification_url_input = gr.Textbox(label="Image URL")
+                    image_classification_image_request_button = gr.Button("Get Image")
+                    image_classification_image_input = gr.Image(label="Image", type="pil")
+                    image_classification_image_request_button.click(
+                        fn=request_image,
+                        inputs=image_classification_url_input,
+                        outputs=image_classification_image_input
                     )
+                    image_classification_button = gr.Button("Classify")
+                    image_classification_output = gr.Dataframe(label="Classification", headers=["Label", "Probability"], interactive=False)
                     image_classification_button.click(
                         fn=partial(image_classification, self.client),
-                        inputs=
-                        outputs=
+                        inputs=image_classification_image_input,
+                        outputs=image_classification_output
                     )

         demo.launch()
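For orientation, the "Get Image" then "Caption" wiring that the new tab adds can be reduced to a standalone sketch. This is a minimal, hypothetical reduction, not the Space's code: caption_stub stands in for the real image_to_text (BLIP) call, and the URL fetch is inlined with a fixed 10-second timeout instead of the REQUEST_TIMEOUT environment variable.

import gradio as gr
import requests
from io import BytesIO
from PIL import Image


def request_image(url: str) -> Image.Image:
    # Inlined stand-in for utils.request_image: fetch bytes, decode as a PIL image.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return Image.open(BytesIO(response.content))


def caption_stub(image: Image.Image) -> list[list[str]]:
    # Hypothetical captioner: one row per caption, matching the gr.List output.
    return [[f"an image of size {image.width}x{image.height}"]]


with gr.Blocks() as demo:
    url_input = gr.Textbox(label="Image URL")
    get_image_button = gr.Button("Get Image")
    image_input = gr.Image(label="Image", type="pil")
    get_image_button.click(fn=request_image, inputs=url_input, outputs=image_input)
    captions_output = gr.List(label="Captions", headers=["Caption"])
    caption_button = gr.Button("Caption")
    caption_button.click(fn=caption_stub, inputs=image_input, outputs=captions_output)

demo.launch()

The same pattern backs the reworked Image Classification tab: one click handler fetches the image into a shared gr.Image component, and a second handler runs the model on whatever that component holds.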
image_classification.py
CHANGED

@@ -1,44 +1,23 @@
-import gradio as gr
 from huggingface_hub import InferenceClient
-from io import BytesIO
 from os import path, unlink, getenv
-from PIL.Image import Image, open as open_image
+from PIL.Image import Image
 import pandas as pd
 from pandas import DataFrame
-import requests
 from utils import save_image_to_temp_file


-def image_classification(client: InferenceClient,
-    temp_file_path = None
+def image_classification(client: InferenceClient, image: Image) -> DataFrame:
     try:
-        [eight removed lines illegible in the source]
-        response = requests.get(image_url, timeout=int(getenv("REQUEST_TIMEOUT")))
-        response.raise_for_status()
-        image = open_image(BytesIO(response.content))
-        temp_file_path = save_image_to_temp_file(image)
-        classifications = client.image_classification(temp_file_path, model=getenv("IMAGE_CLASSIFICATION_MODEL"))
-    except Exception as e:
-        raise gr.Error(f"Failed to fetch image from URL: {str(e)}")
-    else:
-        raise gr.Error("Please either provide an image URL or upload an image.")
-    df = pd.DataFrame({
-        "Label": classification.label,
-        "Probability": f"{classification.score:.2%}"
-    }
-    for classification
-    in classifications)
-    return image, df
+        temp_file_path = save_image_to_temp_file(image)  # Needed because InferenceClient does not accept PIL Images directly.
+        classifications = client.image_classification(temp_file_path, model=getenv("IMAGE_CLASSIFICATION_MODEL"))
+        return pd.DataFrame({
+            "Label": classification.label,
+            "Probability": f"{classification.score:.2%}"
+        }
+        for classification
+        in classifications)
     finally:
-        # Clean up temporary file.
-        if temp_file_path and path.exists(temp_file_path):
+        if temp_file_path and path.exists(temp_file_path):  # Clean up temporary file.
             try:
                 unlink(temp_file_path)
             except Exception:
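After the refactor, image_classification no longer fetches URLs or validates inputs; it only bridges a PIL image to InferenceClient (which expects a path, bytes, or URL rather than a PIL object, hence the temp file) and tabulates labels and scores. A minimal sketch of the same call outside Gradio, assuming the Trash-Net checkpoint the tab's description links to; the Space itself reads the model id from IMAGE_CLASSIFICATION_MODEL:

import pandas as pd
from huggingface_hub import InferenceClient

client = InferenceClient()  # assumes HF_TOKEN is configured in the environment
classifications = client.image_classification(
    "item.jpg",  # hypothetical local file; a URL also works
    model="prithivMLmods/Trash-Net",  # assumed; the Space uses IMAGE_CLASSIFICATION_MODEL
)
# Each result carries .label and .score; pd.DataFrame accepts the generator of dicts.
df = pd.DataFrame(
    {"Label": c.label, "Probability": f"{c.score:.2%}"} for c in classifications
)
print(df)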
image_to_text.py
ADDED

@@ -0,0 +1,19 @@
+import gc
+from os import getenv
+from PIL.Image import Image
+from transformers import AutoProcessor, BlipForConditionalGeneration
+from utils import get_pytorch_device, spaces_gpu
+
+
+@spaces_gpu
+def image_to_text(image: Image) -> list[str]:
+    image_to_text_model_id = getenv("IMAGE_TO_TEXT_MODEL")
+    pytorch_device = get_pytorch_device()
+    processor = AutoProcessor.from_pretrained(image_to_text_model_id)
+    model = BlipForConditionalGeneration.from_pretrained(image_to_text_model_id).to(pytorch_device)
+    inputs = processor(images=image, return_tensors="pt").to(pytorch_device)
+    generated_ids = model.generate(pixel_values=inputs.pixel_values, num_beams=3, max_length=20, min_length=5)
+    results = processor.batch_decode(generated_ids, skip_special_tokens=True)
+    del model, inputs
+    gc.collect()
+    return results
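A usage sketch for the new module. IMAGE_TO_TEXT_MODEL is not set anywhere in this diff, so the checkpoint below is an assumption; any model loadable by BlipForConditionalGeneration, such as Salesforce/blip-image-captioning-base, should behave the same. The function reads the variable at call time, so it only needs to be set before the call:

import os
from PIL import Image

from image_to_text import image_to_text

# Assumed checkpoint; the Space would set this in its environment instead.
os.environ["IMAGE_TO_TEXT_MODEL"] = "Salesforce/blip-image-captioning-base"

captions = image_to_text(Image.open("photo.jpg"))  # hypothetical local file
print(captions)  # e.g. ["a dog sitting on a couch"]

Note the design choice: the model is reloaded on every call, and the del/gc.collect() at the end releases it again, trading per-call latency for keeping GPU memory free between requests.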
requirements.txt
CHANGED

@@ -1,6 +1,10 @@
 gradio>=5.49.1
-huggingface-hub>=1.0
+huggingface-hub>=0.34.0,<1.0
 python-dotenv>=1.0.0
 pandas>=2.0.0
 pillow>=10.0.0
 requests>=2.31.0
+transformers>=4.40.0
+torch>=2.0.0
+torchvision>=0.15.0
+torchaudio>=2.0.0
utils.py
CHANGED

@@ -1,7 +1,39 @@
-from PIL.Image import Image
+import gradio as gr
+from io import BytesIO
+from PIL.Image import Image, open as open_image
+from os import getenv
+import requests
 from tempfile import NamedTemporaryFile
+import torch


+# Try to import spaces decorator (for Hugging Face Spaces), otherwise use no-op decorator.
+try:
+    import spaces
+    spaces_gpu = spaces.GPU
+except ImportError:
+    # For local development, use a no-op decorator because spaces is not available.
+    def spaces_gpu(func):
+        return func
+
+def get_pytorch_device() -> str:
+    return ("cuda" if torch.cuda.is_available()  # Nvidia CUDA and AMD ROCm
+            else "xpu" if torch.xpu.is_available()  # Intel XPU
+            else "mps" if torch.mps.is_available()  # Apple Silicon
+            else "cpu")  # gl bro 🫠
+
+def request_image(url: str) -> Image:
+    try:
+        response = requests.get(url, timeout=int(getenv("REQUEST_TIMEOUT")))
+        response.raise_for_status()
+        return open_image(BytesIO(response.content))
+    except requests.HTTPError as e:
+        raise gr.Error(f"Failed to fetch image from URL because of HTTP error: {e.response.status_code} {e.response.text}")
+    except requests.Timeout:
+        raise gr.Error("Failed to fetch image from URL because the request timed out.")
+    except requests.RequestException as e:
+        raise gr.Error(f"Failed to fetch image from URL: {str(e)}")
+
 def save_image_to_temp_file(image: Image) -> str:
     image_format = image.format if image.format else 'PNG'
     format_extension = image_format.lower() if image_format else 'png'
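The new helpers are easy to exercise locally. One caveat worth noting: request_image calls int(getenv("REQUEST_TIMEOUT")), which raises TypeError when the variable is unset, so REQUEST_TIMEOUT must be defined in the environment. A small sketch of the other two helpers; the output depends on the machine:

from utils import get_pytorch_device, spaces_gpu


@spaces_gpu  # no-op locally; requests a GPU slice when running on ZeroGPU Spaces
def report_device() -> str:
    return get_pytorch_device()


print(report_device())  # e.g. "cuda", "xpu", "mps", or "cpu"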