dlaima committed
Commit 741667f · verified · 1 Parent(s): c88e865

Update app.py

Files changed (1)
app.py +16 -52
app.py CHANGED
@@ -1,65 +1,29 @@
-from dotenv import load_dotenv, find_dotenv
+# app.py
+
 import os
 import io
 from PIL import Image
-import requests
 import warnings
 import gradio as gr
+from transformers import pipeline
 
-# Suppress specific warnings
+# Suppress warnings
 warnings.filterwarnings("ignore", message=".*Using the model-agnostic default max_length.*")
 
-# Load environment variables from .env file
-load_dotenv(find_dotenv())
-hf_api_key = os.getenv('HF_API_KEY')
-endpoint_url = os.getenv('HF_API_ITT_BASE')
-
-# Validate environment variables
-if not hf_api_key:
-    raise ValueError("HF_API_KEY is not set in the .env file.")
-if not endpoint_url:
-    raise ValueError("HF_API_ITT_BASE is not set in the .env file.")
-
+# Load BLIP image captioning pipeline
+captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 
-def generate_caption(image):
-    """
-    Sends an image to the Hugging Face Inference Endpoint for caption generation.
-    Sends raw image bytes (not base64).
-    :param image: An image in PIL format.
-    :return: Generated caption or error message.
-    """
+# Function to generate caption using the pipeline
+def generate_caption(image: Image.Image):
     try:
-        headers = {"Authorization": f"Bearer {hf_api_key}"}
-
-        # Convert the image to RGB and save as JPEG into buffer
-        buffered = io.BytesIO()
+        # Convert image to RGB just in case
         image = image.convert("RGB")
-        image.save(buffered, format="JPEG")
-        buffered.seek(0)
-
-        # Send raw image bytes to the endpoint
-        response = requests.post(endpoint_url, headers=headers, data=buffered.getvalue())
-
-        # Try to parse JSON response safely
-        try:
-            response_data = response.json()
-        except ValueError:
-            return f"Invalid response (not JSON): {response.text}"
-
-        if response.status_code == 200:
-            if isinstance(response_data, list):
-                return response_data[0].get("generated_text", "No caption generated.")
-            elif isinstance(response_data, dict):
-                return response_data.get("generated_text", "No caption generated.")
-            else:
-                return f"Unexpected response format: {response_data}"
-        else:
-            return f"Error {response.status_code}: {response.text}"
-
+        # Generate caption
+        caption = captioner(image)[0]["generated_text"]
+        return caption
     except Exception as e:
         return f"An error occurred: {str(e)}"
 
-
 # Predefined sample images
 def get_sample_images():
     """
@@ -75,9 +39,10 @@ def get_sample_images():
     except FileNotFoundError:
         return []
 
+# Load sample images
+sample_images = get_sample_images()
 
 # Gradio interface
-sample_images = get_sample_images()  # Load predefined sample images
 demo = gr.Interface(
     fn=generate_caption,
     inputs=gr.Image(type="pil", label="Upload Image"),
@@ -86,12 +51,11 @@ demo = gr.Interface(
     title="Image Captioning App",
     description=(
         "Upload an image or use one of the predefined samples to generate a caption. "
-        "This app uses a Hugging Face Inference Endpoint for the Salesforce/blip-image-captioning-large model."
+        "This app uses `Salesforce/blip-image-captioning-base` locally via Hugging Face Transformers."
     ),
+    flagging_mode="never"
 )
 
 if __name__ == "__main__":
-    # Launch the Gradio demo
     demo.launch()
 
-
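
For reference, the new app.py runs captioning locally through the Transformers image-to-text pipeline instead of POSTing raw image bytes to an Inference Endpoint. Below is a minimal sketch of the same call pattern outside Gradio, assuming transformers with a PyTorch backend is installed and that sample.jpg is a hypothetical local test image:

# Minimal local check of the pipeline call pattern used in the new app.py (sketch).
from PIL import Image
from transformers import pipeline

# Same model as in the commit; weights are downloaded from the Hub on first run.
captioner = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

image = Image.open("sample.jpg").convert("RGB")  # hypothetical local test image
outputs = captioner(image)  # returns a list like [{"generated_text": "..."}]
print(outputs[0]["generated_text"])

The pipeline returns a list of dicts, so the caption sits under the generated_text key of the first element, which is what the new generate_caption() extracts.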