Walid-Ahmed committed on
Commit
b386d07
·
verified ·
1 Parent(s): e354d77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -13
app.py CHANGED
@@ -1,33 +1,58 @@
1
  import torch
2
  from transformers import pipeline
3
  from PIL import Image
4
- from scipy.io import wavfile
5
  import gradio as gr
6
- import numpy as np
 
 
 
7
# Prefer CUDA when available; otherwise the pipelines run on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BLIP captioning model wrapped in a transformers pipeline (image -> text).
caption_image = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)

# VITS text-to-speech pipeline used to narrate generated captions.
narrator = pipeline(
    "text-to-speech",
    model="kakao-enterprise/vits-ljs",
    device=device,
)
17
-
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
def process_image(image):
    """Return the model-generated caption for the given PIL image."""
    outputs = caption_image(image)
    return outputs[0]['generated_text']
25
 
26
# Assemble the captioning demo UI: PIL image in, caption text out.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Textbox(label="Generated Caption")],
)
32
 
33
  # Launch the interface
 
1
  import torch
2
  from transformers import pipeline
3
  from PIL import Image
 
4
  import gradio as gr
5
+ import requests
6
+ from io import BytesIO
7
+ import os
8
+
9
# Run on GPU when one is available, falling back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# BLIP image-captioning model exposed through a transformers pipeline.
caption_image = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)
 
 
 
 
 
 
14
 
15
# Remote sample images shown in the Gradio examples gallery.
image_urls = [
    "https://github.com/Walid-Ahmed/ML_Datasets/blob/master/image1.jpeg?raw=true",
    "https://github.com/Walid-Ahmed/ML_Datasets/blob/master/image2.jpeg?raw=true",
    "https://github.com/Walid-Ahmed/ML_Datasets/blob/master/image3.jpeg?raw=true",
]

# Local cache directory for the downloaded example images.
save_dir = "example_images"
os.makedirs(save_dir, exist_ok=True)
25
+
26
+ # Function to download images
27
def download_image(url, filename):
    """Download an image from `url` and save it to `filename`.

    Args:
        url: HTTP(S) URL of the image to fetch.
        filename: Local path the raw image bytes are written to.

    Returns:
        `filename` on success, or None if the download failed.
    """
    try:
        # A timeout keeps a dead/slow host from hanging the app at startup;
        # requests.get has NO timeout by default and can block forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        # Network errors previously crashed the whole script at import time;
        # degrade to the same best-effort path as a non-200 response.
        print(f"Failed to download: {url} ({e})")
        return None
    if response.status_code == 200:
        with open(filename, "wb") as f:
            f.write(response.content)
        return filename
    else:
        print(f"Failed to download: {url}")
        return None
36
+
37
# Fetch each example image once, reusing any copy already cached on disk.
example_images = []
for n, url in enumerate(image_urls, start=1):
    local_path = os.path.join(save_dir, f"image{n}.jpeg")
    if not os.path.exists(local_path):  # avoid redownloading a cached file
        download_image(url, local_path)
    example_images.append(local_path)
44
+
45
def process_image(image):
    """Generate a descriptive caption for a PIL image via the BLIP pipeline."""
    result = caption_image(image)
    return result[0]['generated_text']
49
 
50
# Build the Gradio UI: image in, caption text out, seeded with the
# pre-downloaded example images.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(label="Generated Caption"),
    examples=example_images,  # local paths populated by the download loop
)
57
 
58
  # Launch the interface