ma4389 committed on
Commit
29dcc5e
·
verified ·
1 Parent(s): 78f56e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -31
app.py CHANGED
@@ -1,31 +1,37 @@
1
- import torch
2
- from transformers import pipeline, AutoProcessor, AutoModelForVision2Seq
3
- from PIL import Image
4
- import gradio as gr
5
-
6
- # Automatically use GPU if available
7
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
-
9
- # Load processor and model
10
- processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
11
- model = AutoModelForVision2Seq.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
12
-
13
- # Inference function
14
- def generate_caption(image):
15
- image = image.convert("RGB")
16
- inputs = processor(images=image, return_tensors="pt").to(device)
17
- output = model.generate(**inputs)
18
- caption = processor.decode(output[0], skip_special_tokens=True)
19
- return caption
20
-
21
- # Gradio interface
22
- interface = gr.Interface(
23
- fn=generate_caption,
24
- inputs=gr.Image(type="pil"),
25
- outputs="text",
26
- title="🖼️ Image to Text Captioning",
27
- description="Upload an image and get a caption using BLIP (Salesforce/blip-image-captioning-large)."
28
- )
29
-
30
- if __name__ == "__main__":
31
- interface.launch()
 
 
 
 
 
 
 
1
import torch
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
import gradio as gr

# Single source of truth for the checkpoint name, so the processor and
# model can never drift apart (the id was previously duplicated).
MODEL_ID = "Salesforce/blip-image-captioning-large"

# Automatically use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load processor & model once at startup (downloads weights on first run).
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForVision2Seq.from_pretrained(MODEL_ID).to(device)
# Inference-only app: switch off dropout / training-mode behavior.
model.eval()
14
+
15
# Inference function
def generate_caption(image):
    """Return a BLIP-generated caption for *image*.

    Args:
        image: PIL image from the Gradio widget, or None when the user
            submits without uploading anything.

    Returns:
        The decoded caption string, or a human-readable message when no
        image was supplied or inference failed (the UI displays whatever
        string comes back, so we never raise to the caller).
    """
    # Guard the empty input explicitly: without this, a missing upload
    # surfaced as "Error: 'NoneType' object has no attribute 'convert'".
    if image is None:
        return "Please upload an image."
    try:
        # BLIP expects 3-channel RGB; uploads may be RGBA/grayscale/palette.
        image = image.convert("RGB")
        with torch.inference_mode():
            inputs = processor(images=image, return_tensors="pt").to(device)
            output = model.generate(**inputs)
        return processor.decode(output[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate best-effort boundary for the web UI: show the error
        # text instead of crashing the request.
        return f"Error: {str(e)}"
26
+
27
# Gradio UI: a single image-in / text-out captioning demo.
_description = (
    "Upload an image and get a caption using BLIP "
    "(Salesforce/blip-image-captioning-large)."
)

interface = gr.Interface(
    fn=generate_caption,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="🖼️ Image to Text Captioning",
    description=_description,
)

# Launch only when run as a script (not when imported as a module).
if __name__ == "__main__":
    interface.launch(share=True)