gopalagra committed on
Commit 4e69050 · verified · 1 Parent(s): d11dc78

Update app.py

Files changed (1)
app.py  +13 −21
app.py CHANGED
@@ -67,23 +67,15 @@
 # # demo.launch(share=True)
 
 import gradio as gr
-from transformers import Blip2Processor, Blip2ForConditionalGeneration, pipeline
+from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
 from PIL import Image
 import torch
 
 # ----------------------
-# Load BLIP2 for Captioning
+# Load BLIP (Large) for Captioning + VQA
 # ----------------------
-caption_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-125m")
-caption_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-125m")
-
-# ----------------------
-# Load BLIP2 for VQA
-# ----------------------
-vqa_processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-base")
-vqa_model = Blip2ForConditionalGeneration.from_pretrained(
-    "Salesforce/blip2-flan-t5-base", torch_dtype=torch.float16, device_map="auto"
-)
+processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
+model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
 
 # ----------------------
 # Translation pipelines
@@ -98,9 +90,9 @@ translation_models = {
 # Caption + Translate Function
 # ----------------------
 def generate_caption_translate(image, target_lang):
-    inputs = caption_processor(image, return_tensors="pt")
-    out = caption_model.generate(**inputs, max_new_tokens=50)
-    english_caption = caption_processor.decode(out[0], skip_special_tokens=True)
+    inputs = processor(images=image, return_tensors="pt")
+    out = model.generate(**inputs, max_new_tokens=50)
+    english_caption = processor.decode(out[0], skip_special_tokens=True)
 
     # Translate if chosen
     if target_lang in translation_models:
@@ -111,19 +103,19 @@ def generate_caption_translate(image, target_lang):
     return english_caption, translated
 
 # ----------------------
-# VQA Function
+# VQA Function (using same BLIP model)
 # ----------------------
 def vqa(image, question):
-    inputs = vqa_processor(image, question, return_tensors="pt").to(vqa_model.device)
-    out = vqa_model.generate(**inputs, max_new_tokens=100)
-    answer = vqa_processor.decode(out[0], skip_special_tokens=True)
+    inputs = processor(images=image, text=question, return_tensors="pt")
+    out = model.generate(**inputs, max_new_tokens=50)
+    answer = processor.decode(out[0], skip_special_tokens=True)
     return answer
 
 # ----------------------
 # Gradio UI
 # ----------------------
-with gr.Blocks(title="BLIP2 Vision App") as demo:
-    gr.Markdown("## 🖼️ BLIP2: Image Captioning + Translation + Question Answering")
+with gr.Blocks(title="BLIP Vision App") as demo:
+    gr.Markdown("## 🖼️ BLIP: Image Captioning + Translation + Question Answering")
 
     with gr.Tab("Caption + Translate"):
         with gr.Row():
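
For reference, a minimal standalone sketch of how the new single-model setup behaves, assuming the Salesforce/blip-image-captioning-large checkpoint is available from the Hub; the demo image URL below is an illustrative assumption, not part of the commit. Because this checkpoint is captioning-finetuned rather than a dedicated VQA model, passing text=question makes BLIP treat the question as a caption prefix, so the decoded output echoes the prompt.

# Sketch only (not part of the commit): exercises the same processor/model
# calls that generate_caption_translate() and vqa() now use.
import requests
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # assumed demo image
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

# Unconditional captioning: image only, as in generate_caption_translate().
inputs = processor(images=image, return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=50)
print(processor.decode(out[0], skip_special_tokens=True))

# Question as a text prefix, as in vqa(); the decoded string starts with
# the question itself, since BLIP continues the prompt.
inputs = processor(images=image, text="what animals are in the picture?", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=50)
print(processor.decode(out[0], skip_special_tokens=True))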