gopalagra committed on
Commit
e1a7959
·
verified ·
1 Parent(s): fd13abe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -24
app.py CHANGED
@@ -69,9 +69,6 @@
69
  import gradio as gr
70
  from transformers import Blip2Processor, Blip2ForConditionalGeneration, pipeline
71
  from PIL import Image
72
- from gtts import gTTS
73
- import tempfile
74
- import os
75
 
76
  # Load BLIP model
77
  processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
@@ -84,14 +81,7 @@ translation_models = {
84
  "Spanish": pipeline("translation", model="Helsinki-NLP/opus-mt-en-es"),
85
  }
86
 
87
- # Map language to gTTS codes
88
- tts_lang_map = {
89
- "Hindi": "hi",
90
- "French": "fr",
91
- "Spanish": "es",
92
- }
93
-
94
- def generate_caption_translate_tts(image, target_lang):
95
  # Step 1: Generate English caption
96
  inputs = processor(image, return_tensors="pt")
97
  out = model.generate(**inputs, max_new_tokens=50)
@@ -103,26 +93,17 @@ def generate_caption_translate_tts(image, target_lang):
103
  else:
104
  translated = "Translation not available"
105
 
106
- # Step 3: Convert to Speech
107
- audio_file = None
108
- if target_lang in tts_lang_map:
109
- tts = gTTS(translated, lang=tts_lang_map[target_lang])
110
- tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
111
- tts.save(tmp_file.name)
112
- audio_file = tmp_file.name
113
-
114
- return english_caption, translated, audio_file
115
 
116
  # Gradio Interface
117
  interface = gr.Interface(
118
- fn=generate_caption_translate_tts,
119
  inputs=[gr.Image(type="pil"), gr.Dropdown(["Hindi", "French", "Spanish"], label="Translate To")],
120
  outputs=[
121
  gr.Textbox(label="English Caption"),
122
- gr.Textbox(label="Translated Caption"),
123
- gr.Audio(label="Spoken Translation")
124
  ],
125
- title="BLIP Captioning + Translation + Speech"
126
  )
127
 
128
  interface.launch()
 
69
  import gradio as gr
70
  from transformers import Blip2Processor, Blip2ForConditionalGeneration, pipeline
71
  from PIL import Image
 
 
 
72
 
73
  # Load BLIP model
74
  processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
 
81
  "Spanish": pipeline("translation", model="Helsinki-NLP/opus-mt-en-es"),
82
  }
83
 
84
+ def generate_caption_translate(image, target_lang):
 
 
 
 
 
 
 
85
  # Step 1: Generate English caption
86
  inputs = processor(image, return_tensors="pt")
87
  out = model.generate(**inputs, max_new_tokens=50)
 
93
  else:
94
  translated = "Translation not available"
95
 
96
+ return english_caption, translated
 
 
 
 
 
 
 
 
97
 
98
  # Gradio Interface
99
  interface = gr.Interface(
100
+ fn=generate_caption_translate,
101
  inputs=[gr.Image(type="pil"), gr.Dropdown(["Hindi", "French", "Spanish"], label="Translate To")],
102
  outputs=[
103
  gr.Textbox(label="English Caption"),
104
+ gr.Textbox(label="Translated Caption")
 
105
  ],
106
+ title="BLIP Captioning + Translation"
107
  )
108
 
109
  interface.launch()