gopalagra committed on
Commit
fee2e0a
·
verified ·
1 Parent(s): c11c555

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -22
app.py CHANGED
@@ -14,37 +14,39 @@ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-capt
14
  # -------------------------------
15
  # Generate caption function
16
  # -------------------------------
17
- def generate_caption_fn(image):
18
- # Convert uploaded image to PIL
19
- if not isinstance(image, Image.Image):
20
- image = Image.fromarray(image)
21
-
22
- # BLIP preprocessing
23
- inputs = processor(images=image, return_tensors="pt")
24
-
25
- # Generate caption
26
- out = model.generate(**inputs)
27
- caption = processor.decode(out[0], skip_special_tokens=True)
28
-
29
- return caption
30
 
31
  # -------------------------------
32
  # Convert text to speech using gTTS
33
  # -------------------------------
34
- def text_to_speech(caption):
35
- tts = gTTS(text=caption, lang='en')
36
- mp3_fp = io.BytesIO()
37
- tts.write_to_fp(mp3_fp)
38
- mp3_fp.seek(0)
39
- return mp3_fp
 
 
 
 
 
 
 
 
 
40
 
41
  # -------------------------------
42
  # Gradio interface: Caption + Audio
43
  # -------------------------------
44
  def generate_caption_tts(image):
45
- caption = generate_caption_fn(image)
46
- audio = text_to_speech(caption)
47
- return caption, audio
 
48
 
49
  interface = gr.Interface(
50
  fn=generate_caption_tts,
 
14
  # -------------------------------
15
  # Generate caption function
16
  # -------------------------------
17
+ # def generate_caption_tts(image):
18
+ # caption = generate_caption(model, processor, image)
19
+ # audio_file = text_to_audio_file(caption)
20
+ # return caption, audio_file # return file path, not BytesIO
21
+
 
 
 
 
 
 
 
 
22
 
23
  # -------------------------------
24
  # Convert text to speech using gTTS
25
  # -------------------------------
26
+ import tempfile
27
+ import pyttsx3
28
+
29
+ def text_to_audio_file(text):
30
+ # Create a temporary file
31
+ tmp_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
32
+ tmp_path = tmp_file.name
33
+ tmp_file.close()
34
+
35
+ engine = pyttsx3.init()
36
+ engine.save_to_file(text, tmp_path)
37
+ engine.runAndWait()
38
+
39
+ return tmp_path
40
+
41
 
42
  # -------------------------------
43
  # Gradio interface: Caption + Audio
44
  # -------------------------------
45
  def generate_caption_tts(image):
46
+ caption = generate_caption(model, processor, image)
47
+ audio_file = text_to_audio_file(caption)
48
+ return caption, audio_file # return file path, not BytesIO
49
+
50
 
51
  interface = gr.Interface(
52
  fn=generate_caption_tts,