Walid-Ahmed committed on
Commit
e354d77
·
verified ·
1 Parent(s): a0c84cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -11
app.py CHANGED
@@ -20,23 +20,14 @@ def process_image(image):
20
  # Generate the caption
21
  caption = caption_image(image)[0]['generated_text']
22
 
23
- # Generate speech from the caption
24
- speech = narrator(caption)
25
 
26
- # Convert the audio to PCM format
27
- audio_data = np.array(speech["audio"][0] * 32767, dtype=np.int16)
28
-
29
- # Save the audio to a WAV file
30
- audio_path = "caption.wav"
31
- wavfile.write(audio_path, rate=speech["sampling_rate"], data=audio_data)
32
-
33
- return caption, audio_path
34
 
35
  # Create Gradio interface
36
  iface = gr.Interface(
37
  fn=process_image,
38
  inputs=gr.Image(type="pil"),
39
- outputs=[gr.Textbox(label="Generated Caption"), gr.Audio(label="Generated Audio", type="filepath")]
40
  )
41
 
42
  # Launch the interface
 
20
  # Generate the caption
21
  caption = caption_image(image)[0]['generated_text']
22
 
 
 
23
 
24
+ return caption
 
 
 
 
 
 
 
25
 
26
  # Create Gradio interface
27
  iface = gr.Interface(
28
  fn=process_image,
29
  inputs=gr.Image(type="pil"),
30
+ outputs=[gr.Textbox(label="Generated Caption")]
31
  )
32
 
33
  # Launch the interface