Image-Captioning

Sleeping

Walid-Ahmed committed on Feb 13, 2025

Commit

e354d77

verified ·

1 Parent(s): a0c84cf

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,23 +20,14 @@ def process_image(image):
     # Generate the caption
     caption = caption_image(image)[0]['generated_text']
-    # Generate speech from the caption
-    speech = narrator(caption)
-    # Convert the audio to PCM format
-    audio_data = np.array(speech["audio"][0] * 32767, dtype=np.int16)
-    # Save the audio to a WAV file
-    audio_path = "caption.wav"
-    wavfile.write(audio_path, rate=speech["sampling_rate"], data=audio_data)
-    return caption, audio_path
 # Create Gradio interface
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),
-    outputs=[gr.Textbox(label="Generated Caption"), gr.Audio(label="Generated Audio", type="filepath")]
 )
 # Launch the interface

     # Generate the caption
     caption = caption_image(image)[0]['generated_text']
+    return caption
 # Create Gradio interface
 iface = gr.Interface(
     fn=process_image,
     inputs=gr.Image(type="pil"),
+    outputs=[gr.Textbox(label="Generated Caption")]
 )
 # Launch the interface