Spaces:

DemahAlmutairi
/

AudioToAudio

Runtime error

App Files Files Community

DemahAlmutairi committed on Feb 26, 2025

Commit

491d71f

verified ·

1 Parent(s): 4d1c44f

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -12

app.py CHANGED Viewed

@@ -1,19 +1,16 @@
 import gradio as gr
 import whisper
 from transformers import pipeline
-import torch
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # Load the Whisper model from openai-whisper
 whisper_model = whisper.load_model("tiny")
-whisper_model=whisper_model.to(device)
 # Load the summarization model from Hugging Face
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 def summarize_audio(audio_path):
     # Step 1: Transcribe the uploaded audio file using Whisper
     transcription_result = whisper_model.transcribe(audio_path)
@@ -22,21 +19,26 @@ def summarize_audio(audio_path):
     # Step 2: Summarize the transcribed text using a pre-trained summarization model
     summary = summarizer(transcription, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
-    return summary
 # Gradio interface
 interface = gr.Interface(
     fn=summarize_audio,  # The function to process the audio and return summarized audio
     inputs=gr.Audio(type="filepath", label="Upload your audio file"),  # Accept audio file uploads, file path as input
-    #outputs="text",  # Provide a downloadable summarized audio file
-    outputs=gr.Textbox(label="summarized audio file"),
     title="Audio Summarizer",  # Interface title
-    description="Upload an audio file, and this tool will summarize it.",  # Interface description
     examples=[["audio_sample1.mp3"]]
 )
 # Launch the Gradio interface
-interface.launch()

 import gradio as gr
 import whisper
 from transformers import pipeline
+from gtts import gTTS
+import os
 # Load the Whisper model from openai-whisper
 whisper_model = whisper.load_model("tiny")
 # Load the summarization model from Hugging Face
 summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 def summarize_audio(audio_path):
     # Step 1: Transcribe the uploaded audio file using Whisper
     transcription_result = whisper_model.transcribe(audio_path)
     # Step 2: Summarize the transcribed text using a pre-trained summarization model
     summary = summarizer(transcription, max_length=50, min_length=25, do_sample=False)[0]['summary_text']
+    # Step 3: Convert the summarized text into speech using the gTTS library
+    # Breakdown into multiple steps
+    tts = gTTS(text=summary, lang='en')           # Generate the TTS output
+    tts.save("summarized_audio.wav")
+    # Save the TTS audio to a file (NOTE: gTTS writes MP3 data, even though the file name ends in .wav)
+    # Return the path to the saved summarized audio file
+    return "summarized_audio.wav"
 # Gradio interface
 interface = gr.Interface(
     fn=summarize_audio,  # The function to process the audio and return summarized audio
     inputs=gr.Audio(type="filepath", label="Upload your audio file"),  # Accept audio file uploads, file path as input
+    outputs=gr.File(label="Download Summarized Audio"),  # Provide a downloadable summarized audio file
     title="Audio Summarizer",  # Interface title
+    description="Upload an audio file, and this tool will summarize it and generate a downloadable audio summary." , # Interface description
     examples=[["audio_sample1.mp3"]]
 )
 # Launch the Gradio interface
+interface.launch(debug=True)