Week2_Assig1 / app.py
yalali's picture
Upload app.py
934436b verified
import gradio as gr
from transformers import pipeline
"""## Define the speech-to-text function
### Subtask:
Create a Python function that takes an audio file (MP3) as input and returns the transcribed text.
**Reasoning**:
Define a Python function that uses the `transformers` pipeline to transcribe an audio file.
"""
# Load each model once at import time so every request reuses the same pipeline.

# Speech-to-text model used by transcribe_audio.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# Summarization model used by summarize_text.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)

# No model is named here, so the library's default sentiment model is used.
# NOTE(review): verbose=0 is forwarded to the pipeline, presumably to quieten
# tokenizer warnings -- confirm.
sentiment = pipeline(task="sentiment-analysis", verbose=0)

# No task is named here; it is left to the library to work out from the model.
synthesizer = pipeline(model="suno/bark-small")
def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the module-level ``transcriber`` pipeline.

    Args:
        audio_file_path: Path to the audio file to transcribe. May be ``None``
            or empty when the form is submitted without a recording or upload.

    Returns:
        The transcribed text as a string, or an empty string when no audio
        was supplied.
    """
    # Nothing recorded or uploaded: return an empty transcript instead of
    # handing a missing path to the pipeline.
    if not audio_file_path:
        return ""
    transcription = transcriber(audio_file_path)
    return transcription["text"]
def summarize_text(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The input text string to summarize.

    Returns:
        The summarized text string, or an empty string when ``text`` is
        empty or contains only whitespace.
    """
    # There is nothing to summarize in an empty box; skip the model call.
    if not text or not text.strip():
        return ""
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        # Cut over-long input down to the model's limit instead of failing.
        truncation=True,
    )
    return summary[0]['summary_text']
def get_sentiment(text):
    """Classify the sentiment of ``text`` with the ``sentiment`` pipeline.

    Args:
        text: The review text to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction.
    """
    # truncation=True keeps reviews longer than the model's input limit from
    # raising inside the pipeline; shorter inputs are unaffected.
    result = sentiment(text, truncation=True)[0]
    return result['label'], result['score']
# prompt: text-to-speech. Allow users to input text and turn it into a voice. This is a prototype to show better web accessibility.
def text_to_speech(text):
    """Synthesize speech for ``text`` with the module-level ``synthesizer``.

    Args:
        text: The text string to synthesize.

    Returns:
        A ``(sampling_rate, audio_array)`` tuple, which is the order a
        ``gr.Audio`` output component takes, or ``None`` when ``text`` is
        empty or contains only whitespace.
    """
    import numpy as np

    # No text means no audio; returning None leaves the audio output empty.
    if not text or not text.strip():
        return None
    audio_output = synthesizer(text)
    # The pipeline may return the waveform with a leading size-1 batch axis;
    # squeeze drops it and is a no-op when the array is already 1-D.
    waveform = np.squeeze(np.asarray(audio_output['audio']))
    return audio_output['sampling_rate'], waveform
"""## Create the gradio interface
### Subtask:
Use the `gradio` library to create a user interface with an audio input component and a text output component, linking them to the speech-to-text function.
**Reasoning**:
Create a Gradio interface linking the `transcribe_audio` function with an audio input and a textbox output.
"""
# Audio in (passed to the function as a file path), transcript out.
Audinterface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(),
)

# Free text in, summary out.
Suminterface = gr.Interface(
    title="Text Summarization using LLM",
    description="Enter text to get a summarized version using a large language model.",
    fn=summarize_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Summarized Text"),
)

# Review text in; the predicted label and its score come back as two outputs.
Seminterface = gr.Interface(
    fn=get_sentiment,
    inputs=gr.Textbox(label="enter the review"),
    outputs=[
        gr.Textbox(label="sentiment"),
        gr.Number(label="score"),
    ],
)

# Text-to-speech prototype for better web accessibility: the user types text
# and gets it back as playable audio.
SpeechInterface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter Text"),
    outputs=gr.Audio(label="Synthesized Speech"),
)
"""## Launch the gradio interface
### Subtask:
Launch the Gradio application to make the interface accessible.
**Reasoning**:
Launch the Gradio interface using the `launch()` method.
"""
# Combine the four interfaces into one app, one tab each. The tab names are
# listed in the same order as the interfaces they label.
app = gr.TabbedInterface(
    interface_list=[
        Audinterface,
        Suminterface,
        Seminterface,
        SpeechInterface,
    ],
    tab_names=[
        "Audio Transcription",
        "Text Summarization",
        "Sentiment Analysis",
        "Text-to-Speech",
    ],
)

# Start the Gradio server.
app.launch()
def play_audio_in_notebook(text):
    """Synthesize ``text`` and return an inline notebook audio player for it.

    The original module-level playback referenced ``audio`` and
    ``sampling_rate``, which are never defined in this file, so it raised
    ``NameError`` when reached. This helper builds those values itself.

    Args:
        text: The text string to synthesize and play.

    Returns:
        An ``IPython.display.Audio`` object, or ``None`` when
        ``text_to_speech`` produced no audio.
    """
    # Imported lazily so the app does not need IPython outside a notebook.
    from IPython.display import Audio

    result = text_to_speech(text)
    if result is None:
        return None
    # The final text_to_speech definition in this file returns
    # (sampling_rate, audio_array).
    sampling_rate, audio = result
    # Play the generated audio
    return Audio(audio, rate=sampling_rate)
# Gradio-ready text_to_speech: returns (sampling_rate, audio_array).
def text_to_speech(text):
    """Synthesize speech for ``text`` with the module-level ``synthesizer``.

    Progress and failures are printed so they show up in the app's console
    output.

    Args:
        text: The text string to synthesize.

    Returns:
        The audio output as a tuple of ``(sampling_rate, audio_array)``, or
        ``None`` when ``text`` is empty or contains only whitespace.

    Raises:
        Exception: Any error raised during synthesis is printed and then
            re-raised unchanged.
    """
    import numpy as np

    # No text means no audio; returning None leaves the audio output empty.
    if not text or not text.strip():
        return None
    try:
        print(f"Attempting to synthesize text of length: {len(text)}")
        audio_output = synthesizer(text)
        print("Text synthesis successful.")
        # The pipeline may return the waveform with a leading size-1 batch
        # axis; squeeze drops it and is a no-op when the array is already 1-D.
        waveform = np.squeeze(np.asarray(audio_output['audio']))
        return (audio_output['sampling_rate'], waveform)
    except Exception as e:
        print(f"An error occurred during text synthesis: {e}")
        # Bare raise re-raises with the original traceback intact so Gradio
        # might show it.
        raise
"""**Next Steps:**
1. **Execute the code cell above** to define the corrected `text_to_speech` function.
2. **Re-run the cell that launches the Gradio interface** (cell `9f75926a`).
After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
"""