# NOTE: scrape residue from the Hugging Face Spaces page header
# ("Spaces: Sleeping") removed — it was not part of the program.
import gradio as gr
from transformers import pipeline

"""## Define the speech-to-text function

### Subtask:
Create a Python function that takes an audio file (MP3) as input and
returns the transcribed text.

**Reasoning**:
Define a Python function that uses the `transformers` pipeline to
transcribe an audio file.
"""

# Model pipelines used by the app (each triggers a model download on
# first run, so loading all four at import time can be slow).
# Speech-to-text (ASR) on audio files:
transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
# Abstractive text summarization:
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# Sentiment classification (default model for the task).
# NOTE(review): `verbose` is not a documented `pipeline()` argument —
# confirm it is accepted/ignored by the installed transformers version.
sentiment = pipeline("sentiment-analysis", verbose = 0)
# Text-to-speech synthesis:
synthesizer = pipeline(model="suno/bark-small")
def transcribe_audio(audio_file_path):
    """Transcribe speech from an audio file to text.

    Args:
        audio_file_path: Path to the audio file (e.g. an MP3) to
            transcribe.

    Returns:
        The transcribed text as a string.
    """
    # The ASR pipeline returns a dict; the transcript lives under "text".
    return transcriber(audio_file_path)["text"]
def summarize_text(text):
    """Summarize the input text with the loaded summarization pipeline.

    Args:
        text: The input text string to summarize.

    Returns:
        The summarized text string.
    """
    # Greedy decoding (do_sample=False) keeps output deterministic;
    # the pipeline returns a list with one result dict per input.
    results = summarizer(text, max_length=130, min_length=30, do_sample=False)
    first = results[0]
    return first["summary_text"]
def get_sentiment(text):
    """Classify the sentiment of *text*.

    Args:
        text: The text to classify.

    Returns:
        A (label, score) tuple — the predicted sentiment label and its
        confidence score for the top prediction.
    """
    top_prediction = sentiment(text)[0]
    return top_prediction["label"], top_prediction["score"]
# prompt: text-to-speech — allow users to input text and turn it into a voice.
# This is a prototype to demonstrate better web accessibility.
def text_to_speech(text):
    """Synthesize text into speech for the Gradio audio output.

    Args:
        text: The text string to synthesize.

    Returns:
        A (sampling_rate, audio_array) tuple — the format gr.Audio
        expects for numpy-based audio output.
    """
    audio_output = synthesizer(text)
    # BUG FIX: gr.Audio expects (sample_rate, data); the original
    # returned (audio, sampling_rate), which Gradio cannot play.
    return audio_output["sampling_rate"], audio_output["audio"]
| """## Create the gradio interface | |
| ### Subtask: | |
| Use the `gradio` library to create a user interface with an audio input component and a text output component, linking them to the speech-to-text function. | |
| **Reasoning**: | |
| Create a Gradio interface linking the `transcribe_audio` function with an audio input and a textbox output. | |
| """ | |
# Speech-to-text tab: upload/record audio, get the transcript back.
# type="filepath" hands transcribe_audio a path on disk, which the
# ASR pipeline accepts directly.
Audinterface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox()
)

# Summarization tab: free-text in, summary out.
Suminterface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Summarized Text"),
    title="Text Summarization using LLM",
    description="Enter text to get a summarized version using a large language model."
)

# Sentiment tab: get_sentiment returns (label, score), mapped to the
# two output components in order.
Seminterface = gr.Interface(fn=get_sentiment, inputs=gr.Textbox(label="enter the review"), outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")])

# Text-to-speech tab (prototype for web accessibility): text in,
# synthesized audio out.
SpeechInterface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter Text"),
    outputs=gr.Audio(label="Synthesized Speech")
)
| """## Launch the gradio interface | |
| ### Subtask: | |
| Launch the Gradio application to make the interface accessible. | |
| **Reasoning**: | |
| Launch the Gradio interface using the `launch()` method. | |
| """ | |
# Combine the four interfaces into a single tabbed app; the tab labels
# are positionally matched to the interface list.
app = gr.TabbedInterface(
    [Audinterface, Suminterface, Seminterface, SpeechInterface],
    ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
)
app.launch()

# BUG FIX (removed): the original then did
#     from IPython.display import Audio
#     Audio(audio, rate=sampling_rate)
# but neither `audio` nor `sampling_rate` is defined anywhere in this
# file, so that line raised NameError. Playback already happens through
# the gr.Audio component in the Text-to-Speech tab.
# This is the corrected text_to_speech function for Gradio.
def text_to_speech(text):
    """Synthesize text into speech.

    Args:
        text: The text string to synthesize.

    Returns:
        A (sampling_rate, audio_array) tuple, the shape gr.Audio
        accepts for numpy audio output.

    Raises:
        Re-raises any exception from the synthesis pipeline so that
        Gradio can surface the error in the UI.
    """
    try:
        print(f"Attempting to synthesize text of length: {len(text)}")
        speech = synthesizer(text)
        print("Text synthesis successful.")
        # Gradio expects (sample_rate, data) for numpy audio output.
        return speech["sampling_rate"], speech["audio"]
    except Exception as e:
        print(f"An error occurred during text synthesis: {e}")
        raise e
| """**Next Steps:** | |
| 1. **Execute the code cell above** to define the corrected `text_to_speech` function. | |
| 2. **Re-run the cell that launches the Gradio interface** (cell `9f75926a`). | |
| After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio. | |
| """ |