Spaces:
Sleeping
Sleeping
File size: 4,494 Bytes
1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import gradio as gr
from transformers import pipeline
"""## Define the speech-to-text function
### Subtask:
Create a Python function that takes an audio file (MP3) as input and returns the transcribed text.
**Reasoning**:
Define a Python function that uses the `transformers` pipeline to transcribe an audio file.
"""
# Load every model pipeline once at import time so each Gradio request
# reuses the already-initialized models instead of reloading them.
transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# Pin the sentiment model explicitly (this is the library's documented default
# for "sentiment-analysis") so the app keeps working if the default changes.
# NOTE: the previous `verbose=0` was removed — `verbose` is not a valid
# `pipeline()` argument and was silently ignored.
sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
synthesizer = pipeline(model="suno/bark-small")
def transcribe_audio(audio_file_path):
    """Convert speech in an audio file to text.

    Args:
        audio_file_path: Filesystem path to the audio file (e.g. MP3).

    Returns:
        The transcribed text as a plain string.
    """
    result = transcriber(audio_file_path)
    return result["text"]
def summarize_text(text):
    """Produce a short summary of the input text via the summarization pipeline.

    Args:
        text: The input text string to summarize.

    Returns:
        The summarized text string (30-130 tokens, greedy decoding).
    """
    outputs = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return outputs[0]["summary_text"]
def get_sentiment(text):
    """Classify the sentiment of the given text.

    Args:
        text: The text to classify.

    Returns:
        A (label, score) tuple, e.g. ("POSITIVE", 0.998).
    """
    top = sentiment(text)[0]
    return top["label"], top["score"]
# prompt: text-to-speech Allow users to input a text and turn it into a voice. This is a prototype to show better web accessibility.
def text_to_speech(text):
    """Synthesize text into speech with the Bark pipeline.

    Args:
        text: The text string to synthesize.

    Returns:
        A (sampling_rate, audio_array) tuple — the format gr.Audio expects
        for numpy-based audio output.
    """
    audio_output = synthesizer(text)
    # gr.Audio expects (sample_rate, data); the previous reversed order
    # (audio, sampling_rate) produced unplayable output in the UI.
    return audio_output['sampling_rate'], audio_output['audio']
"""## Create the gradio interface
### Subtask:
Use the `gradio` library to create a user interface with an audio input component and a text output component, linking them to the speech-to-text function.
**Reasoning**:
Create a Gradio interface linking the `transcribe_audio` function with an audio input and a textbox output.
"""
# One gr.Interface per capability; the four of them are combined into a
# single TabbedInterface further down.
Audinterface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(),
)
Suminterface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Summarized Text"),
    title="Text Summarization using LLM",
    description="Enter text to get a summarized version using a large language model.",
)
Seminterface = gr.Interface(
    fn=get_sentiment,
    inputs=gr.Textbox(label="enter the review"),
    outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")],
)
# prompt: write a Text-to-Speech model through Gradio.
# Allow users to input a text and turn it to a voice. This is a prototype to show better web accessibility.
SpeechInterface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter Text"),
    outputs=gr.Audio(label="Synthesized Speech"),
)
"""## Launch the gradio interface
### Subtask:
Launch the Gradio application to make the interface accessible.
**Reasoning**:
Launch the Gradio interface using the `launch()` method.
"""
# Bundle the four demos into a single tabbed app and start the server.
demo_interfaces = [Audinterface, Suminterface, Seminterface, SpeechInterface]
tab_titles = ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
app = gr.TabbedInterface(demo_interfaces, tab_titles)
app.launch()
from IPython.display import Audio
# Notebook sanity check: the original referenced `audio` and `sampling_rate`,
# which were never defined at module level and raised NameError. Synthesize a
# sample here so playback is self-contained.
_sample = synthesizer("Hello, this is a test of the speech synthesizer.")
Audio(_sample['audio'], rate=_sample['sampling_rate'])
# This is the corrected text_to_speech function for Gradio
def text_to_speech(text):
    """Synthesize text into speech, logging progress and failures.

    Args:
        text: The text string to synthesize.

    Returns:
        The audio output as a tuple of (sampling_rate, audio_array) —
        the format gr.Audio expects.

    Raises:
        Exception: re-raises whatever the synthesizer pipeline raised,
        after printing it, so Gradio can surface the error.
    """
    try:
        print(f"Attempting to synthesize text of length: {len(text)}")
        audio_output = synthesizer(text)
        print("Text synthesis successful.")
        # Return the audio array and sampling rate as a tuple
        return (audio_output['sampling_rate'], audio_output['audio'])
    except Exception as e:
        print(f"An error occurred during text synthesis: {e}")
        # Bare `raise` preserves the original traceback (unlike `raise e`).
        raise
"""**Next Steps:**
1. **Execute the code cell above** to define the corrected `text_to_speech` function.
2. **Re-run the cell that launches the Gradio interface** (cell `9f75926a`).
After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
""" |