Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -1,8 +1,33 @@
|
|
|
|
|
|
|
|
| 1 |
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
|
|
|
|
|
|
|
| 6 |
|
| 7 |
"""## Define the speech-to-text function
|
| 8 |
|
|
@@ -16,7 +41,7 @@ Define a Python function that uses the `transformers` pipeline to transcribe an
|
|
| 16 |
transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
| 17 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 18 |
sentiment = pipeline("sentiment-analysis", verbose = 0)
|
| 19 |
-
synthesizer = pipeline("
|
| 20 |
|
| 21 |
def transcribe_audio(audio_file_path):
|
| 22 |
"""
|
|
@@ -48,7 +73,7 @@ def get_sentiment(text):
|
|
| 48 |
result = sentiment(text)[0]
|
| 49 |
return result['label'], result['score']
|
| 50 |
|
| 51 |
-
# prompt:
|
| 52 |
|
| 53 |
def text_to_speech(text):
|
| 54 |
"""
|
|
@@ -61,7 +86,7 @@ def text_to_speech(text):
|
|
| 61 |
The audio output.
|
| 62 |
"""
|
| 63 |
audio_output = synthesizer(text)
|
| 64 |
-
return audio_output
|
| 65 |
|
| 66 |
"""## Create the gradio interface
|
| 67 |
|
|
@@ -88,12 +113,15 @@ Suminterface = gr.Interface(
|
|
| 88 |
|
| 89 |
Seminterface = gr.Interface(fn=get_sentiment, inputs=gr.Textbox(label="enter the review"), outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")])
|
| 90 |
|
| 91 |
-
# prompt: create gr for the text-to-speech function
|
| 92 |
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
fn=text_to_speech,
|
| 95 |
-
inputs=gr.Textbox(label="Enter
|
| 96 |
-
outputs=gr.Audio()
|
| 97 |
)
|
| 98 |
|
| 99 |
"""## Launch the gradio interface
|
|
@@ -106,24 +134,43 @@ Launch the Gradio interface using the `launch()` method.
|
|
| 106 |
"""
|
| 107 |
|
| 108 |
app = gr.TabbedInterface(
|
| 109 |
-
[Audinterface, Suminterface, Seminterface,
|
| 110 |
["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
|
| 111 |
)
|
| 112 |
|
| 113 |
app.launch()
|
| 114 |
|
| 115 |
-
|
| 116 |
|
| 117 |
-
|
|
|
|
| 118 |
|
| 119 |
-
|
| 120 |
-
* A Python function `transcribe_audio` was defined to perform speech-to-text transcription using the "facebook/wav2vec2-base-960h" model from the `transformers` library.
|
| 121 |
-
* A Gradio interface was successfully created, linking the `transcribe_audio` function to an audio file input component (`gr.Audio(type="filepath")`) and a text output component (`gr.Textbox()`).
|
| 122 |
-
* The Gradio application was successfully launched and is accessible via a public URL.
|
| 123 |
|
| 124 |
-
|
|
|
|
|
|
|
| 125 |
|
| 126 |
-
|
| 127 |
-
|
| 128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
"""
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""Speech-to-text.ipynb
|
| 3 |
|
| 4 |
+
Automatically generated by Colab.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/1KH-Ype8YQshQHjpSPZKLzEt1Ms0WAzu0
|
| 8 |
+
|
| 9 |
+
## Install necessary libraries
|
| 10 |
+
|
| 11 |
+
### Subtask:
|
| 12 |
+
Install `gradio` and any other required libraries for the speech-to-text model.
|
| 13 |
+
|
| 14 |
+
**Reasoning**:
|
| 15 |
+
The first step is to install the necessary libraries, starting with `gradio`. I will also install `transformers` and `datasets` which are commonly used for speech-to-text models from the Hugging Face ecosystem.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
# NOTE: `!pip install gradio transformers datasets` is IPython/Colab shell
# magic and is a SyntaxError in a plain .py file. Declare these packages in
# the project's requirements.txt instead of installing them at import time.
|
| 19 |
+
|
| 20 |
+
"""## Import libraries
|
| 21 |
+
|
| 22 |
+
### Subtask:
|
| 23 |
+
Import the necessary libraries for building the Gradio interface and the speech-to-text model.
|
| 24 |
|
| 25 |
+
**Reasoning**:
|
| 26 |
+
Import the necessary libraries for building the Gradio interface and the speech-to-text model.
|
| 27 |
+
"""
|
| 28 |
|
| 29 |
+
import gradio as gr
|
| 30 |
+
from transformers import pipeline
|
| 31 |
|
| 32 |
"""## Define the speech-to-text function
|
| 33 |
|
|
|
|
| 41 |
transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
|
| 42 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
| 43 |
sentiment = pipeline("sentiment-analysis", verbose = 0)
|
| 44 |
+
# Name the task explicitly, as the sibling pipelines in this file do, rather
# than relying on it being inferred from the model id.
synthesizer = pipeline("text-to-speech", model="suno/bark-small")
|
| 45 |
|
| 46 |
def transcribe_audio(audio_file_path):
|
| 47 |
"""
|
|
|
|
| 73 |
result = sentiment(text)[0]
|
| 74 |
return result['label'], result['score']
|
| 75 |
|
| 76 |
+
# prompt: text-to-speech. Allow users to input text and turn it into a voice. This is a prototype to show better web accessibility.
|
| 77 |
|
| 78 |
def text_to_speech(text):
|
| 79 |
"""
|
|
|
|
| 86 |
The audio output.
|
| 87 |
"""
|
| 88 |
audio_output = synthesizer(text)
|
| 89 |
+
return audio_output['audio'], audio_output['sampling_rate']
|
| 90 |
|
| 91 |
"""## Create the gradio interface
|
| 92 |
|
|
|
|
| 113 |
|
| 114 |
Seminterface = gr.Interface(fn=get_sentiment, inputs=gr.Textbox(label="enter the review"), outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")])
|
| 115 |
|
|
|
|
| 116 |
|
| 117 |
+
|
| 118 |
+
# prompt: write a Text-to-Speech model through Gradio.
|
| 119 |
+
# Allow users to input a text and turn it to a voice. This is a prototype to show better web accessibility.
|
| 120 |
+
|
| 121 |
+
SpeechInterface = gr.Interface(
|
| 122 |
fn=text_to_speech,
|
| 123 |
+
inputs=gr.Textbox(label="Enter Text"),
|
| 124 |
+
outputs=gr.Audio(label="Synthesized Speech")
|
| 125 |
)
|
| 126 |
|
| 127 |
"""## Launch the gradio interface
|
|
|
|
| 134 |
"""
|
| 135 |
|
| 136 |
app = gr.TabbedInterface(
|
| 137 |
+
[Audinterface, Suminterface, Seminterface, SpeechInterface],
|
| 138 |
["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
|
| 139 |
)
|
| 140 |
|
| 141 |
app.launch()
|
| 142 |
|
| 143 |
+
from IPython.display import Audio
|
| 144 |
|
| 145 |
+
# Play the generated audio
|
| 146 |
+
# NOTE(review): `audio` and `sampling_rate` are not defined in the visible part
# of this file, and this statement sits after `app.launch()`. It looks like a
# notebook-only leftover -- confirm, and remove it from the app script if so.
Audio(audio, rate=sampling_rate)
|
| 147 |
|
| 148 |
+
# This is the corrected text_to_speech function for Gradio
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
+
# NOTE(review): in the visible file this definition comes after the
# text-to-speech gr.Interface is built and after app.launch(); an interface
# created earlier keeps the earlier function object. Confirm the ordering.
def text_to_speech(text):
    """
    Synthesize text into speech.

    Args:
        text: The text string to synthesize.

    Returns:
        A ``(sampling_rate, audio_array)`` tuple, taken from the
        ``'sampling_rate'`` and ``'audio'`` entries of the synthesizer output.

    Raises:
        Exception: Any error raised while synthesizing is printed and then
            re-raised unchanged so the caller (e.g. Gradio) can surface it.
    """
    try:
        print(f"Attempting to synthesize text of length: {len(text)}")
        audio_output = synthesizer(text)
        print("Text synthesis successful.")
        # Order matters: sampling rate first, then the audio samples.
        return (audio_output['sampling_rate'], audio_output['audio'])
    except Exception as e:
        print(f"An error occurred during text synthesis: {e}")
        # Bare `raise` re-raises the active exception with its original
        # traceback instead of re-binding it via `raise e`.
        raise
|
| 169 |
+
|
| 170 |
+
"""**Next Steps:**
|
| 171 |
+
|
| 172 |
+
1. **Execute the code cell above** to define the corrected `text_to_speech` function.
|
| 173 |
+
2. **Re-run the cell that launches the Gradio interface** (cell `9f75926a`).
|
| 174 |
+
|
| 175 |
+
After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
|
| 176 |
"""
|