yalali committed on
Commit
a4c6844
·
verified ·
1 Parent(s): ba7d690

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -19
app.py CHANGED
@@ -1,8 +1,33 @@
 
 
1
 
2
- import gradio as gr
3
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
 
 
 
5
 
 
 
6
 
7
  """## Define the speech-to-text function
8
 
@@ -16,7 +41,7 @@ Define a Python function that uses the `transformers` pipeline to transcribe an
16
  transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
17
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
18
  sentiment = pipeline("sentiment-analysis", verbose = 0)
19
- synthesizer = pipeline("text-to-speech", "suno/bark")
20
 
21
  def transcribe_audio(audio_file_path):
22
  """
@@ -48,7 +73,7 @@ def get_sentiment(text):
48
  result = sentiment(text)[0]
49
  return result['label'], result['score']
50
 
51
- # prompt: write function text-to-speech using pipeline
52
 
53
  def text_to_speech(text):
54
  """
@@ -61,7 +86,7 @@ def text_to_speech(text):
61
  The audio output.
62
  """
63
  audio_output = synthesizer(text)
64
- return audio_output
65
 
66
  """## Create the gradio interface
67
 
@@ -88,12 +113,15 @@ Suminterface = gr.Interface(
88
 
89
  Seminterface = gr.Interface(fn=get_sentiment, inputs=gr.Textbox(label="enter the review"), outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")])
90
 
91
- # prompt: create gr for the text-to-speech function
92
 
93
- TTD_interface = gr.Interface(
 
 
 
 
94
  fn=text_to_speech,
95
- inputs=gr.Textbox(label="Enter the text to synthesize"),
96
- outputs=gr.Audio()
97
  )
98
 
99
  """## Launch the gradio interface
@@ -106,24 +134,43 @@ Launch the Gradio interface using the `launch()` method.
106
  """
107
 
108
  app = gr.TabbedInterface(
109
- [Audinterface, Suminterface, Seminterface, TTD_interface],
110
  ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
111
  )
112
 
113
  app.launch()
114
 
115
- """## Summary:
116
 
117
- ### Data Analysis Key Findings
 
118
 
119
- * The necessary libraries (`gradio`, `transformers`, `datasets`) for building the speech-to-text Gradio application were successfully installed.
120
- * A Python function `transcribe_audio` was defined to perform speech-to-text transcription using the "facebook/wav2vec2-base-960h" model from the `transformers` library.
121
- * A Gradio interface was successfully created, linking the `transcribe_audio` function to an audio file input component (`gr.Audio(type="filepath")`) and a text output component (`gr.Textbox()`).
122
- * The Gradio application was successfully launched and is accessible via a public URL.
123
 
124
- ### Insights or Next Steps
 
 
125
 
126
- * The current implementation uses a specific pre-trained model. Future steps could explore using different or fine-tuned models to potentially improve transcription accuracy.
127
- * Consider adding error handling to the `transcribe_audio` function to manage cases of invalid file types or transcription errors.
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  """
 
1
+ # -*- coding: utf-8 -*-
2
+ """Speech-to-text.ipynb
3
 
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1KH-Ype8YQshQHjpSPZKLzEt1Ms0WAzu0
8
+
9
+ ## Install necessary libraries
10
+
11
+ ### Subtask:
12
+ Install `gradio` and any other required libraries for the speech-to-text model.
13
+
14
+ **Reasoning**:
15
+ The first step is to install the necessary libraries, starting with `gradio`. I will also install `transformers` and `datasets`, which are commonly used for speech-to-text models from the Hugging Face ecosystem.
16
+ """
17
+
18
+ !pip install gradio transformers datasets
19
+
20
+ """## Import libraries
21
+
22
+ ### Subtask:
23
+ Import the necessary libraries for building the Gradio interface and the speech-to-text model.
24
 
25
+ **Reasoning**:
26
+ Import the necessary libraries for building the Gradio interface and the speech-to-text model.
27
+ """
28
 
29
+ import gradio as gr
30
+ from transformers import pipeline
31
 
32
  """## Define the speech-to-text function
33
 
 
41
  transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
42
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
43
  sentiment = pipeline("sentiment-analysis", verbose = 0)
44
+ synthesizer = pipeline(model="suno/bark-small")
45
 
46
  def transcribe_audio(audio_file_path):
47
  """
 
73
  result = sentiment(text)[0]
74
  return result['label'], result['score']
75
 
76
+ # prompt: text-to-speech. Allow users to input text and turn it into speech. This is a prototype to show better web accessibility.
77
 
78
  def text_to_speech(text):
79
  """
 
86
  The audio output.
87
  """
88
  audio_output = synthesizer(text)
89
+ return audio_output['audio'], audio_output['sampling_rate']
90
 
91
  """## Create the gradio interface
92
 
 
113
 
114
  Seminterface = gr.Interface(fn=get_sentiment, inputs=gr.Textbox(label="enter the review"), outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")])
115
 
 
116
 
117
+
118
+ # prompt: write a Text-to-Speech model through Gradio.
119
+ # Allow users to input text and turn it into speech. This is a prototype to show better web accessibility.
120
+
121
+ SpeechInterface = gr.Interface(
122
  fn=text_to_speech,
123
+ inputs=gr.Textbox(label="Enter Text"),
124
+ outputs=gr.Audio(label="Synthesized Speech")
125
  )
126
 
127
  """## Launch the gradio interface
 
134
  """
135
 
136
  app = gr.TabbedInterface(
137
+ [Audinterface, Suminterface, Seminterface, SpeechInterface],
138
  ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
139
  )
140
 
141
  app.launch()
142
 
143
+ from IPython.display import Audio
144
 
145
+ # Play the generated audio
146
+ Audio(audio, rate=sampling_rate)
147
 
148
+ # This is the corrected text_to_speech function for Gradio
 
 
 
149
 
150
+ def text_to_speech(text):
151
+ """
152
+ Synthesizes text into speech.
153
 
154
+ Args:
155
+ text: The text string to synthesize.
156
 
157
+ Returns:
158
+ The audio output as a tuple of (sampling_rate, audio_array).
159
+ """
160
+ try:
161
+ print(f"Attempting to synthesize text of length: {len(text)}")
162
+ audio_output = synthesizer(text)
163
+ print("Text synthesis successful.")
164
+ # Return the sampling rate and audio array as a (sampling_rate, audio_array) tuple
165
+ return (audio_output['sampling_rate'], audio_output['audio'])
166
+ except Exception as e:
167
+ print(f"An error occurred during text synthesis: {e}")
168
+ raise e # Re-raise the exception so Gradio might show it
169
+
170
+ """**Next Steps:**
171
+
172
+ 1. **Execute the code cell above** to define the corrected `text_to_speech` function.
173
+ 2. **Re-run the cell that launches the Gradio interface** (cell `9f75926a`).
174
+
175
+ After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
176
  """