Spaces:

yalali
/

Week2_Assig1

Sleeping

App Files Files Community

yalali committed on Jun 30, 2025

Commit

a4c6844

verified ·

1 Parent(s): ba7d690

Upload app.py

Browse files

Files changed (1) hide show

app.py +66 -19

app.py CHANGED Viewed

@@ -1,8 +1,33 @@
-import gradio as gr
-from transformers import pipeline
 """## Define the speech-to-text function
@@ -16,7 +41,7 @@ Define a Python function that uses the `transformers` pipeline to transcribe an
 transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 sentiment =  pipeline("sentiment-analysis", verbose = 0)
-synthesizer = pipeline("text-to-speech", "suno/bark")
 def transcribe_audio(audio_file_path):
     """
@@ -48,7 +73,7 @@ def get_sentiment(text):
   result = sentiment(text)[0]
   return result['label'], result['score']
-# prompt: write function text-to-speech using pipeline
 def text_to_speech(text):
     """
@@ -61,7 +86,7 @@ def text_to_speech(text):
         The audio output.
     """
     audio_output = synthesizer(text)
-    return audio_output
 """## Create the gradio interface
@@ -88,12 +113,15 @@ Suminterface = gr.Interface(
 Seminterface = gr.Interface(fn=get_sentiment, inputs=gr.Textbox(label="enter the review"), outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")])
-# prompt: create gr for the text-to-speech function
-TTD_interface = gr.Interface(
     fn=text_to_speech,
-    inputs=gr.Textbox(label="Enter the text to synthesize"),
-    outputs=gr.Audio()
 )
 """## Launch the gradio interface
@@ -106,24 +134,43 @@ Launch the Gradio interface using the `launch()` method.
 """
 app = gr.TabbedInterface(
-    [Audinterface, Suminterface, Seminterface, TTD_interface],
     ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
 )
 app.launch()
-"""## Summary:
-### Data Analysis Key Findings
-*   The necessary libraries (`gradio`, `transformers`, `datasets`) for building the speech-to-text Gradio application were successfully installed.
-*   A Python function `transcribe_audio` was defined to perform speech-to-text transcription using the "facebook/wav2vec2-base-960h" model from the `transformers` library.
-*   A Gradio interface was successfully created, linking the `transcribe_audio` function to an audio file input component (`gr.Audio(type="filepath")`) and a text output component (`gr.Textbox()`).
-*   The Gradio application was successfully launched and is accessible via a public URL.
-### Insights or Next Steps
-*   The current implementation uses a specific pre-trained model. Future steps could explore using different or fine-tuned models to potentially improve transcription accuracy.
-*   Consider adding error handling to the `transcribe_audio` function to manage cases of invalid file types or transcription errors.
 """

+# -*- coding: utf-8 -*-
+"""Speech-to-text.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1KH-Ype8YQshQHjpSPZKLzEt1Ms0WAzu0
+## Install necessary libraries
+### Subtask:
+Install `gradio` and any other required libraries for the speech-to-text model.
+**Reasoning**:
+The first step is to install the necessary libraries, starting with `gradio`. I will also install `transformers` and `datasets` which are commonly used for speech-to-text models from the Hugging Face ecosystem.
+"""
# NOTE: the Colab export left an IPython shell escape here ("!pip ..."), which is a
# SyntaxError in a plain Python script such as app.py. The packages must be provided
# by the environment instead (e.g. listed in the Space's requirements.txt).
# !pip install gradio transformers datasets
+"""## Import libraries
+### Subtask:
+Import the necessary libraries for building the Gradio interface and the speech-to-text model.
+**Reasoning**:
+Import the necessary libraries for building the Gradio interface and the speech-to-text model.
+"""
+import gradio as gr
+from transformers import pipeline
 """## Define the speech-to-text function
 # Model pipelines are created once at module level, so they are built when the
 # script is loaded rather than on every request.
 # Automatic speech recognition with the wav2vec2 base checkpoint.
 transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
 # Summarization with a distilled BART checkpoint.
 summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
 # Sentiment analysis; no model is named here, so the library chooses one.
 sentiment =  pipeline("sentiment-analysis", verbose = 0)
 # Speech synthesizer. No task name is passed; presumably the task is inferred
 # from the model id - TODO confirm.
+synthesizer = pipeline(model="suno/bark-small")
 def transcribe_audio(audio_file_path):
     """
   result = sentiment(text)[0]
   return result['label'], result['score']
+# prompt: text-to-speech. Allow users to input a text and turn it to a voice. This is a prototype to show better web accessibility.
 def text_to_speech(text):
     """
     Synthesizes text into speech.

     Args:
         text: The text string to synthesize.

     Returns:
         The audio output as a tuple of (sampling_rate, audio_array), which is
         the order the Gradio audio output component expects.
     """
     audio_output = synthesizer(text)
     # Sampling rate goes first: returning (audio, sampling_rate) would make
     # the audio component read the waveform as the rate.
     return audio_output['sampling_rate'], audio_output['audio']
 """## Create the gradio interface
 Seminterface = gr.Interface(fn=get_sentiment, inputs=gr.Textbox(label="enter the review"), outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")])
+# prompt: write a Text-to-Speech model through Gradio.
+# Allow users to input a text and turn it to a voice. This is a prototype to show better web accessibility.
# Text-to-Speech tab: one textbox as input, one audio component as output.
# NOTE(review): fn is bound to the text_to_speech object that exists when this
# statement runs; a redefinition placed later in the file is not picked up.
+SpeechInterface = gr.Interface(
     fn=text_to_speech,
+    inputs=gr.Textbox(label="Enter Text"),
+    outputs=gr.Audio(label="Synthesized Speech")
 )
 """## Launch the gradio interface
 """
 # Combine the four interfaces into a single tabbed app; the second list gives
 # the tab titles in the same order as the interfaces.
 app = gr.TabbedInterface(
+    [Audinterface, Suminterface, Seminterface, SpeechInterface],
     ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
 )
 # NOTE(review): launch() presumably blocks when run as a script, so statements
 # after it would only execute once the server stops - confirm.
 app.launch()
# NOTE(review): looks like notebook residue. `audio` and `sampling_rate` are not
# assigned anywhere in the visible code, so this would raise NameError if
# reached - confirm and drop it from the deployed script.
+from IPython.display import Audio
+# Play the generated audio
+Audio(audio, rate=sampling_rate)
+# This is the corrected text_to_speech function for Gradio
def text_to_speech(text):
    """
    Synthesizes text into speech.

    NOTE(review): an interface captures the function object it is given when
    it is constructed, so this definition has to run before the Text-to-Speech
    interface is built for it to take effect.

    Args:
        text: The text string to synthesize.

    Returns:
        The audio output as a tuple of (sampling_rate, audio_array).

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
        Exception: Any error raised during synthesis is logged and re-raised.
    """
    # Reject empty input up front instead of handing it to the model.
    if not text or not text.strip():
        raise ValueError("No text provided for speech synthesis.")

    try:
        print(f"Attempting to synthesize text of length: {len(text)}")
        audio_output = synthesizer(text)
        print("Text synthesis successful.")
        # Return the sampling rate first, then the audio array, as a tuple
        return (audio_output['sampling_rate'], audio_output['audio'])
    except Exception as e:
        print(f"An error occurred during text synthesis: {e}")
        # Bare raise keeps the original traceback so Gradio might show it
        raise
+"""**Next Steps:**
+1.  **Execute the code cell above** to define the corrected `text_to_speech` function.
+2.  **Re-run the cell that launches the Gradio interface** (cell `9f75926a`).
+After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
 """