Spaces:
Sleeping
Sleeping
File size: 4,494 Bytes
1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 a4c6844 1f88245 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import gradio as gr
from transformers import pipeline
"""## Define the speech-to-text function
### Subtask:
Create a Python function that takes an audio file (MP3) as input and returns the transcribed text.
**Reasoning**:
Define a Python function that uses the `transformers` pipeline to transcribe an audio file.
"""
# Load every model pipeline once at import time so each Gradio request
# reuses the already-initialized models instead of reloading them.
transcriber = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# Pin the sentiment model explicitly (this is the library's documented default
# for "sentiment-analysis") so the app keeps working if the default changes.
# NOTE: the previous `verbose=0` was removed — `verbose` is not a valid
# `pipeline()` argument and was silently ignored.
sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
synthesizer = pipeline(model="suno/bark-small")
def transcribe_audio(audio_file_path):
    """Convert speech in an audio file to text.

    Args:
        audio_file_path: Filesystem path to the audio file (e.g. MP3).

    Returns:
        The transcribed text as a plain string.
    """
    result = transcriber(audio_file_path)
    return result["text"]
def summarize_text(text):
    """Produce a short summary of the input text via the summarization pipeline.

    Args:
        text: The input text string to summarize.

    Returns:
        The summarized text string (30-130 tokens, greedy decoding).
    """
    outputs = summarizer(text, max_length=130, min_length=30, do_sample=False)
    return outputs[0]["summary_text"]
def get_sentiment(text):
    """Classify the sentiment of the given text.

    Args:
        text: The text to classify.

    Returns:
        A (label, score) tuple, e.g. ("POSITIVE", 0.998).
    """
    top = sentiment(text)[0]
    return top["label"], top["score"]
# prompt: text-to-speech Allow users to input a text and turn it into a voice. This is a prototype to show better web accessibility.
def text_to_speech(text):
    """Synthesize text into speech with the Bark pipeline.

    Args:
        text: The text string to synthesize.

    Returns:
        A (sampling_rate, audio_array) tuple — the format gr.Audio expects
        for numpy-based audio output.
    """
    audio_output = synthesizer(text)
    # gr.Audio expects (sample_rate, data); the previous reversed order
    # (audio, sampling_rate) produced unplayable output in the UI.
    return audio_output['sampling_rate'], audio_output['audio']
"""## Create the gradio interface
### Subtask:
Use the `gradio` library to create a user interface with an audio input component and a text output component, linking them to the speech-to-text function.
**Reasoning**:
Create a Gradio interface linking the `transcribe_audio` function with an audio input and a textbox output.
"""
# One gr.Interface per capability; the four of them are combined into a
# single TabbedInterface further down.
Audinterface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=gr.Textbox(),
)
Suminterface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Summarized Text"),
    title="Text Summarization using LLM",
    description="Enter text to get a summarized version using a large language model.",
)
Seminterface = gr.Interface(
    fn=get_sentiment,
    inputs=gr.Textbox(label="enter the review"),
    outputs=[gr.Textbox(label="sentiment"), gr.Number(label="score")],
)
# prompt: write a Text-to-Speech model through Gradio.
# Allow users to input a text and turn it to a voice. This is a prototype to show better web accessibility.
SpeechInterface = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter Text"),
    outputs=gr.Audio(label="Synthesized Speech"),
)
"""## Launch the gradio interface
### Subtask:
Launch the Gradio application to make the interface accessible.
**Reasoning**:
Launch the Gradio interface using the `launch()` method.
"""
# Bundle the four demos into a single tabbed app and start the server.
demo_interfaces = [Audinterface, Suminterface, Seminterface, SpeechInterface]
tab_titles = ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
app = gr.TabbedInterface(demo_interfaces, tab_titles)
app.launch()
from IPython.display import Audio
# Notebook sanity check: the original referenced `audio` and `sampling_rate`,
# which were never defined at module level and raised NameError. Synthesize a
# sample here so playback is self-contained.
_sample = synthesizer("Hello, this is a test of the speech synthesizer.")
Audio(_sample['audio'], rate=_sample['sampling_rate'])
# This is the corrected text_to_speech function for Gradio
def text_to_speech(text):
    """Synthesize text into speech, logging progress and failures.

    Args:
        text: The text string to synthesize.

    Returns:
        The audio output as a tuple of (sampling_rate, audio_array) —
        the format gr.Audio expects.

    Raises:
        Exception: re-raises whatever the synthesizer pipeline raised,
        after printing it, so Gradio can surface the error.
    """
    try:
        print(f"Attempting to synthesize text of length: {len(text)}")
        audio_output = synthesizer(text)
        print("Text synthesis successful.")
        # Return the audio array and sampling rate as a tuple
        return (audio_output['sampling_rate'], audio_output['audio'])
    except Exception as e:
        print(f"An error occurred during text synthesis: {e}")
        # Bare `raise` preserves the original traceback (unlike `raise e`).
        raise
"""**Next Steps:**
1. **Execute the code cell above** to define the corrected `text_to_speech` function.
2. **Re-run the cell that launches the Gradio interface** (cell `9f75926a`).
After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
""" |