File size: 4,494 Bytes
1f88245
a4c6844
 
1f88245
 
 
 
 
 
 
 
 
 
 
 
 
a4c6844
1f88245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4c6844
1f88245
 
 
 
 
 
 
 
 
 
 
 
a4c6844
1f88245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4c6844
 
 
 
 
1f88245
a4c6844
 
1f88245
 
 
 
 
 
 
 
 
 
 
 
a4c6844
1f88245
 
 
 
 
a4c6844
1f88245
a4c6844
 
1f88245
a4c6844
1f88245
a4c6844
 
 
1f88245
a4c6844
 
1f88245
a4c6844
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f88245
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

import gradio as gr
from transformers import pipeline

"""## Define the speech-to-text function

### Subtask:
Create a Python function that takes an audio file (MP3) as input and returns the transcribed text.

**Reasoning**:
Define a Python function that uses the `transformers` pipeline to transcribe an audio file.
"""

# Build every pipeline once at import time; the handler functions below reuse
# these module-level objects on each call.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
# No model is named here, so the library chooses its default checkpoint for
# the task.
sentiment = pipeline(task="sentiment-analysis", verbose=0)
# No task is named here; presumably it is inferred from the model's hub
# metadata -- TODO confirm.
synthesizer = pipeline(model="suno/bark-small")

def transcribe_audio(audio_file_path):
    """
    Transcribes an audio file using the module-level speech-to-text pipeline.

    Args:
        audio_file_path: The path to the audio file (MP3). May be ``None`` or
            empty when the form is submitted without any audio.

    Returns:
        The transcribed text as a string, or an empty string when no audio
        path was provided.
    """
    # Without a path there is nothing to transcribe; return early instead of
    # handing a missing input to the pipeline.
    if not audio_file_path:
        return ""

    transcription = transcriber(audio_file_path)
    return transcription["text"]

def summarize_text(text):
    """Summarizes the input text using the loaded summarization pipeline.

    Args:
        text: The input text string to summarize.

    Returns:
        The summarized text string, or an empty string when ``text`` is empty
        or contains only whitespace.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        # Cut inputs that exceed the model's maximum input length instead of
        # letting the forward pass fail on them.
        truncation=True,
    )
    return summary[0]['summary_text']

def get_sentiment(text):
    """Classifies the sentiment of the input text.

    Args:
        text: The text string to classify.

    Returns:
        A ``(label, score)`` tuple taken from the first result returned by the
        sentiment pipeline.
    """
    predictions = sentiment(text)
    top_prediction = predictions[0]
    label = top_prediction['label']
    score = top_prediction['score']
    return label, score

# prompt: text-to-speech. Allow users to input text and turn it into speech. This is a prototype to show better web accessibility.

def text_to_speech(text):
    """
    Synthesizes text into speech.

    Args:
        text: The text string to synthesize.

    Returns:
        A ``(sampling_rate, audio_array)`` tuple, which is the order the
        Gradio ``Audio`` output component expects for NumPy audio, or
        ``None`` when ``text`` is empty or contains only whitespace.
    """
    # Nothing to synthesize: return no audio instead of calling the model.
    if not text or not text.strip():
        return None

    audio_output = synthesizer(text)
    waveform = audio_output['audio']
    # NOTE(review): the synthesizer may return a batched (1, n_samples) array;
    # Gradio reads 2-D audio as (samples, channels), so drop a leading
    # singleton axis -- confirm against the installed transformers version.
    if getattr(waveform, "ndim", 1) == 2 and waveform.shape[0] == 1:
        waveform = waveform[0]
    # Sample rate first, then samples.
    return audio_output['sampling_rate'], waveform

"""## Create the gradio interface

### Subtask:
Use the `gradio` library to create a user interface with an audio input component and a text output component, linking them to the speech-to-text function.

**Reasoning**:
Create a Gradio interface linking the `transcribe_audio` function with an audio input and a textbox output.
"""

# Speech-to-text tab: audio in, transcription text out.
# type="filepath" so that transcribe_audio is given a path to the audio file.
_transcription_input = gr.Audio(type="filepath")
_transcription_output = gr.Textbox()
Audinterface = gr.Interface(
    transcribe_audio,
    _transcription_input,
    _transcription_output,
)

# Summarization tab: free text in, summary text out.
Suminterface = gr.Interface(
    summarize_text,
    gr.Textbox(label="Input Text"),
    gr.Textbox(label="Summarized Text"),
    title="Text Summarization using LLM",
    description="Enter text to get a summarized version using a large language model.",
)

# Sentiment tab: review text in; get_sentiment's (label, score) pair is shown
# in the two output components, in that order.
_review_input = gr.Textbox(label="enter the review")
_sentiment_outputs = [
    gr.Textbox(label="sentiment"),
    gr.Number(label="score"),
]
Seminterface = gr.Interface(
    fn=get_sentiment,
    inputs=_review_input,
    outputs=_sentiment_outputs,
)



# prompt: build a Text-to-Speech demo with Gradio.
# Allow users to input text and turn it into speech. This is a prototype to show better web accessibility.

# Text-to-speech tab: text in, synthesized audio out.
# The function object bound to `text_to_speech` at this point in the file is
# the one this interface calls.
SpeechInterface = gr.Interface(
    text_to_speech,
    gr.Textbox(label="Enter Text"),
    gr.Audio(label="Synthesized Speech"),
)

"""## Launch the gradio interface

### Subtask:
Launch the Gradio application to make the interface accessible.

**Reasoning**:
Launch the Gradio interface using the `launch()` method.
"""

# Combine the four single-purpose interfaces into one app, one tab each.
# The tab titles are matched to the interfaces by position.
app = gr.TabbedInterface(
    [Audinterface, Suminterface, Seminterface, SpeechInterface],
    ["Audio Transcription", "Text Summarization", "Sentiment Analysis", "Text-to-Speech"]
)

# Start the server only when this file is executed directly, so importing the
# module (for tests or reuse) does not launch the app as a side effect.
if __name__ == "__main__":
    app.launch()

from IPython.display import Audio

# Play the generated audio (notebook helper).
# `audio` and `sampling_rate` are not defined at module level in this file, so
# the player is built on demand from explicit arguments rather than being
# evaluated while the module runs.
def play_audio(audio, sampling_rate):
    """Returns an IPython ``Audio`` player for the given waveform.

    Args:
        audio: The audio samples to play.
        sampling_rate: The sample rate, passed to the player as ``rate``.

    Returns:
        The ``IPython.display.Audio`` object built from the arguments.
    """
    return Audio(audio, rate=sampling_rate)

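# Corrected text_to_speech for Gradio: it returns (sampling_rate, audio_array).
# NOTE: SpeechInterface above was already built with the earlier definition, so
# this redefinition only takes effect if that interface is rebuilt afterwards.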

def text_to_speech(text):
    """
    Synthesizes text into speech.

    Args:
        text: The text string to synthesize.

    Returns:
        The audio output as a tuple of (sampling_rate, audio_array), or
        ``None`` when ``text`` is empty or contains only whitespace.

    Raises:
        Exception: Any error raised by the synthesizer is printed and then
            re-raised unchanged.
    """
    # Nothing to synthesize: return no audio instead of calling the model.
    if not text or not text.strip():
        return None

    print(f"Attempting to synthesize text of length: {len(text)}")
    # Keep the try body to the one call that is expected to fail.
    try:
        audio_output = synthesizer(text)
    except Exception as e:
        print(f"An error occurred during text synthesis: {e}")
        raise  # Re-raise the exception so Gradio might show it
    print("Text synthesis successful.")

    waveform = audio_output['audio']
    # NOTE(review): the synthesizer may return a batched (1, n_samples) array;
    # Gradio reads 2-D audio as (samples, channels), so drop a leading
    # singleton axis -- confirm against the installed transformers version.
    if getattr(waveform, "ndim", 1) == 2 and waveform.shape[0] == 1:
        waveform = waveform[0]
    # Sample rate first, then samples.
    return (audio_output['sampling_rate'], waveform)

"""**Next Steps:**

1.  **Execute the code cell above** to define the corrected `text_to_speech` function.
2.  **Re-run the cell that launches the Gradio interface** (cell `9f75926a`).

After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
"""