Spaces:

yalali
/

Week2_Assig1

Sleeping

App Files Files Community

Week2_Assig1 / app.py

yalali

Upload app.py

934436b verified 8 months ago

raw

history blame contribute delete

4.49 kB


	import gradio as gr
	from transformers import pipeline

	"""## Define the speech-to-text function

	### Subtask:
	Create a Python function that takes an audio file (MP3) as input and returns the transcribed text.

	Reasoning:
	Define a Python function that uses the `transformers` pipeline to transcribe an audio file.
	"""

	# Hugging Face pipelines, created once at import time and reused by the
	# handler functions in this file.

	# Speech-to-text.
	transcriber = pipeline(
	    task="automatic-speech-recognition",
	    model="facebook/wav2vec2-base-960h",
	)

	# Text summarization.
	summarizer = pipeline(
	    task="summarization",
	    model="sshleifer/distilbart-cnn-12-6",
	)

	# Sentiment classification; no checkpoint is named here, so the library
	# picks its default for the task.
	sentiment = pipeline(task="sentiment-analysis", verbose=0)

	# Text-to-speech; only the checkpoint is given.
	synthesizer = pipeline(model="suno/bark-small")

	def transcribe_audio(audio_file_path):
	    """Transcribe an audio file with the module-level speech-to-text pipeline.

	    Args:
	        audio_file_path: Path to the audio file to transcribe. May be
	            ``None`` or empty when no audio was provided.

	    Returns:
	        The transcribed text as a string, or an empty string when no audio
	        path was given.
	    """
	    # Nothing to transcribe: return early instead of handing None to the
	    # pipeline, which would raise.
	    if not audio_file_path:
	        return ""

	    transcription = transcriber(audio_file_path)
	    return transcription["text"]

	def summarize_text(text):
	    """Summarize the input text with the module-level summarization pipeline.

	    Args:
	        text: The input text string to summarize. May be ``None`` or blank.

	    Returns:
	        The summarized text string, or an empty string when ``text`` is
	        ``None``, empty, or whitespace-only.
	    """
	    # Blank input has nothing to summarize; skip the model call entirely.
	    if not text or not text.strip():
	        return ""

	    summary = summarizer(
	        text,
	        max_length=130,
	        min_length=30,
	        do_sample=False,
	        # Truncate over-long input to the model's limit instead of failing.
	        truncation=True,
	    )
	    return summary[0]['summary_text']

	def get_sentiment(text):
	    """Classify *text* with the module-level ``sentiment`` pipeline.

	    Args:
	        text: The text to classify.

	    Returns:
	        A ``(label, score)`` tuple taken from the first prediction.
	    """
	    predictions = sentiment(text)
	    first = predictions[0]
	    label = first['label']
	    score = first['score']
	    return label, score

	# prompt: text-to-speech. Allow users to input text and turn it into a voice. This is a prototype to show better web accessibility.

	def text_to_speech(text):
	    """Synthesize speech from text for a Gradio audio output.

	    Args:
	        text: The text string to synthesize. May be ``None`` or blank.

	    Returns:
	        A ``(sampling_rate, audio)`` tuple, or ``None`` when ``text`` is
	        ``None``, empty, or whitespace-only.
	    """
	    # Nothing to say: return no audio rather than running the model.
	    if not text or not text.strip():
	        return None

	    audio_output = synthesizer(text)
	    # Gradio's Audio output expects (sample_rate, data), in that order.
	    # NOTE(review): if the pipeline returns the waveform with a leading
	    # channel axis, it may need reshaping before Gradio accepts it - confirm.
	    return audio_output['sampling_rate'], audio_output['audio']

	"""## Create the gradio interface

	### Subtask:
	Use the `gradio` library to create a user interface with an audio input component and a text output component, linking them to the speech-to-text function.

	Reasoning:
	Create a Gradio interface linking the `transcribe_audio` function with an audio input and a textbox output.
	"""

	# Speech-to-text UI: the uploaded/recorded audio reaches the handler as a
	# file path and the transcript is shown in a text box.
	audio_in = gr.Audio(type="filepath")
	Audinterface = gr.Interface(transcribe_audio, audio_in, gr.Textbox())

	# Summarization UI: free text in, summary text out.
	Suminterface = gr.Interface(
	    summarize_text,
	    gr.Textbox(label="Input Text"),
	    gr.Textbox(label="Summarized Text"),
	    title="Text Summarization using LLM",
	    description="Enter text to get a summarized version using a large language model.",
	)

	# Sentiment UI: one text input; the handler's (label, score) pair is mapped
	# onto the two output components in order.
	Seminterface = gr.Interface(
	    fn=get_sentiment,
	    inputs=gr.Textbox(label="enter the review"),
	    outputs=[
	        gr.Textbox(label="sentiment"),
	        gr.Number(label="score"),
	    ],
	)



	# prompt: write a Text-to-Speech model through Gradio.
	# Allow users to input text and turn it into a voice. This is a prototype to show better web accessibility.

	# Text-to-speech UI: text in, playable audio out. The handler is bound here,
	# so later rebinding of the name `text_to_speech` does not change this tab.
	SpeechInterface = gr.Interface(
	    text_to_speech,
	    gr.Textbox(label="Enter Text"),
	    gr.Audio(label="Synthesized Speech"),
	)

	"""## Launch the gradio interface

	### Subtask:
	Launch the Gradio application to make the interface accessible.

	Reasoning:
	Launch the Gradio interface using the `launch()` method.
	"""

	# Combine the four interfaces into one tabbed app; tab names pair up with
	# the interfaces by position.
	app = gr.TabbedInterface(
	    interface_list=[Audinterface, Suminterface, Seminterface, SpeechInterface],
	    tab_names=[
	        "Audio Transcription",
	        "Text Summarization",
	        "Sentiment Analysis",
	        "Text-to-Speech",
	    ],
	)

	# Start serving the app.
	app.launch()

	# IPython display helper for playing audio inline (notebook usage).
	from IPython.display import Audio

	# Play the generated audio
	# NOTE(review): `audio` and `sampling_rate` are not assigned anywhere in the
	# visible source, so this line raises NameError if it is reached. Presumably
	# they came from a notebook cell that was not exported - confirm, or remove
	# this preview from the deployed app.
	Audio(audio, rate=sampling_rate)

	# This is the corrected text_to_speech function for Gradio

	def text_to_speech(text):
	    """Synthesize speech from text, logging progress to stdout.

	    NOTE(review): this definition rebinds the name ``text_to_speech`` after
	    ``SpeechInterface`` has already been created with the earlier function
	    object, so on its own it does not change what that interface calls.

	    Args:
	        text: The text string to synthesize. May be ``None`` or blank.

	    Returns:
	        The audio output as a tuple of ``(sampling_rate, audio_array)``, or
	        ``None`` when ``text`` is ``None``, empty, or whitespace-only.

	    Raises:
	        Exception: Any error raised during synthesis is logged and re-raised.
	    """
	    # Nothing to say: return no audio rather than running the model.
	    if not text or not text.strip():
	        return None

	    try:
	        print(f"Attempting to synthesize text of length: {len(text)}")
	        audio_output = synthesizer(text)
	        print("Text synthesis successful.")
	        # Gradio's Audio output expects (sample_rate, data), in that order.
	        return (audio_output['sampling_rate'], audio_output['audio'])
	    except Exception as e:
	        print(f"An error occurred during text synthesis: {e}")
	        # Bare raise re-raises with the original traceback intact so Gradio
	        # can surface the error.
	        raise

	"""Next Steps:

	1. Execute the code cell above to define the corrected `text_to_speech` function.
	2. Re-run the cell that launches the Gradio interface (cell `9f75926a`).

	After these steps, when you input text into the "Text-to-Speech" tab in the Gradio interface, you should see and be able to play the synthesized audio.
	"""