# speechorwhat / app.py — Whisper speech-to-text Gradio demo
# (Hugging Face Space by arevedudaa, commit 34c0dbd, "Create app.py")
import gradio as gr
import whisper
import torch
# Load the Whisper base model once at module import so every request reuses it.
# Prefer CUDA when available; Whisper inference is much faster on GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = whisper.load_model("base", device=device)
def transcribe(audio):
    """Transcribe an audio file to text with automatic language detection.

    Parameters
    ----------
    audio : str | None
        Path to the audio file, as supplied by ``gr.Audio(type="filepath")``.
        Gradio passes ``None`` when the button is clicked with no audio.

    Returns
    -------
    str
        The transcribed text, or a short notice when no audio was provided.
    """
    # Guard: without this, whisper.load_audio(None) raises a TypeError
    # whenever the user clicks "Transcribe" before supplying audio.
    if audio is None:
        return "No audio provided. Please upload or record audio first."

    # Load the waveform and pad/trim it to Whisper's 30-second decode window.
    # NOTE: audio longer than 30 s is truncated by this single-pass pipeline;
    # use model.transcribe(audio) if full-length transcription is required.
    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Detect the spoken language from the mel spectrogram.
    _, probs = model.detect_language(mel)
    language = max(probs, key=probs.get)
    print(f"Detected language: {language}")

    # Decode; fp16 is only valid on GPU (CPU fp16 falls back with a warning).
    options = whisper.DecodingOptions(fp16=torch.cuda.is_available())
    result = whisper.decode(model, mel, options)
    return result.text
# Build the Gradio UI: two tabs (file upload / microphone recording) that
# share the single transcribe() callback.
with gr.Blocks() as demo:
    gr.Markdown("## Multilingual Speech-to-Text Transcription")

    # (tab title, audio source, input label) for each tab — both tabs are
    # otherwise identical, so construct them data-driven.
    tab_specs = [
        ("Upload Audio", "upload", "Upload your audio file"),
        ("Record Audio", "microphone", "Record your audio"),
    ]
    for tab_title, audio_source, audio_label in tab_specs:
        with gr.Tab(tab_title):
            audio_input = gr.Audio(source=audio_source, type="filepath", label=audio_label)
            run_button = gr.Button("Transcribe")
            text_output = gr.Textbox(label="Transcription")
        # Wire this tab's button to the shared transcription callback.
        run_button.click(transcribe, inputs=audio_input, outputs=text_output)

if __name__ == "__main__":
    demo.launch()