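"""Gradio Space: speech-to-text transcription with OpenAI's Whisper model.

Users can either record audio from the microphone or upload an audio file;
the app returns the transcribed text.
"""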
import gradio as gr
import torch
import whisper
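
# NOTE (assumed setup, not visible in this file): the Space's requirements.txt
# is expected to list gradio, torch, and openai-whisper. Whisper also needs the
# ffmpeg binary on the system (on Spaces, typically via packages.txt) to decode
# audio files.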

# --- MODEL INITIALIZATION ---

# Check for GPU availability.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the Whisper model. "base" is a good starting point; for higher accuracy,
# use "medium" or "large", but they require more memory and compute.
print("Loading Whisper model...")
model = whisper.load_model("base", device=device)
print("Whisper model loaded successfully.")


# --- TRANSCRIPTION FUNCTION ---

def transcribe_audio(microphone_input, file_input):
    """Transcribe audio from either a microphone recording or an uploaded file.

    Both Audio components are configured with type="filepath", so each argument
    is the path to a temporary audio file, or None if that input was not used.

    Args:
        microphone_input (str or None): Path to the recorded audio file.
        file_input (str or None): Path to the uploaded audio file.

    Returns:
        str: The transcribed text, or an error message.
    """
    # Determine the input source, preferring the microphone recording.
    if microphone_input is not None:
        audio_source = microphone_input
    elif file_input is not None:
        audio_source = file_input
    else:
        return "No audio source provided. Please record or upload an audio file."

    # Perform the transcription. Whisper splits long audio into 30-second
    # windows internally, so long files need no special handling. fp16 is only
    # supported on GPU; disabling it on CPU avoids a runtime warning.
    try:
        result = model.transcribe(audio_source, fp16=(device == "cuda"))
        return result["text"]
    except Exception as e:
        return f"An error occurred during transcription: {e}"


# --- GRADIO INTERFACE ---

# Use gr.Blocks for more complex layouts and custom styling.
with gr.Blocks(css="assets/style.css", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ Voice Recognition")
    gr.Markdown(
        "This application uses OpenAI's Whisper model to transcribe speech to text. "
        "You can either record audio directly from your microphone or upload an audio file."
    )

    with gr.Row(elem_classes="audio-container"):
        with gr.Column():
            # Microphone input
            mic_input = gr.Audio(sources=["microphone"], type="filepath", label="Record from Microphone")
            # File upload input
            file_upload = gr.Audio(sources=["upload"], type="filepath", label="Upload Audio File")

    # Transcribe button
    transcribe_button = gr.Button("Transcribe Audio")

    # Transcription output
    output_text = gr.Textbox(
        lines=10,
        label="Transcription Result",
        placeholder="Your transcribed text will appear here...",
        elem_id="transcription_output",
    )

    # Wire the button click to the transcription function.
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=[mic_input, file_upload],
        outputs=output_text,
    )

# Launch the application.
if __name__ == "__main__":
    demo.launch(debug=True)
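
# When running locally, demo.launch(share=True) would additionally create a
# temporary public link; on Hugging Face Spaces this launch() call is picked up
# automatically and no extra arguments are needed.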