|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
def transcribe(audio):
    """Normalize a recorded clip and return a placeholder "transcription".

    Args:
        audio: Tuple ``(sample_rate, samples)`` as delivered by ``gr.Audio``
            in numpy mode; ``samples`` may be int- or float-typed and may
            have a channel dimension.

    Returns:
        A string listing the first 10 peak-normalized samples (stand-in for
        real speech-to-text output).
    """
    sr, y = audio  # sample rate is part of the payload but unused here

    # Down-mix multi-channel audio to mono by averaging across channels.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Convert to float32 and peak-normalize into [-1.0, 1.0].
    y = y.astype(np.float32)
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:  # guard: a silent clip would otherwise divide by zero -> NaN
        y /= peak

    return "Transcribed text: " + " ".join([str(i) for i in y[:10]])
|
|
|
|
|
|
|
|
# Single-shot demo: record a whole clip, then transcribe it in one call.
demo_full_context = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="microphone"),
    outputs="text",
)
|
|
|
|
|
|
|
|
def transcribe_stream(stream, new_chunk):
    """Accumulate streamed microphone chunks and return a placeholder transcription.

    Args:
        stream: Previously accumulated float32 sample array held in Gradio
            session state, or ``None`` on the first chunk.
        new_chunk: Tuple ``(sample_rate, samples)`` for the latest chunk from
            ``gr.Audio(streaming=True)``.

    Returns:
        Tuple of (updated stream array for the state output, string listing
        the first 10 accumulated samples as stand-in transcription text).
    """
    sr, y = new_chunk  # sample rate is part of the payload but unused here

    # Down-mix multi-channel audio to mono by averaging across channels.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Convert to float32 and peak-normalize into [-1.0, 1.0].
    y = y.astype(np.float32)
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:  # guard: a silent chunk would otherwise inject NaNs into the stream
        y /= peak

    # Append the new chunk to the session's running buffer.
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    return stream, "Transcribed text: " + " ".join([str(i) for i in stream[:10]])
|
|
|
|
|
|
|
|
# Live demo: chunks stream in from the microphone as they are recorded; the
# "state" input/output pair carries the accumulated samples between calls.
demo_streaming = gr.Interface(
    fn=transcribe_stream,
    inputs=["state", gr.Audio(sources=["microphone"], streaming=True)],
    outputs=["state", "text"],
    live=True,
)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # NOTE(review): in a plain script, launch() blocks until the server is
    # shut down, so the streaming demo below only starts after the first one
    # is stopped (e.g. Ctrl+C) — and both launches default to the same port.
    # If both demos should run simultaneously, pass prevent_thread_lock=True
    # to the first launch or combine them in one app — confirm intent.
    demo_full_context.launch(show_error=True)
    demo_streaming.launch()