import os

import gradio as gr
import torch
import whisper
from deep_translator import GoogleTranslator

# Use an NVIDIA GPU when available, otherwise fall back to the CPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Directory for transcripts
BASE_DIR = os.getcwd()
TRANSCRIPTS_FOLDER = os.path.join(BASE_DIR, "transcripts")

# Ensure the transcripts directory exists
os.makedirs(TRANSCRIPTS_FOLDER, exist_ok=True)
def transcribe_and_translate(audio_path, selected_language, model_type="base"):
    """
    Transcribe an audio file with Whisper and translate it into English if required.

    :param audio_path: Path to the audio file (any format ffmpeg can decode)
    :param selected_language: Language code of the speech (e.g. 'nl', 'en')
    :param model_type: Whisper model size (default is 'base')
    :return: Timestamped translation, or a completion message for English audio
    """
    if audio_path is None:
        return "No audio received."

    try:
        # Load the Whisper model selected by the user
        model = whisper.load_model(model_type, device=DEVICE)
    except Exception as e:
        return f"Failed to load Whisper model ({model_type}): {e}"

    # Whisper accepts a file path directly and decodes it via ffmpeg
    result = model.transcribe(audio_path, language=selected_language)

    translated_text = []
    transcript_file = os.path.join(TRANSCRIPTS_FOLDER, "transcript.txt")
    with open(transcript_file, "w", encoding="utf-8") as text_file:
        for segment in result["segments"]:
            start_time = segment["start"]
            end_time = segment["end"]
            text = segment["text"].strip()
            text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text}\n")
            # Translate non-English segments into English
            if selected_language != "en":
                text_en = GoogleTranslator(source="auto", target="en").translate(text)
                translated_text.append(f"[{start_time:.2f} - {end_time:.2f}] {text_en}")
                text_file.write(f"[{start_time:.2f} - {end_time:.2f}] {text_en}\n")

    return "\n".join(translated_text) if translated_text else "Transcription completed."
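# Note: openai-whisper exposes no streaming API, which is why the function above
# works on a complete file. A rough approximation of live transcription is to
# split the audio into fixed windows, transcribe each, and offset the timestamps.
# A minimal sketch, assuming 16 kHz mono float32 input; transcribe_in_chunks is
# a hypothetical helper, not part of any library:
def transcribe_in_chunks(audio, model, chunk_seconds=30):
    """Transcribe a long numpy array window by window."""
    import numpy as np

    sr = whisper.audio.SAMPLE_RATE  # 16000 Hz, the rate Whisper expects
    step = chunk_seconds * sr
    lines = []
    for offset in range(0, len(audio), step):
        chunk = audio[offset:offset + step].astype(np.float32)
        result = model.transcribe(chunk)
        base = offset / sr  # shift chunk-relative timestamps to the full timeline
        for seg in result["segments"]:
            lines.append(f"[{base + seg['start']:.2f} - {base + seg['end']:.2f}] {seg['text'].strip()}")
    return "\n".join(lines)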
# Define the Gradio interface
interface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio"),  # pre-recorded audio; Whisper reads the file via ffmpeg
        gr.Dropdown(label="Select Language", choices=["nl", "en"], value="en"),
        gr.Dropdown(label="Select Model Type", choices=["tiny", "base", "small", "medium", "large"], value="base"),
    ],
    outputs="text",
    title="Transcription and Translation",
)
if __name__ == "__main__":
    # Launch the Gradio interface
    interface.launch()
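# Quick check without the UI (assumption: "sample.wav" is a hypothetical local
# test file; any format ffmpeg can decode works):
#   print(transcribe_and_translate("sample.wav", "nl", "tiny"))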