Spaces:

botario
/

wave2vec_luxembourgish

Sleeping

App Files Files Community

wave2vec_luxembourgish / app.py

botarioAcc

update app.py

85eedc6 verified 7 months ago

raw

history blame contribute delete

2.51 kB

	import spaces
	import time
	import os

	import torch
	import gradio as gr
	from transformers.pipelines import pipeline
	import utils

	from config import (
	MODEL_PATHS,
	SUPPORTED_LANGUAGES,
	CUSTOM_CSS,
	)

	# Language of the model this app serves. The original note lists en, de and
	# lb as available; the authoritative set is SUPPORTED_LANGUAGES from config.
	LANGUAGE = "lb"
	if LANGUAGE not in SUPPORTED_LANGUAGES:
	    # Abort start-up on a bad configuration. Raising SystemExit with a message
	    # writes it to stderr and ends the process with a non-zero status, unlike
	    # the interactive-only `exit()` helper, which reported success (status 0).
	    raise SystemExit(
	        f"language ({LANGUAGE}) not supported. Use one of {SUPPORTED_LANGUAGES}"
	    )

	# Model identifier/path handed to the transformers pipeline for LANGUAGE.
	MODEL_PATH = MODEL_PATHS[LANGUAGE]

	# Shared ASR pipeline; stays None until the first transcription request
	# builds it, so importing this module does not load the model.
	_asr_pipeline = None

	@spaces.GPU
	def transcribe_gradio(audio_path: str | None) -> str:
	    """Transcribe the audio file at *audio_path* and report the inference time.

	    The speech-recognition pipeline is built on the first call and cached in
	    the module-level ``_asr_pipeline`` so later calls reuse the loaded model.

	    Args:
	        audio_path: Path to the recorded or uploaded audio file, or ``None``
	            (or an empty string) when the user supplied nothing.

	    Returns:
	        The transcript followed by the inference time in seconds; a warning
	        message when no audio was supplied; or an error message prefixed
	        with "❌" when the pipeline call raises.
	    """
	    if not audio_path:
	        return "⚠️ Please record something or choose a file first."

	    global _asr_pipeline

	    if _asr_pipeline is None:
	        # Build the pipeline lazily. NOTE(review): done inside the
	        # `spaces.GPU`-decorated call, presumably so CUDA is visible when
	        # the device is chosen; confirm against the `spaces` docs.
	        _asr_pipeline = pipeline(
	            "automatic-speech-recognition",
	            model=MODEL_PATH,
	            # First CUDA device when available, otherwise -1 (CPU).
	            device=0 if torch.cuda.is_available() else -1,
	            chunk_length_s=30,
	            stride_length_s=(4, 2),
	            batch_size=8,
	            # Access token read from the environment; None when unset.
	            token=os.getenv("HF_TOKEN"),
	        )

	    # perf_counter is monotonic, so the reported duration cannot be skewed
	    # by system clock adjustments the way time.time() can.
	    start = time.perf_counter()
	    try:
	        result = _asr_pipeline(audio_path)
	        transcript = result["text"] if isinstance(result, dict) else str(result)
	    except Exception as err:
	        # UI boundary: surface the failure to the user as text instead of
	        # letting the exception escape the handler.
	        return f"❌ {err}"
	    runtime = time.perf_counter() - start

	    return f"{transcript}\n\n⌛ Inference time: {runtime:.2f} s"

	# Gradio UI: an audio input next to a transcript box, with a button row
	# underneath. Components are created in the order they should appear.
	with gr.Blocks(
	    title="Wave2Vec (Luxembourgish) ",
	    theme="soft",
	    css=CUSTOM_CSS,
	) as demo:
	    # Header and short usage instructions.
	    gr.Markdown(value="""
	# 🎙️ Speech-to-Text Demo — Wave2Vec (Luxembourgish)
	Use Record to capture speech live or Upload to select an audio file (.wav, .mp3, .flac).
	Hit Transcribe to convert your recording into text, and Clear to reset both fields.
	""")

	    # Top row: audio source on the left, resulting transcript on the right.
	    with gr.Row():
	        # type="filepath" hands the handler a path string rather than raw samples.
	        audio_input = gr.Audio(
	            label="Input audio",
	            sources=["microphone", "upload"],
	            type="filepath",
	            autoplay=False,
	        )
	        output_text = gr.Textbox(
	            label="Transcript",
	            lines=10,
	            placeholder="Your transcript will appear here …",
	            show_copy_button=True,
	        )

	    # Bottom row: action buttons.
	    with gr.Row(equal_height=True, elem_classes="centered-row") as row:
	        transcribe_btn = gr.Button(value="Transcribe ✨", scale=0)
	        # Clears both the audio input and the transcript box.
	        clear_btn = gr.ClearButton(
	            components=[audio_input, output_text],
	            scale=0,
	            elem_classes="clear-btn",
	        )

	    # Run the transcription handler when the Transcribe button is clicked.
	    transcribe_btn.click(
	        fn=transcribe_gradio,
	        inputs=audio_input,
	        outputs=output_text,
	    )


	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()