Spaces:

SoyVitou
/

Infinity-Khmer-ASR

Sleeping

App Files Files Community

Infinity-Khmer-ASR / app.py

SoyVitou

Update app.py

4c7a6bf verified 19 days ago

raw

history blame contribute delete

2.96 kB

	from __future__ import annotations

	import importlib.util
	import os
	import time
	from pathlib import Path
	from typing import Callable, List

	import gradio as gr

	from libs.asr import predict


	def load_inverse_text_function() -> Callable[[str], str]:
	    """Load the ``InverseText`` callable from ``./libs/inverse-text.py``.

	    The file name contains a hyphen, so it cannot be pulled in with a plain
	    ``import`` statement; it is loaded by path through ``importlib`` instead.
	    The path is relative to the current working directory.

	    Returns:
	        The ``InverseText`` attribute of the freshly executed module.

	    Raises:
	        FileNotFoundError: If the source file does not exist.
	        ImportError: If no import spec or loader can be built for the file.
	        AttributeError: If the module does not define ``InverseText``.
	    """
	    source_path = Path("./libs/inverse-text.py")
	    if not source_path.exists():
	        raise FileNotFoundError(f"could not find inverse text file: {source_path}")

	    module_spec = importlib.util.spec_from_file_location(
	        "inverse_text_module", str(source_path)
	    )
	    loader = None if module_spec is None else module_spec.loader
	    if loader is None:
	        raise ImportError(f"could not load inverse text module from: {source_path}")

	    loaded = importlib.util.module_from_spec(module_spec)
	    loader.exec_module(loaded)

	    # A private sentinel distinguishes "attribute missing" from any real value.
	    missing = object()
	    inverse_fn = getattr(loaded, "InverseText", missing)
	    if inverse_fn is missing:
	        raise AttributeError(f"{source_path} must contain function InverseText(text)")
	    return inverse_fn


	# Resolve InverseText once at import time (raises if ./libs/inverse-text.py
	# is missing or invalid); transcribe_easier calls it on each successful
	# transcription.
	InverseText = load_inverse_text_function()


	def load_audio_examples(test_dir: str = "./test") -> List[List[str]]:
	    """Collect audio files under *test_dir* as single-column example rows.

	    The directory is walked recursively and a file is kept when its suffix,
	    compared case-insensitively, is one of the known audio extensions.

	    Args:
	        test_dir: Directory to scan.

	    Returns:
	        One ``[path]`` list per matching file, in sorted path order. An
	        empty list when *test_dir* does not exist.
	    """
	    root = Path(test_dir)
	    if not root.exists():
	        return []

	    known_suffixes = {".wav", ".mp3", ".flac", ".ogg", ".m4a", ".aac", ".webm", ".opus"}
	    return [
	        [str(candidate)]
	        for candidate in sorted(root.rglob("*"))
	        if candidate.is_file() and candidate.suffix.lower() in known_suffixes
	    ]


	def transcribe_easier(filepath: str):
	    """Transcribe one audio file and post-process the text with ``InverseText``.

	    Args:
	        filepath: Path of the audio file. A falsy value (empty string or
	            ``None``) short-circuits without calling the recognizer.

	    Returns:
	        A 3-tuple ``(transcription, inverse_text, elapsed)``. ``elapsed`` is
	        the duration of the ``predict`` call formatted as ``"<ms> ms"``.
	        When ``predict`` reports failure, the first item is an
	        ``"ASR error: ..."`` message and the second is empty. When
	        ``InverseText`` raises, the second item carries the error text.
	    """
	    if not filepath:
	        return "", "", "0 ms"

	    began = time.perf_counter()
	    outcome = predict(filepath)
	    # Only the predict() call is timed; the InverseText step is excluded.
	    elapsed_label = f"{(time.perf_counter() - began) * 1000:.2f} ms"

	    if not outcome.get("success"):
	        reason = outcome.get("error", "unknown error")
	        return f"ASR error: {reason}", "", elapsed_label

	    # A missing or None transcription is normalised to an empty string.
	    raw_text = outcome.get("transcription", "") or ""

	    try:
	        normalized = InverseText(raw_text)
	    except Exception as exc:
	        # Best effort: show the failure in the output field rather than
	        # discarding a transcription that already succeeded.
	        normalized = f"InverseText error: {exc}"

	    return raw_text, normalized, elapsed_label


	# Example rows for the demo UI: audio files discovered under ./test
	# (an empty list when that folder does not exist).
	examples = load_audio_examples("./test")


	# Gradio UI definition: one audio input mapped to three text outputs, in
	# the same order as the 3-tuple returned by transcribe_easier.
	iface = gr.Interface(
	fn=transcribe_easier,
	# type="filepath" matches transcribe_easier, which passes the value
	# straight to predict() as a path.
	inputs=gr.Audio(
	sources=["upload", "microphone"],
	type="filepath",
	label="audio",
	),
	outputs=[
	gr.Textbox(label="transcribe", lines=4),
	gr.Textbox(label="inverse text", lines=4),
	gr.Textbox(label="execute time"),
	],
	examples=examples,
	# NOTE(review): presumably disabled so the examples are not run through
	# the model ahead of time — confirm against the Gradio documentation.
	cache_examples=False,
	title="Infinity Khmer ASR",
	description="Infinity Khmer ASR demo for Khmer speech recognition develop by @ លោក សយ វិទូ was trained with 200 hours",
	)


	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    # Host and port come from the environment, falling back to all
	    # interfaces on port 7860.
	    bind_host = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
	    bind_port = int(os.getenv("GRADIO_SERVER_PORT", "7860"))
	    iface.launch(server_name=bind_host, server_port=bind_port, share=False)