"""Gradio demo for the Infinity Khmer ASR model.

Builds a small web UI that takes an audio file (uploaded or recorded),
runs it through ``libs.asr.predict`` and shows the raw transcription, the
result of ``InverseText`` applied to it, and the ASR execution time.
"""

from __future__ import annotations

import importlib.util
import os
import time
from pathlib import Path
from typing import Callable, List, Tuple

import gradio as gr

from libs.asr import predict


def load_inverse_text_function() -> Callable[[str], str]:
    """Load and return ``InverseText`` from ``./libs/inverse-text.py``.

    The file name contains a hyphen, so it is not a valid module name for a
    regular ``import`` statement; it is loaded by path through ``importlib``.

    Returns:
        The ``InverseText`` callable defined in the loaded module.

    Raises:
        FileNotFoundError: If ``./libs/inverse-text.py`` does not exist.
        ImportError: If no import spec/loader can be built for the file.
        AttributeError: If the module does not expose a callable
            ``InverseText``.
    """
    inverse_file = Path("./libs/inverse-text.py")

    if not inverse_file.exists():
        raise FileNotFoundError(f"could not find inverse text file: {inverse_file}")

    spec = importlib.util.spec_from_file_location(
        "inverse_text_module",
        str(inverse_file),
    )
    if spec is None or spec.loader is None:
        raise ImportError(f"could not load inverse text module from: {inverse_file}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    # Reject a missing *or* non-callable attribute here, at startup, rather
    # than failing on every request later.
    inverse_text = getattr(module, "InverseText", None)
    if not callable(inverse_text):
        raise AttributeError(f"{inverse_file} must contain function InverseText(text)")

    return inverse_text


# Loaded once at import time so that every request reuses the same function.
InverseText = load_inverse_text_function()


def load_audio_examples(test_dir: str = "./test") -> List[List[str]]:
    """Collect audio files under ``test_dir`` as Gradio example rows.

    The directory is searched recursively; files are matched by extension
    (case-insensitive) and returned in sorted path order.

    Args:
        test_dir: Directory to search for example audio files.

    Returns:
        A list of single-element lists, each holding one file path as a
        string. Empty if ``test_dir`` does not exist.
    """
    audio_extensions = {
        ".wav",
        ".mp3",
        ".flac",
        ".ogg",
        ".m4a",
        ".aac",
        ".webm",
        ".opus",
    }

    folder = Path(test_dir)
    if not folder.exists():
        return []

    return [
        [str(file_path)]
        for file_path in sorted(folder.rglob("*"))
        if file_path.is_file() and file_path.suffix.lower() in audio_extensions
    ]


def transcribe_easier(filepath: str) -> Tuple[str, str, str]:
    """Transcribe one audio file and post-process the text.

    Args:
        filepath: Path to the audio file; a falsy value (no audio provided)
            short-circuits to empty outputs.

    Returns:
        A ``(transcription, inverse_text, execute_time)`` tuple of strings.
        ``execute_time`` covers only the ``predict`` call, formatted in
        milliseconds. On ASR failure the first element carries an
        ``"ASR error: ..."`` message and the second is empty; on
        ``InverseText`` failure the second element carries an
        ``"InverseText error: ..."`` message.
    """
    if not filepath:
        return "", "", "0 ms"

    start_time = time.perf_counter()
    try:
        result = predict(filepath)
    except Exception as error:
        # UI boundary: report the failure in the output box, the same way a
        # non-successful result is reported, instead of crashing the handler.
        execute_time_ms = (time.perf_counter() - start_time) * 1000
        return f"ASR error: {error}", "", f"{execute_time_ms:.2f} ms"
    execute_time_ms = (time.perf_counter() - start_time) * 1000
    execute_time = f"{execute_time_ms:.2f} ms"

    # A missing/empty result is treated like an unsuccessful one.
    if not result or not result.get("success"):
        error = result.get("error", "unknown error") if result else "unknown error"
        return f"ASR error: {error}", "", execute_time

    # `or ""` normalises an explicit None transcription to an empty string.
    transcribe_text = result.get("transcription", "") or ""

    try:
        inverse_text = InverseText(transcribe_text)
    except Exception as error:
        # Keep the raw transcription visible even if post-processing fails.
        inverse_text = f"InverseText error: {error}"

    return transcribe_text, inverse_text, execute_time


examples = load_audio_examples("./test")

iface = gr.Interface(
    fn=transcribe_easier,
    inputs=gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="audio",
    ),
    outputs=[
        gr.Textbox(label="transcribe", lines=4),
        gr.Textbox(label="inverse text", lines=4),
        gr.Textbox(label="execute time"),
    ],
    examples=examples,
    cache_examples=False,
    title="Infinity Khmer ASR",
    description="Infinity Khmer ASR demo for Khmer speech recognition develop by @ លោក សយ វិទូ​ was trained with 200 hours",
)


if __name__ == "__main__":
    # Host and port can be overridden through the environment.
    iface.launch(
        server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
        server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
        share=False,
    )