SoyVitou's picture
Update app.py
4c7a6bf verified
from __future__ import annotations
import importlib.util
import os
import time
from pathlib import Path
from typing import Callable, List
import gradio as gr
from libs.asr import predict
def load_inverse_text_function() -> Callable[[str], str]:
    """Load and return the ``InverseText`` callable from ``libs/inverse-text.py``.

    The file name contains a hyphen, so it cannot be imported with a regular
    ``import`` statement; it is loaded from its path through ``importlib``.

    The file is looked up relative to the current working directory first
    and, failing that, relative to the directory that contains this module,
    so the loader also works when the process is started from elsewhere.

    Returns:
        The ``InverseText`` callable defined by the loaded module.

    Raises:
        FileNotFoundError: If the file exists in neither location.
        ImportError: If no import spec or loader can be built for the file.
        AttributeError: If the module does not define ``InverseText``.
        TypeError: If ``InverseText`` is defined but is not callable.
    """
    import sys  # local import: only needed to register the loaded module

    module_name = "inverse_text_module"
    relative_file = Path("./libs/inverse-text.py")

    # Keep the original CWD-relative lookup first for backward compatibility.
    candidates = [relative_file]
    this_file = globals().get("__file__")
    if this_file:
        candidates.append(
            Path(this_file).resolve().parent / "libs" / "inverse-text.py"
        )

    inverse_file = next((path for path in candidates if path.is_file()), None)
    if inverse_file is None:
        raise FileNotFoundError(
            f"could not find inverse text file: {relative_file}"
        )

    spec = importlib.util.spec_from_file_location(module_name, str(inverse_file))
    if spec is None or spec.loader is None:
        raise ImportError(
            f"could not load inverse text module from: {inverse_file}"
        )

    module = importlib.util.module_from_spec(spec)
    # Register before executing, as the importlib recipe for path-based
    # imports does: code in the module that looks itself up through
    # sys.modules (dataclasses, pickle, ...) would otherwise not find it.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind.
        sys.modules.pop(module_name, None)
        raise

    if not hasattr(module, "InverseText"):
        raise AttributeError(
            f"{inverse_file} must contain function InverseText(text)"
        )

    inverse_text = module.InverseText
    if not callable(inverse_text):
        # Fail at load time instead of on the first transcription request.
        raise TypeError(
            f"InverseText in {inverse_file} must be callable, "
            f"got {type(inverse_text).__name__}"
        )
    return inverse_text
# Resolve the inverse-text function once, at import time, so that every
# transcription request reuses the same callable.
InverseText = load_inverse_text_function()
def load_audio_examples(test_dir: str = "./test") -> List[List[str]]:
    """Collect audio files under ``test_dir`` as single-item example rows.

    The directory is searched recursively. A path is kept when it is a
    regular file whose suffix, compared case-insensitively, is one of the
    known audio extensions.

    Args:
        test_dir: Directory to scan for example audio files.

    Returns:
        One ``[path]`` list per matching file, ordered by sorted path.
        An empty list when ``test_dir`` does not exist.
    """
    root = Path(test_dir)
    if not root.exists():
        return []

    known_suffixes = {
        ".wav", ".mp3", ".flac", ".ogg",
        ".m4a", ".aac", ".webm", ".opus",
    }
    return [
        [str(candidate)]
        for candidate in sorted(root.rglob("*"))
        if candidate.is_file() and candidate.suffix.lower() in known_suffixes
    ]
def transcribe_easier(filepath: str):
    """Transcribe one audio file and post-process the text with ``InverseText``.

    Args:
        filepath: Path of the audio file to transcribe; a falsy value
            short-circuits without calling the recogniser.

    Returns:
        A ``(transcription, inverse_text, elapsed)`` tuple of strings.
        ``elapsed`` is the wall-clock time of the ``predict`` call only,
        formatted in milliseconds. When ``predict`` reports failure the
        first item is an ``"ASR error: ..."`` message and the second is
        empty. When ``InverseText`` raises, the second item carries an
        ``"InverseText error: ..."`` message instead.
    """
    if not filepath:
        return "", "", "0 ms"

    # Time only the recogniser call; inverse-text processing is excluded.
    started = time.perf_counter()
    outcome = predict(filepath)
    elapsed_label = f"{(time.perf_counter() - started) * 1000:.2f} ms"

    if not outcome.get("success"):
        reason = outcome.get("error", "unknown error")
        return f"ASR error: {reason}", "", elapsed_label

    # A missing or None transcription is normalised to an empty string.
    raw_text = outcome.get("transcription", "") or ""

    try:
        normalized_text = InverseText(raw_text)
    except Exception as exc:
        # Best effort: surface the failure in the UI, keep the raw transcript.
        normalized_text = f"InverseText error: {exc}"

    return raw_text, normalized_text, elapsed_label
# Example clips offered below the demo; the list is empty when ./test
# does not exist.
examples = load_audio_examples("./test")

# Demo UI: one audio input (upload or microphone, handed to the callback as
# a file path) and three text outputs matching the tuple returned by
# transcribe_easier.
iface = gr.Interface(
    fn=transcribe_easier,
    inputs=gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="audio",
    ),
    outputs=[
        gr.Textbox(label="transcribe", lines=4),
        gr.Textbox(label="inverse text", lines=4),
        gr.Textbox(label="execute time"),
    ],
    examples=examples,
    cache_examples=False,
    title="Infinity Khmer ASR",
    # The Khmer author name had been stored as mis-decoded UTF-8 (mojibake);
    # it is restored here. The trailing zero-width space (U+200B) from the
    # original text is kept, written as an escape so it stays visible.
    description=(
        "Infinity Khmer ASR demo for Khmer speech recognition develop by @ "
        "លោក សយ វិទូ\u200b was trained with 200 hours"
    ),
)
if __name__ == "__main__":
    # Bind address and port come from the environment when set; the
    # defaults listen on all interfaces at port 7860. No public share link.
    host = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
    port = int(os.getenv("GRADIO_SERVER_PORT", "7860"))
    iface.launch(server_name=host, server_port=port, share=False)