"""Gradio demo app for Khmer speech recognition with TrorYongASR.

The checkpoint named by the ``MODEL_ID`` environment variable is loaded once
when this module is executed. A single-page Gradio interface then transcribes
an uploaded or microphone-recorded audio clip and shows the text.
"""

import os

import gradio as gr
import numpy as np
import torch
from scipy.signal import resample
from transformers import AutoProcessor
from tror_yong_asr import TrorYongASRModel, transcribe

# The checkpoint to serve is configured through the environment. Fail early
# with an actionable message instead of letting ``from_pretrained`` choke on
# ``None`` further down.
model_id = os.getenv("MODEL_ID")
if not model_id:
    raise RuntimeError(
        "The MODEL_ID environment variable must be set to the model "
        "repository id or local path to load."
    )

# NOTE(review): this dtype is computed but never passed to the loaders below,
# and the model is not moved to a GPU in this file. It is kept so the
# module-level name stays available; confirm whether it was meant to be used.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# SECURITY: ``trust_remote_code=True`` lets the model repository execute its
# own Python code in this process. Only point MODEL_ID at a trusted source.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = TrorYongASRModel.from_pretrained(model_id)
model.eval()


@torch.no_grad()
def transcribe_easier(filepath):
    """Transcribe the audio file at *filepath* and return the text.

    Args:
        filepath: Path to the audio file, as delivered by the ``gr.Audio``
            input configured with ``type="filepath"``. May be ``None`` or
            empty when the form is submitted without any audio.

    Returns:
        The ``text`` attribute of the object returned by ``transcribe``.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an internal traceback.
    """
    if not filepath:
        raise gr.Error("Please upload or record an audio clip first.")

    output = transcribe(filepath, model, processor, max_tokens=1024)
    return output.text


iface = gr.Interface(
    fn=transcribe_easier,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs=gr.components.Textbox(),
    title="TrorYongASR-tiny",
    description="Realtime demo for Khmer speech recognition using TrorYongASR-tiny.",
)

# Launched at import/execution time, as in the original script; no public
# share link is requested.
iface.launch(share=False)