TrorYongASR-Demo

Running

File size: 884 Bytes

bef3669
 
7c48bda
 
b03f9b4
a71317f
db288c0
bef3669
7c48bda
db288c0
bef3669
5a3a86d
bef3669
7c48bda
b1ab0c4
8b2ff9f
bef3669
c6bbc6b
939c017
bee2ee7
d7ffb81
c6bbc6b
bef3669
939c017
c6bbc6b
939c017
7c48bda
 
bef3669

import gradio as gr
import torch
from transformers import AutoProcessor
from tror_yong_asr import TrorYongASRModel, transcribe
import numpy as np
from scipy.signal import resample
import os


# Identifier of the model to load, taken from the environment
# (presumably a Hugging Face Hub repo id or a local path — confirm with deployment config).
model_id = os.getenv("MODEL_ID")
if not model_id:
    # os.getenv returns None when the variable is unset; fail here with an
    # actionable message instead of handing None/"" to from_pretrained.
    raise RuntimeError(
        "The MODEL_ID environment variable must be set to the model to load."
    )

# NOTE(review): torch_dtype is computed but not referenced by any visible code
# below (it is not passed to either from_pretrained call) — confirm whether it
# was meant to be applied to the model.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# trust_remote_code=True lets the model repository supply custom processor
# code, so MODEL_ID should only ever point at a trusted source.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = TrorYongASRModel.from_pretrained(model_id)
# The app only runs inference, so put the model in evaluation mode once at startup.
model.eval()

@torch.no_grad()
def transcribe_easier(filepath):
    """Transcribe one audio file and return the recognized text.

    Runs with gradient tracking disabled (``torch.no_grad``).

    Args:
        filepath: Path to the audio file to transcribe. May be ``None`` or
            empty when the UI form is submitted without any audio.

    Returns:
        The ``text`` attribute of the result produced by ``transcribe``, or
        an empty string when no audio was provided.
    """
    # The audio input hands over None when nothing was uploaded or recorded;
    # skip inference instead of letting the transcription call fail on it.
    if not filepath:
        return ""

    result = transcribe(filepath, model, processor, max_tokens=1024)
    return result.text

# Input widget: audio can be uploaded or recorded from the microphone, and is
# handed to the callback as a file path.
_audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
# Output widget: a plain text box showing the transcription.
_text_output = gr.components.Textbox()

# Wire the transcription callback to the widgets above.
iface = gr.Interface(
    fn=transcribe_easier,
    inputs=_audio_input,
    outputs=_text_output,
    title="TrorYongASR-tiny",
    description="Realtime demo for Khmer speech recognition using TrorYongASR-tiny.",
)

# Start the web app; share=False means no public share link is requested.
iface.launch(share=False)