File size: 884 Bytes
bef3669
 
7c48bda
 
b03f9b4
a71317f
db288c0
bef3669
7c48bda
db288c0
bef3669
5a3a86d
bef3669
7c48bda
b1ab0c4
8b2ff9f
bef3669
c6bbc6b
939c017
bee2ee7
d7ffb81
c6bbc6b
bef3669
939c017
c6bbc6b
939c017
7c48bda
 
bef3669
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import gradio as gr
import torch
from transformers import AutoProcessor
from tror_yong_asr import TrorYongASRModel, transcribe
import numpy as np
from scipy.signal import resample
import os


# Identifier handed to both ``from_pretrained`` calls below; read from the
# MODEL_ID environment variable so the checkpoint is chosen at deploy time.
model_id = os.getenv("MODEL_ID")
if not model_id:
    # os.getenv returns None when the variable is unset. Fail fast with an
    # actionable message rather than an opaque error from from_pretrained.
    raise RuntimeError(
        "The MODEL_ID environment variable must be set to the model "
        "identifier to load."
    )

# float16 when CUDA is available, float32 otherwise.
# NOTE(review): nothing in this file passes torch_dtype to the loaders
# below -- confirm whether it was meant to be used.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# trust_remote_code=True permits loading processor code shipped with the
# model repository, so MODEL_ID must point at a trusted source.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = TrorYongASRModel.from_pretrained(model_id)
# Presumably torch.nn.Module.eval(): switch to inference behaviour.
model.eval()

@torch.no_grad()
def transcribe_easier(filepath):
    """Transcribe a single audio file and return the recognized text.

    Runs with gradient tracking disabled (``torch.no_grad``) and delegates
    to ``transcribe`` with the module-level ``model`` and ``processor``,
    capping generation at 1024 tokens.

    Args:
        filepath: Path to the audio file to transcribe. May be ``None`` or
            empty when the UI is submitted without any audio.

    Returns:
        The ``text`` attribute of the ``transcribe`` result, or an empty
        string when no audio file was supplied.
    """
    # Submitting the form without uploading or recording yields no path;
    # return an empty transcript instead of crashing inside transcribe().
    if not filepath:
        return ""

    result = transcribe(filepath, model, processor, max_tokens=1024)
    return result.text

# Input widget: audio may be uploaded or recorded from the microphone, and
# is delivered to the handler as a file path.
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
# Output widget: plain text box that shows the transcript.
text_output = gr.components.Textbox()

# Wire the transcription handler to the widgets and start the web UI.
iface = gr.Interface(
    fn=transcribe_easier,
    inputs=audio_input,
    outputs=text_output,
    title="TrorYongASR-tiny",
    description="Realtime demo for Khmer speech recognition using TrorYongASR-tiny.",
)
iface.launch(share=False)