# Spaces: Running
# File size: 884 Bytes
import gradio as gr
import torch
from transformers import AutoProcessor
from tror_yong_asr import TrorYongASRModel, transcribe
import numpy as np
from scipy.signal import resample
import os
# Identifier of the checkpoint to load, read from the MODEL_ID environment
# variable. The same value is used for both the processor and the model.
model_id = os.getenv("MODEL_ID")
if not model_id:
    # os.getenv returns None when the variable is unset; fail fast with an
    # actionable message instead of handing None/"" to from_pretrained.
    raise RuntimeError(
        "The MODEL_ID environment variable must be set to the model to load."
    )

# Half precision when CUDA is available, full precision otherwise.
# NOTE(review): this value is not passed to anything in the visible code --
# confirm whether the model/processor were meant to receive it.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# trust_remote_code=True lets transformers run custom code shipped with the
# model repository, so MODEL_ID must point at a trusted source.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = TrorYongASRModel.from_pretrained(model_id)
# Presumably switches the model to inference mode (torch.nn.Module.eval) --
# TODO confirm TrorYongASRModel is an nn.Module.
model.eval()
@torch.no_grad()
def transcribe_easier(filepath):
    """Transcribe the audio file at *filepath* and return the recognized text.

    Runs with autograd disabled (``torch.no_grad``) and delegates to
    ``transcribe`` with the module-level ``model`` and ``processor``,
    capping generation at ``max_tokens=1024``.

    Args:
        filepath: Path to the audio file to transcribe. A falsy value
            (``None`` or ``""``) means no audio was supplied, e.g. the
            form was submitted without a recording or an upload.

    Returns:
        The ``text`` attribute of the object returned by ``transcribe``,
        or an empty string when no audio was supplied.
    """
    # Guard clause: nothing to transcribe, so skip the model call entirely
    # rather than forwarding None to the ASR pipeline.
    if not filepath:
        return ""

    output = transcribe(filepath, model, processor, max_tokens=1024)
    return output.text
# Web UI: a single audio input (file upload or microphone), handed to
# transcribe_easier as a file path, and a single text box for the result.
iface = gr.Interface(
    fn=transcribe_easier,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs=gr.components.Textbox(),
    title="TrorYongASR-tiny",
    description="Realtime demo for Khmer speech recognition using TrorYongASR-tiny.",
)

# Start the Gradio server; share=False means no public share link is requested.
iface.launch(share=False)