import gradio as gr
import torch
from transformers import AutoProcessor
from tror_yong_asr import TrorYongASRModel, transcribe
import numpy as np
from scipy.signal import resample
import os


# Identifier of the checkpoint to serve, taken from the MODEL_ID environment
# variable and handed unchanged to both from_pretrained() calls below.
model_id = os.getenv("MODEL_ID")
if not model_id:
    # Fail fast with an actionable message rather than passing None (or an
    # empty string) to from_pretrained(), which fails far less readably.
    raise RuntimeError(
        "The MODEL_ID environment variable must be set to the model to load."
    )

# Half precision when CUDA is available, full precision otherwise.
# NOTE(review): this value is not passed to either from_pretrained() call in
# the visible code - confirm whether it is meant to be applied to the model.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# trust_remote_code=True allows code shipped with the checkpoint to be
# executed, so MODEL_ID should only ever point at a trusted repository.
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
model = TrorYongASRModel.from_pretrained(model_id)
# The app only runs inference, so switch the model to evaluation mode once.
model.eval()

@torch.no_grad()
def transcribe_easier(filepath):
    """Transcribe one audio file with the module-level model and processor.

    Args:
        filepath: Path to the audio file, as supplied by the Gradio audio
            input. ``None`` (or an empty string) means nothing was uploaded
            or recorded.

    Returns:
        The ``text`` attribute of the result returned by ``transcribe``.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque failure from inside ``transcribe``.
    """
    # Gradio passes None when the user submits without any audio.
    if not filepath:
        raise gr.Error("Please upload or record an audio clip first.")

    # max_tokens=1024 presumably caps the decoded output length - confirm
    # against the tror_yong_asr documentation.
    result = transcribe(filepath, model, processor, max_tokens=1024)
    return result.text

# Audio can come from an uploaded file or the microphone; type="filepath"
# makes Gradio hand the callback a path on disk rather than raw samples.
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")
# Plain text box that displays the transcription returned by the callback.
text_output = gr.components.Textbox()

# Wire the transcription callback to the input and output components.
iface = gr.Interface(
    fn=transcribe_easier,
    inputs=audio_input,
    outputs=text_output,
    title="TrorYongASR-tiny",
    description="Realtime demo for Khmer speech recognition using TrorYongASR-tiny.",
)

# Start the web server; share=False means no public share link is requested.
iface.launch(share=False)