import sys import gradio as gr import spaces from huggingface_hub import snapshot_download sys.path.append("./fireredasr") from fireredasr.models.fireredasr import FireRedAsr asr_model_aed = None asr_model_llm = None def init_model(model_dir_aed, model_dir_llm): global asr_model_aed global asr_model_llm if asr_model_aed is None: asr_model_aed = FireRedAsr.from_pretrained("aed", model_dir_aed) if asr_model_llm is None: asr_model_llm = FireRedAsr.from_pretrained("llm", model_dir_llm) @spaces.GPU(duration=20) def asr_inference(audio_file): if not audio_file: return "Please upload a wav file" batch_uttid = ["demo"] batch_wav_path = [audio_file] results = asr_model_aed.transcribe( batch_uttid, batch_wav_path, { "use_gpu": True, "beam_size": 3, "nbest": 1, "decode_max_len": 0, "softmax_smoothing": 1.25, "aed_length_penalty": 0.6, "eos_penalty": 1.0, #"decode_min_len": args.decode_min_len, #"repetition_penalty": args.repetition_penalty, #"llm_length_penalty": args.llm_length_penalty, #"temperature": args.temperature } ) text_output = results[0]["text"] return text_output @spaces.GPU(duration=30) def asr_inference_llm(audio_file): if not audio_file: return "Please upload a wav file" batch_uttid = ["demo"] batch_wav_path = [audio_file] results = asr_model_llm.transcribe( batch_uttid, batch_wav_path, { "use_gpu": True, "beam_size": 3, "nbest": 1, "decode_max_len": 0, "decode_min_len": 0, "repetition_penalty": 3.0, "llm_length_penalty": 1.0, "temperature": 1.0 } ) text_output = results[0]["text"] return text_output with gr.Blocks(title="FireRedASR") as demo: gr.HTML( "