import gradio as gr import os import re import subprocess from SenseVoiceAx import SenseVoiceAx import numpy as np model_root = "../sensevoice_ax650" max_seq_len = 256 model_path = os.path.join(model_root, "sensevoice.axmodel") assert os.path.exists(model_path), f"model {model_path} not exist" cmvn_file = os.path.join(model_root, "am.mvn") bpe_model = os.path.join(model_root, "chn_jpn_yue_eng_ko_spectok.bpe.model") token_file = os.path.join(model_root, "tokens.txt") model = SenseVoiceAx( model_path, cmvn_file, token_file, bpe_model, max_seq_len=max_seq_len, beam_size=3, hot_words=None, streaming=False, ) # 你实现的语言转文本函数 def speech_to_text(audio_input, lang): """ audio_path: A tuple of (sample rate in Hz, audio data as numpy array). lang: 语言类型 "auto", "zh", "en", "yue", "ja", "ko" """ if not audio_input: return "无音频" sr, audio_data = audio_input if audio_data.dtype != np.float32: audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max asr_res = model.infer((audio_data, sr), lang, print_rtf=False) return asr_res def get_all_local_ips(): result = subprocess.run(['ip', 'a'], capture_output=True, text=True) output = result.stdout # 匹配所有IPv4 ips = re.findall(r'inet (\d+\.\d+\.\d+\.\d+)', output) # 过滤掉回环地址 real_ips = [ip for ip in ips if not ip.startswith('127.')] return real_ips def main(): with gr.Blocks() as demo: with gr.Row(): output_text = gr.Textbox(label="识别结果", lines=5) with gr.Row(): audio_input = gr.Audio( sources=["microphone", "upload"], type="numpy", label="录制或上传音频", format="wav" ) lang_dropdown = gr.Dropdown( choices=["auto", "zh", "en", "yue", "ja", "ko"], value="auto", label="选择音频语言", ) audio_input.change( fn=speech_to_text, inputs=[audio_input, lang_dropdown], outputs=output_text ) # 启动 ips = get_all_local_ips() port = 7861 for ip in ips: print(f"* Running on local URL: https://{ip}:{port}") ip = "0.0.0.0" demo.launch( server_name=ip, server_port=port, ssl_certfile="./cert.pem", ssl_keyfile="./key.pem", ssl_verify=False, ) if __name__ == "__main__": main()