update

Files changed (4)

.gitattributes +1 -0
main_api_ax650 +3 -0
run_api_ax650.sh +22 -0
scripts/gradio_demo.py +6 -6

.gitattributes CHANGED Viewed

@@ -90,3 +90,4 @@ asset/dingding.png filter=lfs diff=lfs merge=lfs -text
 asset/output.wav filter=lfs diff=lfs merge=lfs -text
 main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
 main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text

 asset/output.wav filter=lfs diff=lfs merge=lfs -text
 main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
 main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
+main_api_ax650 filter=lfs diff=lfs merge=lfs -text

main_api_ax650 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a4c39036e17878cade43e6c97864d3e44abf671dfe1721e9ca0964d45696a00a
+size 6729848

run_api_ax650.sh ADDED Viewed

	@@ -0,0 +1,22 @@

+LLM_DIR=CosyVoice-BlankEN-Ax650-C64-P256-CTX512/
+TOKEN2WAV_DIR=token2wav-axmodels/
+openssl req -newkey rsa:2048 -new -nodes -x509 -days 365 -keyout server.key -out server.crt -subj "/C=CN/ST=Beijing/L=Beijing/O=YourOrg/CN=localhost"
+./main_api_ax650 \
+--template_filename_axmodel "${LLM_DIR}/qwen2_p64_l%d_together.axmodel" \
+--token2wav_axmodel_dir $TOKEN2WAV_DIR \
+--n_timesteps 10 \
+--axmodel_num 24 \
+--bos 0 --eos 0 \
+--filename_tokenizer_model "http://10.122.86.184:12345" \
+--filename_post_axmodel "${LLM_DIR}/qwen2_post.axmodel" \
+--filename_decoder_axmodel "${LLM_DIR}/llm_decoder.axmodel" \
+--filename_tokens_embed "${LLM_DIR}/model.embed_tokens.weight.bfloat16.bin" \
+--filename_llm_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
+--filename_speech_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
+--continue 0 \
+--prompt_files prompt_files
+chmod 777 output*.wav

scripts/gradio_demo.py CHANGED Viewed

@@ -41,7 +41,7 @@ args = args.parse_args()
 frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
                                 args.wetext_dir,
                                 "frontend-onnx/campplus.onnx",
-                                "frontend-onnx/speech_tokenizer_v2.onnx",
                                 f"{args.model_dir}/spk2info.pt",
                                 "all")
@@ -60,7 +60,7 @@ def update_audio(audio_input_path, audio_text):
     os.makedirs(output_dir, exist_ok=True)
     zero_shot_spk_id = ""
     prompt_speech_16k = load_wav(audio_input_path, 16000)
-    prompt_text = audio_text
     print("prompt_text",prompt_text)
     model_input = frontend.process_prompt( prompt_text, prompt_speech_16k, args.sample_rate, zero_shot_spk_id)
     print("prompt speech token size:", model_input["flow_prompt_speech_token"].shape)
@@ -123,20 +123,20 @@ def run_tts(text):
 with gr.Blocks() as demo:
-    gr.Markdown("### 🎙️ AXERA CosyVoice2 Demo")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(label="输入音频", type="filepath")
         with gr.Column():
-            audio_text = gr.Textbox(label="音频文本(自己改一下或者照着念)", value="锄禾日当午，汗滴禾下土。")
             btn_update = gr.Button("更新音源")
     with gr.Row():
-        text_input = gr.Textbox(value="琦琦，麻烦你适配一下这个新的模型吧。", label="输入文本")
         with gr.Column():
-            timesteps = gr.Slider(minimum=4, maximum=30, value=7, step=1, label="Timesteps")
             run_btn = gr.Button("生成语音")
     status = gr.Label(label="状态")

 frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
                                 args.wetext_dir,
                                 "frontend-onnx/campplus.onnx",
+                                "frontend-onnx/speech_tokenizer_v3.onnx",
                                 f"{args.model_dir}/spk2info.pt",
                                 "all")
     os.makedirs(output_dir, exist_ok=True)
     zero_shot_spk_id = ""
     prompt_speech_16k = load_wav(audio_input_path, 16000)
+    prompt_text = "You are a helpful assistant.<|endofprompt|>"+audio_text
     print("prompt_text",prompt_text)
     model_input = frontend.process_prompt( prompt_text, prompt_speech_16k, args.sample_rate, zero_shot_spk_id)
     print("prompt speech token size:", model_input["flow_prompt_speech_token"].shape)
 with gr.Blocks() as demo:
+    gr.Markdown("### 🎙️ AXERA CosyVoice3 Demo")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(label="输入音频", type="filepath")
         with gr.Column():
+            audio_text = gr.Textbox(label="音频文本(自己改一下或者照着念)", value="希望你以后能够做的比我还好呦。")
             btn_update = gr.Button("更新音源")
     with gr.Row():
+        text_input = gr.Textbox(value="高管也通过电话、短信、微信等方式对报道[j][ǐ]予好评。", label="输入文本")
         with gr.Column():
+            timesteps = gr.Slider(minimum=4, maximum=30, value=10, step=1, label="Timesteps")
             run_btn = gr.Button("生成语音")
     status = gr.Label(label="状态")