lihongjie committed on
Commit
c94a20b
·
1 Parent(s): 7d6c743
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. main_api_ax650 +3 -0
  3. run_api_ax650.sh +22 -0
  4. scripts/gradio_demo.py +6 -6
.gitattributes CHANGED
@@ -90,3 +90,4 @@ asset/dingding.png filter=lfs diff=lfs merge=lfs -text
90
  asset/output.wav filter=lfs diff=lfs merge=lfs -text
91
  main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
92
  main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
 
 
90
  asset/output.wav filter=lfs diff=lfs merge=lfs -text
91
  main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
92
  main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
93
+ main_api_ax650 filter=lfs diff=lfs merge=lfs -text
main_api_ax650 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c39036e17878cade43e6c97864d3e44abf671dfe1721e9ca0964d45696a00a
3
+ size 6729848
run_api_ax650.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LLM_DIR=CosyVoice-BlankEN-Ax650-C64-P256-CTX512/
2
+ TOKEN2WAV_DIR=token2wav-axmodels/
3
+
4
+ openssl req -newkey rsa:2048 -new -nodes -x509 -days 365 -keyout server.key -out server.crt -subj "/C=CN/ST=Beijing/L=Beijing/O=YourOrg/CN=localhost"
5
+
6
+ ./main_api_ax650 \
7
+ --template_filename_axmodel "${LLM_DIR}/qwen2_p64_l%d_together.axmodel" \
8
+ --token2wav_axmodel_dir $TOKEN2WAV_DIR \
9
+ --n_timesteps 10 \
10
+ --axmodel_num 24 \
11
+ --bos 0 --eos 0 \
12
+ --filename_tokenizer_model "http://10.122.86.184:12345" \
13
+ --filename_post_axmodel "${LLM_DIR}/qwen2_post.axmodel" \
14
+ --filename_decoder_axmodel "${LLM_DIR}/llm_decoder.axmodel" \
15
+ --filename_tokens_embed "${LLM_DIR}/model.embed_tokens.weight.bfloat16.bin" \
16
+ --filename_llm_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
17
+ --filename_speech_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
18
+ --continue 0 \
19
+ --prompt_files prompt_files
20
+
21
+
22
+ chmod 777 output*.wav
scripts/gradio_demo.py CHANGED
@@ -41,7 +41,7 @@ args = args.parse_args()
41
  frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
42
  args.wetext_dir,
43
  "frontend-onnx/campplus.onnx",
44
- "frontend-onnx/speech_tokenizer_v2.onnx",
45
  f"{args.model_dir}/spk2info.pt",
46
  "all")
47
 
@@ -60,7 +60,7 @@ def update_audio(audio_input_path, audio_text):
60
  os.makedirs(output_dir, exist_ok=True)
61
  zero_shot_spk_id = ""
62
  prompt_speech_16k = load_wav(audio_input_path, 16000)
63
- prompt_text = audio_text
64
  print("prompt_text",prompt_text)
65
  model_input = frontend.process_prompt( prompt_text, prompt_speech_16k, args.sample_rate, zero_shot_spk_id)
66
  print("prompt speech token size:", model_input["flow_prompt_speech_token"].shape)
@@ -123,20 +123,20 @@ def run_tts(text):
123
 
124
 
125
  with gr.Blocks() as demo:
126
- gr.Markdown("### 🎙️ AXERA CosyVoice2 Demo")
127
 
128
  with gr.Row():
129
  with gr.Column():
130
  audio_input = gr.Audio(label="输入音频", type="filepath")
131
  with gr.Column():
132
- audio_text = gr.Textbox(label="音频文本(自己改一下或者照着念)", value="锄禾日当午,汗滴禾下土。")
133
  btn_update = gr.Button("更新音源")
134
 
135
 
136
  with gr.Row():
137
- text_input = gr.Textbox(value="琦琦,麻烦你适配一下这个新的模型吧。", label="输入文本")
138
  with gr.Column():
139
- timesteps = gr.Slider(minimum=4, maximum=30, value=7, step=1, label="Timesteps")
140
  run_btn = gr.Button("生成语音")
141
 
142
  status = gr.Label(label="状态")
 
41
  frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
42
  args.wetext_dir,
43
  "frontend-onnx/campplus.onnx",
44
+ "frontend-onnx/speech_tokenizer_v3.onnx",
45
  f"{args.model_dir}/spk2info.pt",
46
  "all")
47
 
 
60
  os.makedirs(output_dir, exist_ok=True)
61
  zero_shot_spk_id = ""
62
  prompt_speech_16k = load_wav(audio_input_path, 16000)
63
+ prompt_text = "You are a helpful assistant.<|endofprompt|>"+audio_text
64
  print("prompt_text",prompt_text)
65
  model_input = frontend.process_prompt( prompt_text, prompt_speech_16k, args.sample_rate, zero_shot_spk_id)
66
  print("prompt speech token size:", model_input["flow_prompt_speech_token"].shape)
 
123
 
124
 
125
  with gr.Blocks() as demo:
126
+ gr.Markdown("### 🎙️ AXERA CosyVoice3 Demo")
127
 
128
  with gr.Row():
129
  with gr.Column():
130
  audio_input = gr.Audio(label="输入音频", type="filepath")
131
  with gr.Column():
132
+ audio_text = gr.Textbox(label="音频文本(自己改一下或者照着念)", value="希望你以后能够做的比我还好呦。")
133
  btn_update = gr.Button("更新音源")
134
 
135
 
136
  with gr.Row():
137
+ text_input = gr.Textbox(value="高管也通过电话、短信、微信等方式对报道[j][ǐ]予好评。", label="输入文本")
138
  with gr.Column():
139
+ timesteps = gr.Slider(minimum=4, maximum=30, value=10, step=1, label="Timesteps")
140
  run_btn = gr.Button("生成语音")
141
 
142
  status = gr.Label(label="状态")