lihongjie
committed on
Commit
·
c94a20b
1
Parent(s):
7d6c743
update
Browse files
- .gitattributes +1 -0
- main_api_ax650 +3 -0
- run_api_ax650.sh +22 -0
- scripts/gradio_demo.py +6 -6
.gitattributes
CHANGED
|
@@ -90,3 +90,4 @@ asset/dingding.png filter=lfs diff=lfs merge=lfs -text
|
|
| 90 |
asset/output.wav filter=lfs diff=lfs merge=lfs -text
|
| 91 |
main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
|
| 92 |
main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 90 |
asset/output.wav filter=lfs diff=lfs merge=lfs -text
|
| 91 |
main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
|
| 92 |
main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
main_api_ax650 filter=lfs diff=lfs merge=lfs -text
|
main_api_ax650
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4c39036e17878cade43e6c97864d3e44abf671dfe1721e9ca0964d45696a00a
|
| 3 |
+
size 6729848
|
run_api_ax650.sh
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LLM_DIR=CosyVoice-BlankEN-Ax650-C64-P256-CTX512/
|
| 2 |
+
TOKEN2WAV_DIR=token2wav-axmodels/
|
| 3 |
+
|
| 4 |
+
openssl req -newkey rsa:2048 -new -nodes -x509 -days 365 -keyout server.key -out server.crt -subj "/C=CN/ST=Beijing/L=Beijing/O=YourOrg/CN=localhost"
|
| 5 |
+
|
| 6 |
+
./main_api_ax650 \
|
| 7 |
+
--template_filename_axmodel "${LLM_DIR}/qwen2_p64_l%d_together.axmodel" \
|
| 8 |
+
--token2wav_axmodel_dir $TOKEN2WAV_DIR \
|
| 9 |
+
--n_timesteps 10 \
|
| 10 |
+
--axmodel_num 24 \
|
| 11 |
+
--bos 0 --eos 0 \
|
| 12 |
+
--filename_tokenizer_model "http://10.122.86.184:12345" \
|
| 13 |
+
--filename_post_axmodel "${LLM_DIR}/qwen2_post.axmodel" \
|
| 14 |
+
--filename_decoder_axmodel "${LLM_DIR}/llm_decoder.axmodel" \
|
| 15 |
+
--filename_tokens_embed "${LLM_DIR}/model.embed_tokens.weight.bfloat16.bin" \
|
| 16 |
+
--filename_llm_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
|
| 17 |
+
--filename_speech_embed "${LLM_DIR}/llm.speech_embedding.float16.bin" \
|
| 18 |
+
--continue 0 \
|
| 19 |
+
--prompt_files prompt_files
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
chmod 777 output*.wav
|
scripts/gradio_demo.py
CHANGED
|
@@ -41,7 +41,7 @@ args = args.parse_args()
|
|
| 41 |
frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
|
| 42 |
args.wetext_dir,
|
| 43 |
"frontend-onnx/campplus.onnx",
|
| 44 |
-
"frontend-onnx/
|
| 45 |
f"{args.model_dir}/spk2info.pt",
|
| 46 |
"all")
|
| 47 |
|
|
@@ -60,7 +60,7 @@ def update_audio(audio_input_path, audio_text):
|
|
| 60 |
os.makedirs(output_dir, exist_ok=True)
|
| 61 |
zero_shot_spk_id = ""
|
| 62 |
prompt_speech_16k = load_wav(audio_input_path, 16000)
|
| 63 |
-
prompt_text = audio_text
|
| 64 |
print("prompt_text",prompt_text)
|
| 65 |
model_input = frontend.process_prompt( prompt_text, prompt_speech_16k, args.sample_rate, zero_shot_spk_id)
|
| 66 |
print("prompt speech token size:", model_input["flow_prompt_speech_token"].shape)
|
|
@@ -123,20 +123,20 @@ def run_tts(text):
|
|
| 123 |
|
| 124 |
|
| 125 |
with gr.Blocks() as demo:
|
| 126 |
-
gr.Markdown("### 🎙️ AXERA
|
| 127 |
|
| 128 |
with gr.Row():
|
| 129 |
with gr.Column():
|
| 130 |
audio_input = gr.Audio(label="输入音频", type="filepath")
|
| 131 |
with gr.Column():
|
| 132 |
-
audio_text = gr.Textbox(label="音频文本(自己改一下或者照着念)", value="
|
| 133 |
btn_update = gr.Button("更新音源")
|
| 134 |
|
| 135 |
|
| 136 |
with gr.Row():
|
| 137 |
-
text_input = gr.Textbox(value="
|
| 138 |
with gr.Column():
|
| 139 |
-
timesteps = gr.Slider(minimum=4, maximum=30, value=
|
| 140 |
run_btn = gr.Button("生成语音")
|
| 141 |
|
| 142 |
status = gr.Label(label="状态")
|
|
|
|
| 41 |
frontend = CosyVoiceFrontEnd(f"{args.model_dir}",
|
| 42 |
args.wetext_dir,
|
| 43 |
"frontend-onnx/campplus.onnx",
|
| 44 |
+
"frontend-onnx/speech_tokenizer_v3.onnx",
|
| 45 |
f"{args.model_dir}/spk2info.pt",
|
| 46 |
"all")
|
| 47 |
|
|
|
|
| 60 |
os.makedirs(output_dir, exist_ok=True)
|
| 61 |
zero_shot_spk_id = ""
|
| 62 |
prompt_speech_16k = load_wav(audio_input_path, 16000)
|
| 63 |
+
prompt_text = "You are a helpful assistant.<|endofprompt|>"+audio_text
|
| 64 |
print("prompt_text",prompt_text)
|
| 65 |
model_input = frontend.process_prompt( prompt_text, prompt_speech_16k, args.sample_rate, zero_shot_spk_id)
|
| 66 |
print("prompt speech token size:", model_input["flow_prompt_speech_token"].shape)
|
|
|
|
| 123 |
|
| 124 |
|
| 125 |
with gr.Blocks() as demo:
|
| 126 |
+
gr.Markdown("### 🎙️ AXERA CosyVoice3 Demo")
|
| 127 |
|
| 128 |
with gr.Row():
|
| 129 |
with gr.Column():
|
| 130 |
audio_input = gr.Audio(label="输入音频", type="filepath")
|
| 131 |
with gr.Column():
|
| 132 |
+
audio_text = gr.Textbox(label="音频文本(自己改一下或者照着念)", value="希望你以后能够做的比我还好呦。")
|
| 133 |
btn_update = gr.Button("更新音源")
|
| 134 |
|
| 135 |
|
| 136 |
with gr.Row():
|
| 137 |
+
text_input = gr.Textbox(value="高管也通过电话、短信、微信等方式对报道[j][ǐ]予好评。", label="输入文本")
|
| 138 |
with gr.Column():
|
| 139 |
+
timesteps = gr.Slider(minimum=4, maximum=30, value=10, step=1, label="Timesteps")
|
| 140 |
run_btn = gr.Button("生成语音")
|
| 141 |
|
| 142 |
status = gr.Label(label="状态")
|