First commit
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +68 -0
- README.md +218 -3
- ax_model/.gitattributes +2 -0
- ax_model/auto.npy +3 -0
- ax_model/chn_jpn_yue_eng_ko_spectok.bpe.model +3 -0
- ax_model/event_emo.npy +3 -0
- ax_model/sensevoice.axmodel +3 -0
- ax_model/sensevoice/am.mvn +8 -0
- ax_model/sensevoice/config.yaml +97 -0
- ax_model/vad/am.mvn +8 -0
- ax_model/vad/config.yaml +56 -0
- ax_model/withitn.npy +3 -0
- ax_spoken_communication_demo.py +719 -0
- config.json +0 -0
- input_question/Q1.wav +3 -0
- input_question/Q2.wav +3 -0
- input_question/Q3.wav +3 -0
- libaxllm/main_api_ax650 +3 -0
- libaxllm/main_api_axcl_aarch64 +3 -0
- libaxllm/main_api_axcl_x86 +3 -0
- libaxllm/post_config.json +14 -0
- libaxllm/qwen2.5_tokenizer/merges.txt +0 -0
- libaxllm/qwen2.5_tokenizer/tokenizer.json +0 -0
- libaxllm/qwen2.5_tokenizer/tokenizer_config.json +207 -0
- libaxllm/qwen2.5_tokenizer/vocab.json +0 -0
- libaxllm/qwen2.5_tokenizer_uid.py +189 -0
- libaxllm/run_qwen2.5_1.5b_ctx_ax650_api.sh +15 -0
- libaxllm/run_qwen2.5_1.5b_ctx_axcl_aarch64_api.sh +13 -0
- libaxllm/run_qwen2.5_1.5b_ctx_axcl_x86_api.sh +13 -0
- libmelotts/models/decoder-en.axmodel +3 -0
- libmelotts/models/decoder-zh.axmodel +3 -0
- libmelotts/models/encoder-en.onnx +3 -0
- libmelotts/models/encoder-zh.onnx +3 -0
- libmelotts/models/g-en.bin +3 -0
- libmelotts/models/g-jp.bin +3 -0
- libmelotts/models/g-zh_mix_en.bin +3 -0
- libmelotts/models/lexicon.txt +0 -0
- libmelotts/models/tokens.txt +112 -0
- libmelotts/python/split_utils.py +173 -0
- libmelotts/python/symbols.py +1237 -0
- libmelotts/python/text/__init__.py +35 -0
- libmelotts/python/text/bert-base-multilingual-uncased/special_tokens_map.json +7 -0
- libmelotts/python/text/bert-base-multilingual-uncased/tokenizer.json +0 -0
- libmelotts/python/text/bert-base-multilingual-uncased/tokenizer_config.json +13 -0
- libmelotts/python/text/bert-base-multilingual-uncased/vocab.txt +0 -0
- libmelotts/python/text/bert-base-uncased/special_tokens_map.json +7 -0
- libmelotts/python/text/bert-base-uncased/tokenizer.json +0 -0
- libmelotts/python/text/bert-base-uncased/tokenizer_config.json +13 -0
- libmelotts/python/text/bert-base-uncased/vocab.txt +0 -0
- libmelotts/python/text/chinese.py +198 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,71 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
ax_model/sensevoice.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
libmelotts/install/libonnxruntime.so filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
libmelotts/install/libonnxruntime.so.1.14.0 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
libmelotts/install/melotts filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
libmelotts/models/decoder-en.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
libmelotts/models/decoder-zh.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
libtranslate/libax_translate.so filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
libtranslate/libsentencepiece.so.0 filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
libtranslate/opus-mt-en-zh/source.spm filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
libtranslate/opus-mt-en-zh/target.spm filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
libtranslate/opus-mt-en-zh.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
vad.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
ax_model/vad.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
main_api_ax650 filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
libaxllm/main_api_ax650 filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
wav/zh.wav filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_comm.so filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_host_proto.a filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_ive.so filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_ivps.so filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_lite.so filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_npu.so filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_pkg.so filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_rt.so filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_skel.so filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_sys.so filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_vdec.so filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libaxcl_venc.so filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
libmelotts/tts_x86/3rdparty/libaxcl/lib/libspdlog.so filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_aarch64/lib/libonnxruntime.so filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_aarch64/lib/libonnxruntime.so.1.14.0 filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_x86/lib/libonnxruntime.so filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_x86/lib/libonnxruntime.so.1.14.1 filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_x86_ori/lib/libonnxruntime.so filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_x86_ori/lib/libonnxruntime.so-- filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_x86_ori/lib/libonnxruntime.so.1 filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_x86_ori/lib/libonnxruntime.so.1.14.1 filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
libmelotts/tts_x86/3rdparty/onnxruntime_x86_ori/lib/libonnxruntime.so.1.21.0-- filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
libmelotts/tts_x86/install/melotts filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
libmelotts/tts_x86/models/decoder-zh.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_comm.so filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_host_proto.a filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_ive.so filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_ivps.so filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_lite.so filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_npu.so filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_pkg.so filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_rt.so filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_skel.so filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_sys.so filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_vdec.so filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libaxcl_venc.so filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
libmelotts/tts_aarch64/3rdparty/libaxcl/lib/libspdlog.so filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
libmelotts/tts_aarch64/3rdparty/onnxruntime_aarch64/lib/libonnxruntime.so filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
libmelotts/tts_aarch64/3rdparty/onnxruntime_aarch64/lib/libonnxruntime.so.1.14.0 filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
libmelotts/tts_aarch64/3rdparty/onnxruntime_x86/lib/libonnxruntime.so filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
libmelotts/tts_aarch64/3rdparty/onnxruntime_x86/lib/libonnxruntime.so.1 filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
libmelotts/tts_aarch64/3rdparty/onnxruntime_x86/lib/libonnxruntime.so.1.21.0 filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
libmelotts/tts_aarch64/install/melotts filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
libmelotts/tts_aarch64/models/decoder-zh.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
libaxllm/main_api_axcl_x86 filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
libaxllm/main_api_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
libmelotts/text/fr_phonemizer/example_ipa.txt filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
libmelotts/python/text/fr_phonemizer/example_ipa.txt filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
wav/en_6mins.wav filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
input_question/Q1.wav filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
input_question/Q2.wav filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
input_question/Q3.wav filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,3 +1,218 @@
|
|
| 1 |
-
---
|
| 2 |
-
license: mit
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
- zh
|
| 6 |
+
pipeline_tag: audio-to-audio
|
| 7 |
+
base_model:
|
| 8 |
+
- FunAudioLLM/SenseVoiceSmall
|
| 9 |
+
- qwen2.5
|
| 10 |
+
- MeloTTS
|
| 11 |
+
tags:
|
| 12 |
+
- VAD
|
| 13 |
+
- ASR
|
| 14 |
+
- LLM
|
| 15 |
+
- TTS
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Spoken-Communication.axera
|
| 20 |
+
|
| 21 |
+
spoken communication demo on Axera
|
| 22 |
+
|
| 23 |
+
- [x] Python 示例
|
| 24 |
+
- [ ] C++ 示例
|
| 25 |
+
|
| 26 |
+
## Convert tools links:
|
| 27 |
+
|
| 28 |
+
For those who are interested in model conversion, you can try to export axmodel through the original repo :
|
| 29 |
+
How to Convert from ONNX to axmodel
|
| 30 |
+
- [ASR](https://github.com/AXERA-TECH/3D-Speaker-MT.axera/tree/main/model_convert)
|
| 31 |
+
- [MeloTTS](https://github.com/ml-inory/melotts.axera/tree/main/model_convert)
|
| 32 |
+
|
| 33 |
+
## 支持平台
|
| 34 |
+
|
| 35 |
+
- AX650N
|
| 36 |
+
|
| 37 |
+
## 功能
|
| 38 |
+
|
| 39 |
+
语音交流
|
| 40 |
+
|
| 41 |
+
## Pipeline组件
|
| 42 |
+
|
| 43 |
+
- [ASR](https://github.com/AXERA-TECH/3D-Speaker-MT.axera/tree/main)
|
| 44 |
+
- [LLM](https://github.com/AXERA-TECH/ax-llm/tree/ax-context),参考生成库文件,保存到libaxllm
|
| 45 |
+
- [MeloTTS](https://github.com/ml-inory/melotts.axera/tree/main/python)
|
| 46 |
+
|
| 47 |
+
## 上板部署
|
| 48 |
+
|
| 49 |
+
- AX650N 的设备已预装 Ubuntu22.04
|
| 50 |
+
- 以 root 权限登陆 AX650N 的板卡设备
|
| 51 |
+
- 链接互联网,确保 AX650N 的设备能正常执行 apt install, pip install 等指令
|
| 52 |
+
- 已验证设备:AX650N DEMO Board
|
| 53 |
+
|
| 54 |
+
## Python API 运行
|
| 55 |
+
|
| 56 |
+
在python3.10(验证)
|
| 57 |
+
|
| 58 |
+
### pipeline方案:ASR + LLM(Qwen) + MeloTTS
|
| 59 |
+
|
| 60 |
+
```
|
| 61 |
+
支持板端运行及算力卡模式运行
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### 工程下载
|
| 65 |
+
```
|
| 66 |
+
git clone https://huggingface.co/AXERA-TECH/Spoken-Communication.axera 或者
|
| 67 |
+
hf download AXERA-TECH/Spoken-Communication.axera --local-dir Spoken-Communication.axera
|
| 68 |
+
|
| 69 |
+
cd Spoken-Communication.axera
|
| 70 |
+
|
| 71 |
+
工程目录文件结构如下:
|
| 72 |
+
.
|
| 73 |
+
|-- README.md
|
| 74 |
+
|-- ax_model
|
| 75 |
+
|-- ax_spoken_communication_demo.py
|
| 76 |
+
|-- config.json
|
| 77 |
+
|-- libaxllm
|
| 78 |
+
|-- libmelotts
|
| 79 |
+
|-- model.py
|
| 80 |
+
|-- requirements.txt
|
| 81 |
+
|-- utils
|
| 82 |
+
`-- input_question
|
| 83 |
+
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
### 具体流程
|
| 87 |
+
|
| 88 |
+
**板端 demo**
|
| 89 |
+
|
| 90 |
+
1、安装依赖库
|
| 91 |
+
|
| 92 |
+
```
|
| 93 |
+
1):
|
| 94 |
+
如果环境中没有axengine,下载安装,位置任意
|
| 95 |
+
hf download AXERA-TECH/PyAXEngine --local-dir PyAXEngine
|
| 96 |
+
cd PyAXEngine
|
| 97 |
+
pip3 install axengine-0.1.3-py3-none-any.whl
|
| 98 |
+
|
| 99 |
+
2):
|
| 100 |
+
cd Spoken-Communication.axera
|
| 101 |
+
pip3 install -r requirements.txt
|
| 102 |
+
|
| 103 |
+
3):
|
| 104 |
+
apt install espeak 或者
|
| 105 |
+
sudo apt install espeak
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
2、模型下载
|
| 109 |
+
|
| 110 |
+
以Qwen2.5-1.5B为例,[下载地址](https://huggingface.co/AXERA-TECH/Qwen2.5-1.5B-Instruct/tree/main/qwen2.5-1.5b-ctx-ax650)
|
| 111 |
+
```
|
| 112 |
+
执行命令:
|
| 113 |
+
hf download AXERA-TECH/Qwen2.5-1.5B-Instruct --local-dir libaxllm --include qwen2.5-1.5b-ctx-ax650/*
|
| 114 |
+
|
| 115 |
+
模型下载至libaxllm文件夹
|
| 116 |
+
```
|
| 117 |
+
|
| 118 |
+
3、在开发板运行以下命令
|
| 119 |
+
|
| 120 |
+
```
|
| 121 |
+
1)、运行qwen api
|
| 122 |
+
cd libaxllm
|
| 123 |
+
|
| 124 |
+
启动支持上下文的 tokenizer 服务器
|
| 125 |
+
python3 qwen2.5_tokenizer_uid.py
|
| 126 |
+
|
| 127 |
+
运行
|
| 128 |
+
sh run_qwen2.5_1.5b_ctx_ax650_api.sh
|
| 129 |
+
|
| 130 |
+
2)、运行pipeline板端demo
|
| 131 |
+
cd ..
|
| 132 |
+
|
| 133 |
+
python3 ax_spoken_communication_demo.py --audio_dir input_question --output_dir output_answer --api_url http://10.126.29.158:8000
|
| 134 |
+
|
| 135 |
+
运行参数说明:
|
| 136 |
+
|
| 137 |
+
| 参数名称 | 说明|
|
| 138 |
+
|-------|------|
|
| 139 |
+
| `--audio_dir` | 音频路径 |
|
| 140 |
+
| `--api_url` | qwen API服务地址,对应其运行服务器 |
|
| 141 |
+
| `--output_dir` | 结果保存路径 |
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
输出:
|
| 145 |
+
1、与输入音频相对应的wav文件,
|
| 146 |
+
2、识别信息保存成txt文件 -> "output_answer/processing_summary.txt",如下:
|
| 147 |
+
```
|
| 148 |
+
批量处理结果汇总
|
| 149 |
+
==================================================
|
| 150 |
+
|
| 151 |
+
文件 1: Q1.wav
|
| 152 |
+
原始文本: 人工智能和人类智能最本质的区别是什么?。
|
| 153 |
+
回答结果: 人工智能和人类智能最本质的区别在于,人工智能是基于算法和数据进行学习和决策的机器智能,而人类智能是基于经验和直觉进行思考和决策的生物智能。
|
| 154 |
+
合成音频: Q1_answer.wav
|
| 155 |
+
处理时间: 8.22 秒
|
| 156 |
+
音频时长: 15.19 秒
|
| 157 |
+
RTF: 0.54
|
| 158 |
+
--------------------------------------------------
|
| 159 |
+
文件 2: Q2.wav
|
| 160 |
+
原始文本: 人工智能没有思想,为什么他能创作出震撼人心的艺术?。
|
| 161 |
+
回答结果: 人工智能创作艺术是因为它可以通过算法和数据进行学习和分析,理解艺术作品的风格、情感和意义,然后通过生成模型进行创作。这与人类艺术家创作艺术的灵感、经验和直觉不同,但人工智能在某些领域已经表现出超越人类的能力。
|
| 162 |
+
合成音频: Q2_answer.wav
|
| 163 |
+
处理时间: 9.43 秒
|
| 164 |
+
音频时长: 23.68 秒
|
| 165 |
+
RTF: 0.40
|
| 166 |
+
--------------------------------------------------
|
| 167 |
+
文件 3: Q3.wav
|
| 168 |
+
原始文本: 人工智能最终会统治人类吗?。
|
| 169 |
+
回答结果: 人工智能的发展可能会对人类社会产生重大影响,但目前来看,人工智能尚未达到能够统治人类的程度。人工智能主要是在特定任务上表现出色,如数据分析、图像识别等,但在决策、伦理和情感理解等方面仍存在局限。
|
| 170 |
+
合成音频: Q3_answer.wav
|
| 171 |
+
处理时间: 8.86 秒
|
| 172 |
+
音频时长: 22.62 秒
|
| 173 |
+
RTF: 0.39
|
| 174 |
+
--------------------------------------------------
|
| 175 |
+
|
| 176 |
+
总计: 3 个文件
|
| 177 |
+
总处理时间: 26.53 秒
|
| 178 |
+
```
|
| 179 |
+
|
| 180 |
+
4、Latency
|
| 181 |
+
|
| 182 |
+
AX650N
|
| 183 |
+
|
| 184 |
+
RTF: 约为0.4,如上例。
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
**算力卡demo**
|
| 188 |
+
|
| 189 |
+
运行步骤与板端demo大致相同,以aarch64环境为例:
|
| 190 |
+
```
|
| 191 |
+
1、运行qwen api
|
| 192 |
+
cd libaxllm
|
| 193 |
+
|
| 194 |
+
启动支持上下文的 tokenizer 服务器
|
| 195 |
+
python3 qwen2.5_tokenizer_uid.py
|
| 196 |
+
|
| 197 |
+
运行对应环境的api
|
| 198 |
+
sh run_qwen2.5_1.5b_ctx_axcl_aarch64_api.sh
|
| 199 |
+
|
| 200 |
+
2、运行pipeline算力卡demo
|
| 201 |
+
cd ..
|
| 202 |
+
python3 ax_spoken_communication_demo.py --audio_dir input_question --api_url http://10.126.33.13:8000 --output_dir output
|
| 203 |
+
```
|
| 204 |
+
x86环境运行步骤同上
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
## 参考
|
| 209 |
+
- [sensevoice.axera](https://github.com/ml-inory/sensevoice.axera/tree/main)
|
| 210 |
+
- [3D-Speaker.axera](https://github.com/AXERA-TECH/3D-Speaker.axera/tree/master)
|
| 211 |
+
- [melotts.axera](https://github.com/ml-inory/melotts.axera/tree/main)
|
| 212 |
+
- [ax-llm](https://github.com/AXERA-TECH/ax-llm/tree/ax-context)
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
## 技术讨论
|
| 216 |
+
|
| 217 |
+
- Github issues
|
| 218 |
+
- QQ 群: 139953715
|
ax_model/.gitattributes
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
ax_model/auto.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d0997706b30274f7ff3b157ca90df50b7ed8ced35091a0231700355d5ee1374
|
| 3 |
+
size 2368
|
ax_model/chn_jpn_yue_eng_ko_spectok.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa87f86064c3730d799ddf7af3c04659151102cba548bce325cf06ba4da4e6a8
|
| 3 |
+
size 377341
|
ax_model/event_emo.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d22e3df5d192fdc3e73e368a2cb576975a5a43a114a8432a91c036adf8e2263
|
| 3 |
+
size 4608
|
ax_model/sensevoice.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b64a36fa15e75ab5e3b75f18ae87a058970cff76219407e503b54fb53dd8e38
|
| 3 |
+
size 262170623
|
ax_model/sensevoice/am.mvn
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<Nnet>
|
| 2 |
+
<Splice> 560 560
|
| 3 |
+
[ 0 ]
|
| 4 |
+
<AddShift> 560 560
|
| 5 |
+
<LearnRateCoef> 0 [ -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 
-13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 
-8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 ]
|
| 6 |
+
<Rescale> 560 560
|
| 7 |
+
<LearnRateCoef> 0 [ 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 
0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 
0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 ]
|
| 8 |
+
</Nnet>
|
ax_model/sensevoice/config.yaml
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
encoder: SenseVoiceEncoderSmall
|
| 2 |
+
encoder_conf:
|
| 3 |
+
output_size: 512
|
| 4 |
+
attention_heads: 4
|
| 5 |
+
linear_units: 2048
|
| 6 |
+
num_blocks: 50
|
| 7 |
+
tp_blocks: 20
|
| 8 |
+
dropout_rate: 0.1
|
| 9 |
+
positional_dropout_rate: 0.1
|
| 10 |
+
attention_dropout_rate: 0.1
|
| 11 |
+
input_layer: pe
|
| 12 |
+
pos_enc_class: SinusoidalPositionEncoder
|
| 13 |
+
normalize_before: true
|
| 14 |
+
kernel_size: 11
|
| 15 |
+
sanm_shfit: 0
|
| 16 |
+
selfattention_layer_type: sanm
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
model: SenseVoiceSmall
|
| 20 |
+
model_conf:
|
| 21 |
+
length_normalized_loss: true
|
| 22 |
+
sos: 1
|
| 23 |
+
eos: 2
|
| 24 |
+
ignore_id: -1
|
| 25 |
+
|
| 26 |
+
tokenizer: SentencepiecesTokenizer
|
| 27 |
+
tokenizer_conf:
|
| 28 |
+
bpemodel: null
|
| 29 |
+
unk_symbol: <unk>
|
| 30 |
+
split_with_space: true
|
| 31 |
+
|
| 32 |
+
frontend: WavFrontend
|
| 33 |
+
frontend_conf:
|
| 34 |
+
fs: 16000
|
| 35 |
+
window: hamming
|
| 36 |
+
n_mels: 80
|
| 37 |
+
frame_length: 25
|
| 38 |
+
frame_shift: 10
|
| 39 |
+
lfr_m: 7
|
| 40 |
+
lfr_n: 6
|
| 41 |
+
cmvn_file: null
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
dataset: SenseVoiceCTCDataset
|
| 45 |
+
dataset_conf:
|
| 46 |
+
index_ds: IndexDSJsonl
|
| 47 |
+
batch_sampler: EspnetStyleBatchSampler
|
| 48 |
+
data_split_num: 32
|
| 49 |
+
batch_type: token
|
| 50 |
+
batch_size: 14000
|
| 51 |
+
max_token_length: 2000
|
| 52 |
+
min_token_length: 60
|
| 53 |
+
max_source_length: 2000
|
| 54 |
+
min_source_length: 60
|
| 55 |
+
max_target_length: 200
|
| 56 |
+
min_target_length: 0
|
| 57 |
+
shuffle: true
|
| 58 |
+
num_workers: 4
|
| 59 |
+
sos: ${model_conf.sos}
|
| 60 |
+
eos: ${model_conf.eos}
|
| 61 |
+
IndexDSJsonl: IndexDSJsonl
|
| 62 |
+
retry: 20
|
| 63 |
+
|
| 64 |
+
train_conf:
|
| 65 |
+
accum_grad: 1
|
| 66 |
+
grad_clip: 5
|
| 67 |
+
max_epoch: 20
|
| 68 |
+
keep_nbest_models: 10
|
| 69 |
+
avg_nbest_model: 10
|
| 70 |
+
log_interval: 100
|
| 71 |
+
resume: true
|
| 72 |
+
validate_interval: 10000
|
| 73 |
+
save_checkpoint_interval: 10000
|
| 74 |
+
|
| 75 |
+
optim: adamw
|
| 76 |
+
optim_conf:
|
| 77 |
+
lr: 0.00002
|
| 78 |
+
scheduler: warmuplr
|
| 79 |
+
scheduler_conf:
|
| 80 |
+
warmup_steps: 25000
|
| 81 |
+
|
| 82 |
+
specaug: SpecAugLFR
|
| 83 |
+
specaug_conf:
|
| 84 |
+
apply_time_warp: false
|
| 85 |
+
time_warp_window: 5
|
| 86 |
+
time_warp_mode: bicubic
|
| 87 |
+
apply_freq_mask: true
|
| 88 |
+
freq_mask_width_range:
|
| 89 |
+
- 0
|
| 90 |
+
- 30
|
| 91 |
+
lfr_rate: 6
|
| 92 |
+
num_freq_mask: 1
|
| 93 |
+
apply_time_mask: true
|
| 94 |
+
time_mask_width_range:
|
| 95 |
+
- 0
|
| 96 |
+
- 12
|
| 97 |
+
num_time_mask: 1
|
ax_model/vad/am.mvn
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<Nnet>
|
| 2 |
+
<Splice> 400 400
|
| 3 |
+
[ 0 ]
|
| 4 |
+
<AddShift> 400 400
|
| 5 |
+
<LearnRateCoef> 0 [ -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 
-13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 -8.311879 -8.600912 -9.615928 -10.43595 -11.21292 -11.88333 -12.36243 -12.63706 -12.8818 -12.83066 -12.89103 -12.95666 -13.19763 -13.40598 -13.49113 -13.5546 -13.55639 -13.51915 -13.68284 -13.53289 -13.42107 -13.65519 -13.50713 -13.75251 -13.76715 -13.87408 -13.73109 -13.70412 -13.56073 -13.53488 -13.54895 -13.56228 -13.59408 -13.62047 -13.64198 -13.66109 -13.62669 -13.58297 -13.57387 -13.4739 -13.53063 -13.48348 -13.61047 -13.64716 -13.71546 -13.79184 -13.90614 -14.03098 -14.18205 -14.35881 -14.48419 -14.60172 -14.70591 -14.83362 -14.92122 -15.00622 -15.05122 -15.03119 -14.99028 -14.92302 -14.86927 -14.82691 -14.7972 -14.76909 -14.71356 -14.61277 -14.51696 -14.42252 -14.36405 -14.30451 -14.23161 -14.19851 -14.16633 -14.15649 -14.10504 -13.99518 -13.79562 -13.3996 -12.7767 -11.71208 ]
|
| 6 |
+
<Rescale> 400 400
|
| 7 |
+
<LearnRateCoef> 0 [ 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 
0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 0.155775 0.154484 0.1527379 0.1518718 0.1506028 0.1489256 0.147067 0.1447061 0.1436307 0.1443568 0.1451849 0.1455157 0.1452821 0.1445717 0.1439195 0.1435867 0.1436018 0.1438781 0.1442086 0.1448844 0.1454756 0.145663 0.146268 0.1467386 0.1472724 0.147664 0.1480913 0.1483739 0.1488841 0.1493636 0.1497088 0.1500379 0.1502916 0.1505389 0.1506787 0.1507102 0.1505992 0.1505445 0.1505938 0.1508133 0.1509569 0.1512396 0.1514625 0.1516195 0.1516156 0.1515561 0.1514966 0.1513976 0.1512612 0.151076 0.1510596 0.1510431 0.151077 0.1511168 0.1511917 0.151023 0.1508045 0.1505885 0.1503493 0.1502373 0.1501726 0.1500762 0.1500065 0.1499782 0.150057 0.1502658 0.150469 0.1505335 0.1505505 0.1505328 0.1504275 0.1502438 0.1499674 0.1497118 0.1494661 0.1493102 0.1493681 0.1495501 0.1499738 0.1509654 ]
|
| 8 |
+
</Nnet>
|
ax_model/vad/config.yaml
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
frontend: WavFrontendOnline
|
| 2 |
+
frontend_conf:
|
| 3 |
+
fs: 16000
|
| 4 |
+
window: hamming
|
| 5 |
+
n_mels: 80
|
| 6 |
+
frame_length: 25
|
| 7 |
+
frame_shift: 10
|
| 8 |
+
dither: 0.0
|
| 9 |
+
lfr_m: 5
|
| 10 |
+
lfr_n: 1
|
| 11 |
+
|
| 12 |
+
model: FsmnVADStreaming
|
| 13 |
+
model_conf:
|
| 14 |
+
sample_rate: 16000
|
| 15 |
+
detect_mode: 1
|
| 16 |
+
snr_mode: 0
|
| 17 |
+
max_end_silence_time: 800
|
| 18 |
+
max_start_silence_time: 3000
|
| 19 |
+
do_start_point_detection: True
|
| 20 |
+
do_end_point_detection: True
|
| 21 |
+
window_size_ms: 200
|
| 22 |
+
sil_to_speech_time_thres: 150
|
| 23 |
+
speech_to_sil_time_thres: 150
|
| 24 |
+
speech_2_noise_ratio: 1.0
|
| 25 |
+
do_extend: 1
|
| 26 |
+
lookback_time_start_point: 200
|
| 27 |
+
lookahead_time_end_point: 100
|
| 28 |
+
max_single_segment_time: 60000
|
| 29 |
+
snr_thres: -100.0
|
| 30 |
+
noise_frame_num_used_for_snr: 100
|
| 31 |
+
decibel_thres: -100.0
|
| 32 |
+
speech_noise_thres: 0.6
|
| 33 |
+
fe_prior_thres: 0.0001
|
| 34 |
+
silence_pdf_num: 1
|
| 35 |
+
sil_pdf_ids: [0]
|
| 36 |
+
speech_noise_thresh_low: -0.1
|
| 37 |
+
speech_noise_thresh_high: 0.3
|
| 38 |
+
output_frame_probs: False
|
| 39 |
+
frame_in_ms: 10
|
| 40 |
+
frame_length_ms: 25
|
| 41 |
+
|
| 42 |
+
encoder: FSMN
|
| 43 |
+
encoder_conf:
|
| 44 |
+
input_dim: 400
|
| 45 |
+
input_affine_dim: 140
|
| 46 |
+
fsmn_layers: 4
|
| 47 |
+
linear_dim: 250
|
| 48 |
+
proj_dim: 128
|
| 49 |
+
lorder: 20
|
| 50 |
+
rorder: 0
|
| 51 |
+
lstride: 1
|
| 52 |
+
rstride: 0
|
| 53 |
+
output_affine_dim: 140
|
| 54 |
+
output_dim: 248
|
| 55 |
+
|
| 56 |
+
|
ax_model/withitn.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39bf02586f59237894fc2918ab2db4f12ec3c084c41465718832fbd7646ea729
|
| 3 |
+
size 2368
|
ax_spoken_communication_demo.py
ADDED
|
@@ -0,0 +1,719 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import time
|
| 3 |
+
import librosa
|
| 4 |
+
import torch
|
| 5 |
+
import argparse
|
| 6 |
+
import soundfile as sf
|
| 7 |
+
import cn2an
|
| 8 |
+
import requests
|
| 9 |
+
import re
|
| 10 |
+
import numpy as np
|
| 11 |
+
import onnxruntime as ort
|
| 12 |
+
import axengine as axe
|
| 13 |
+
|
| 14 |
+
# 导入SenseVoice相关模块
|
| 15 |
+
from model import SinusoidalPositionEncoder
|
| 16 |
+
from utils.ax_model_bin import AX_SenseVoiceSmall
|
| 17 |
+
from utils.ax_vad_bin import AX_Fsmn_vad
|
| 18 |
+
from utils.vad_utils import merge_vad
|
| 19 |
+
from funasr.tokenizer.sentencepiece_tokenizer import SentencepiecesTokenizer
|
| 20 |
+
|
| 21 |
+
# 导入MeloTTS相关模块
|
| 22 |
+
from libmelotts.python.split_utils import split_sentence
|
| 23 |
+
from libmelotts.python.text import cleaned_text_to_sequence
|
| 24 |
+
from libmelotts.python.text.cleaner import clean_text
|
| 25 |
+
from libmelotts.python.symbols import LANG_TO_SYMBOL_MAP
|
| 26 |
+
|
| 27 |
+
# 配置参数
|
| 28 |
+
# tts 参数
|
| 29 |
+
TTS_MODEL_DIR = "libmelotts/models"
|
| 30 |
+
TTS_MODEL_FILES = {
|
| 31 |
+
"g": "g-zh_mix_en.bin",
|
| 32 |
+
"encoder": "encoder-zh.onnx",
|
| 33 |
+
"decoder": "decoder-zh.axmodel"
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
# Qwen大模型API参数
|
| 37 |
+
QWEN_API_URL = "" # API服务地址 http://10.126.29.158:8000
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# TTS辅助函数(从melotts.py移植)
|
| 41 |
+
def intersperse(lst, item):
    """Return a list with *item* placed before, between and after every element.

    Example: intersperse([a, b], 0) -> [0, a, 0, b, 0]
    """
    out = [item]
    for element in lst:
        out.append(element)
        out.append(item)
    return out
|
| 45 |
+
|
| 46 |
+
# 处理字符无法识别
|
| 47 |
+
def get_text_for_tts_infer(text, language_str, symbol_to_id=None):
    """Convert raw text into phone/tone/language id arrays for TTS inference.

    Drops IPA symbols the symbol table does not know (keeping the phone and
    tone sequences in lockstep), converts the surviving phonemes to ids, and
    intersperses blank (0) tokens as the MeloTTS encoder expects.

    Returns:
        (phone, tone, language, norm_text, word2ph) where the first three and
        word2ph are int32 numpy arrays and norm_text is the normalized text.

    Raises:
        Re-raises whatever the underlying text-cleaning pipeline raises,
        after printing a traceback.
    """
    try:
        norm_text, phone, tone, word2ph = clean_text(text, language_str)

        # IPA symbols not present in the symbol table; mapped to nothing
        # (i.e. dropped) rather than passed through.
        phone_mapping = {
            'ɛ': '', 'æ': '', 'ʌ': '', 'ʊ': '', 'ɔ': '', 'ɪ': '', 'ɝ': '', 'ɚ': '', 'ɑ': '',
            'ʒ': '', 'θ': '', 'ð': '', 'ŋ': '', 'ʃ': '', 'ʧ': '', 'ʤ': '', 'ː': '', 'ˈ': '',
            'ˌ': '', 'ʰ': '', 'ʲ': '', 'ʷ': '', 'ʔ': '', 'ɾ': '', 'ɹ': '', 'ɫ': '', 'ɡ': '',
        }

        # Filter phones and tones together so the two sequences stay aligned.
        processed_phone = []
        processed_tone = []
        removed_symbols = set()

        for p, t in zip(phone, tone):
            if p in phone_mapping:
                # Special phoneme: drop it together with its tone.
                removed_symbols.add(p)
            elif p in symbol_to_id:
                processed_phone.append(p)
                processed_tone.append(t)
            else:
                # Unknown phoneme: drop as well.
                removed_symbols.add(p)

        if removed_symbols:
            print(f"[音素过滤] 删除了 {len(removed_symbols)} 个特殊音素: {sorted(removed_symbols)}")
            print(f"[音素过滤] 处理后音素序列长度: {len(processed_phone)}")
            print(f"[音素过滤] 处理后音调序列长度: {len(processed_tone)}")

        # Fall back to a fixed Mandarin phrase when everything was filtered out.
        if not processed_phone:
            print("[警告] 没有有效音素,使用默认中文音素")
            processed_phone = ['ni', 'hao']
            processed_tone = ['1', '3']
            word2ph = [1, 1]

        # word2ph was computed for the unfiltered sequence; rebuild it when
        # the phone count changed so downstream slicing stays consistent.
        if len(processed_phone) != len(phone):
            print(f"[警告] 音素序列长度变化: {len(phone)} -> {len(processed_phone)}")
            word2ph = [1] * len(processed_phone)

        phone, tone, language = cleaned_text_to_sequence(processed_phone, processed_tone, language_str, symbol_to_id)

        # Intersperse blank token 0 between every id, as the encoder expects.
        phone = intersperse(phone, 0)
        tone = intersperse(tone, 0)
        language = intersperse(language, 0)

        phone = np.array(phone, dtype=np.int32)
        tone = np.array(tone, dtype=np.int32)
        language = np.array(language, dtype=np.int32)
        # After interspersing, each word covers twice as many tokens; the
        # first word also absorbs the leading blank.
        word2ph = np.array(word2ph, dtype=np.int32) * 2
        word2ph[0] += 1
        return phone, tone, language, norm_text, word2ph

    except Exception as e:
        print(f"[错误] 文本处理失败: {e}")
        import traceback
        traceback.print_exc()
        raise  # bare raise preserves the original traceback (was `raise e`)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def audio_numpy_concat(segment_data_list, sr, speed=1.):
    """Concatenate audio segments, appending a short silence gap after each.

    The gap is 50 ms at sample rate *sr*, scaled by *speed*.

    Args:
        segment_data_list: iterable of numpy arrays (any shape; flattened).
        sr: sample rate in Hz.
        speed: playback speed factor; higher speed -> shorter gap.

    Returns:
        A 1-D float32 numpy array.
    """
    # Building the result via a Python list of samples (as the original code
    # did) is very slow for long audio; concatenate numpy arrays instead.
    gap = np.zeros(int((sr * 0.05) / speed), dtype=np.float32)
    pieces = []
    for segment_data in segment_data_list:
        pieces.append(segment_data.reshape(-1).astype(np.float32))
        pieces.append(gap)
    if not pieces:
        return np.array([], dtype=np.float32)
    return np.concatenate(pieces)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def merge_sub_audio(sub_audio_list, pad_size, audio_len):
    """Stitch overlapping audio chunks back together.

    The pad_size-sample overlap between consecutive chunks is averaged in
    place, every chunk after the first is trimmed of its (now duplicated)
    head, and the chunks are concatenated and cut to audio_len samples.
    Note: mutates the arrays in sub_audio_list.
    """
    if pad_size > 0:
        for idx, following in enumerate(sub_audio_list[1:]):
            current = sub_audio_list[idx]
            # Average the shared tail/head region of the two chunks.
            current[-pad_size:] += following[:pad_size]
            current[-pad_size:] /= 2
            if idx > 0:
                # Drop the head that already belongs to the previous chunk.
                sub_audio_list[idx] = current[pad_size:]

    merged = np.concatenate(sub_audio_list, axis=-1)
    return merged[:audio_len]
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def calc_word2pronoun(word2ph, pronoun_lens):
    """Sum, per word, the pronunciation lengths of the phonemes it spans.

    word2ph[i] gives the number of phonemes of word i; pronoun_lens holds
    one length per phoneme, in order.
    """
    word2pronoun = []
    offset = 0
    for count in word2ph:
        word2pronoun.append(np.sum(pronoun_lens[offset : offset + count]))
        offset += count
    return word2pronoun
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def generate_slices(word2pronoun, dec_len):
    """Split the word sequence into decoder-sized windows.

    Each window holds at most dec_len pronunciation frames. When the previous
    window contained more than two words and the last two fit together with
    the next word, those two words are repeated at the start of the new
    window (a 2-word overlap for smoother decoding).

    Returns (word_slices, frame_slices): parallel lists of `slice` objects
    over the word sequence and the frame sequence respectively.
    """
    word_lo = word_hi = 0
    frame_lo = frame_hi = 0
    carry = 0
    word_slices = []
    frame_slices = []
    while word_hi < len(word2pronoun):
        # Overlap: previous slice longer than 2 words AND the two trailing
        # words plus the upcoming word still fit in dec_len frames.
        if word_hi - word_lo > 2 and np.sum(word2pronoun[word_hi - 2 : word_hi + 1]) <= dec_len:
            carry = np.sum(word2pronoun[word_hi - 2 : word_hi])
            frame_lo = frame_hi - carry
            word_lo = word_hi - 2
        else:
            carry = 0
            frame_lo = frame_hi
            word_lo = word_hi

        # Greedily extend the window while it fits in the decoder budget.
        while word_hi < len(word2pronoun) and carry + word2pronoun[word_hi] <= dec_len:
            carry += word2pronoun[word_hi]
            word_hi += 1
        frame_hi = frame_lo + carry
        word_slices.append(slice(word_lo, word_hi))
        frame_slices.append(slice(frame_lo, frame_hi))
    return word_slices, frame_slices
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# 确认中英文
|
| 174 |
+
def lang_detect_with_regex(text):
    """Classify *text* as 'chinese', 'english', or 'unknown'.

    Digits are ignored; if any CJK-unified character remains the text is
    'chinese' (Chinese wins on mixed text), else Latin letters make it
    'english', else 'unknown'.
    """
    # Strip all digit runs before classifying.
    stripped = re.sub(r'\d+', '', text)
    if not stripped:
        return 'unknown'
    if re.search(r'[\u4e00-\u9fff]', stripped):
        return 'chinese'
    if re.search(r'[a-zA-Z]', stripped):
        return 'english'
    return 'unknown'
|
| 193 |
+
|
| 194 |
+
class QwenTranslationAPI:
    """Thin HTTP client for the Qwen LLM API (fire a generate request, then
    poll a provider endpoint for streamed chunks until done)."""

    def __init__(self, api_url=QWEN_API_URL):
        self.api_url = api_url
        # Unique-enough session tag for logging/correlation.
        self.session_id = f"speech_translate_{int(time.time())}"

    def translate(self, text_content, max_retries=3, timeout=120):
        """Send *text_content* to the Qwen API and return the full response text.

        Retries the generate request up to max_retries times with exponential
        backoff; polls for at most *timeout* seconds per attempt.
        """
        if not text_content or text_content.strip() == "":
            return "输入文本为空"

        # NOTE(review): both branches currently use the identical prompt
        # prefix; kept as-is to preserve behavior.
        if lang_detect_with_regex(text_content) == 'chinese':
            prompt_f = "回答(限制在100个字以内)"
        else:
            prompt_f = "回答(限制在100个字以内)"

        prompt = f"{prompt_f}:{text_content}"
        print(f"[API] 发送请求: {prompt}")

        for attempt in range(max_retries):
            try:
                # Step 1: kick off generation.
                generate_url = f"{self.api_url}/api/generate"
                # NOTE(review): "top-p"/"top-k" keys are hyphenated here —
                # confirm against the API schema (often top_p/top_k).
                payload = {
                    "prompt": prompt,
                    "temperature": 0.1,  # low temperature for more deterministic output
                    "repetition_penalty": 1.0,
                    "top-p": 0.9,
                    "top-k": 40,
                    "max_new_tokens": 512
                }

                print(f"[API] 开始生成请求 (尝试 {attempt + 1}/{max_retries})")
                response = requests.post(generate_url, json=payload, timeout=30)
                response.raise_for_status()
                print("[API] 生成请求成功")

                # Step 2: poll for chunks and accumulate them until done.
                result_url = f"{self.api_url}/api/generate_provider"
                start_time = time.time()
                full_translation = ""

                while time.time() - start_time < timeout:
                    try:
                        result_response = requests.get(result_url, timeout=10)
                        result_data = result_response.json()

                        chunk = result_data.get("response", "")
                        full_translation += chunk

                        if result_data.get("done", False):
                            print(f"[API] 完成: {full_translation}")
                            return full_translation

                        time.sleep(0.05)

                    except requests.exceptions.RequestException as e:
                        # Transient poll failure: keep polling until timeout.
                        print(f"[API] 轮询请求失败: {e}")
                        if time.time() - start_time > timeout:
                            break
                        continue

                print(f"[API] 轮询超时,尝试第 {attempt + 1} 次重试")

            except requests.exceptions.RequestException as e:
                print(f"[API] 请求失败 (尝试 {attempt + 1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt  # exponential backoff
                    print(f"[API] 等待 {wait_time} 秒后重试...")
                    time.sleep(wait_time)
                else:
                    return f"失败: {str(e)}"
            except Exception as e:
                print(f"[API] 过程出错: {e}")
                return f"失败: {str(e)}"

        return "超时,请检查API服务状态"
|
| 274 |
+
|
| 275 |
+
class SpeechTranslationPipeline:
|
| 276 |
+
def __init__(self,
|
| 277 |
+
tts_model_dir, tts_model_files,
|
| 278 |
+
asr_model_dir="ax_model", seq_len=132,
|
| 279 |
+
tts_dec_len=128, sample_rate=44100, tts_speed=0.8,
|
| 280 |
+
qwen_api_url=QWEN_API_URL):
|
| 281 |
+
self.tts_model_dir = tts_model_dir
|
| 282 |
+
self.tts_model_files = tts_model_files
|
| 283 |
+
self.asr_model_dir = asr_model_dir
|
| 284 |
+
self.seq_len = seq_len
|
| 285 |
+
self.tts_dec_len = tts_dec_len
|
| 286 |
+
self.sample_rate = sample_rate
|
| 287 |
+
self.tts_speed = tts_speed
|
| 288 |
+
self.qwen_api_url = qwen_api_url
|
| 289 |
+
|
| 290 |
+
# 初始化ASR模型
|
| 291 |
+
self._init_asr_models()
|
| 292 |
+
|
| 293 |
+
# 初始化TTS模型
|
| 294 |
+
self._init_tts_models()
|
| 295 |
+
|
| 296 |
+
# 初始化API
|
| 297 |
+
self.translator = QwenTranslationAPI(api_url=qwen_api_url)
|
| 298 |
+
|
| 299 |
+
# 验证所有必需文件存在
|
| 300 |
+
self._validate_files()
|
| 301 |
+
|
| 302 |
+
def _init_asr_models(self):
|
| 303 |
+
"""初始化语音识别相关模型"""
|
| 304 |
+
print("Initializing SenseVoice models...")
|
| 305 |
+
|
| 306 |
+
# VAD模型
|
| 307 |
+
self.model_vad = AX_Fsmn_vad(self.asr_model_dir)
|
| 308 |
+
|
| 309 |
+
# 位置编码
|
| 310 |
+
self.embed = SinusoidalPositionEncoder()
|
| 311 |
+
self.position_encoding = self.embed.get_position_encoding(
|
| 312 |
+
torch.randn(1, self.seq_len, 560)).numpy()
|
| 313 |
+
|
| 314 |
+
# ASR模型
|
| 315 |
+
self.model_bin = AX_SenseVoiceSmall(self.asr_model_dir, seq_len=self.seq_len)
|
| 316 |
+
|
| 317 |
+
# Tokenizer
|
| 318 |
+
tokenizer_path = os.path.join(self.asr_model_dir, "chn_jpn_yue_eng_ko_spectok.bpe.model")
|
| 319 |
+
self.tokenizer = SentencepiecesTokenizer(bpemodel=tokenizer_path)
|
| 320 |
+
|
| 321 |
+
print("SenseVoice models initialized successfully.")
|
| 322 |
+
|
| 323 |
+
def _init_tts_models(self):
|
| 324 |
+
"""初始化TTS相关模型"""
|
| 325 |
+
print("Initializing MeloTTS models...")
|
| 326 |
+
init_start = time.time()
|
| 327 |
+
|
| 328 |
+
# 加载encoder和decoder模型
|
| 329 |
+
enc_model = os.path.join(self.tts_model_dir, self.tts_model_files["encoder"])
|
| 330 |
+
dec_model = os.path.join(self.tts_model_dir, self.tts_model_files["decoder"])
|
| 331 |
+
|
| 332 |
+
model_load_start = time.time()
|
| 333 |
+
self.sess_enc = ort.InferenceSession(enc_model, providers=["CPUExecutionProvider"], sess_options=ort.SessionOptions())
|
| 334 |
+
self.sess_dec = axe.InferenceSession(dec_model)
|
| 335 |
+
print(f" Load encoder/decoder models: {(time.time() - model_load_start)*1000:.2f}ms")
|
| 336 |
+
|
| 337 |
+
# 加载静态输入g
|
| 338 |
+
g_file = os.path.join(self.tts_model_dir, self.tts_model_files["g"])
|
| 339 |
+
self.tts_g = np.fromfile(g_file, dtype=np.float32).reshape(1, 256, 1)
|
| 340 |
+
|
| 341 |
+
# 设置语言和symbol映射(默认支持中英混合)
|
| 342 |
+
self.tts_language = "ZH_MIX_EN"
|
| 343 |
+
self.symbol_to_id = {s: i for i, s in enumerate(LANG_TO_SYMBOL_MAP[self.tts_language])}
|
| 344 |
+
|
| 345 |
+
# 预热:提前加载所有懒加载的模块(这是主要耗时部分)
|
| 346 |
+
print(" Warming up TTS modules (loading language models, tokenizers, etc.)...")
|
| 347 |
+
warmup_start = time.time()
|
| 348 |
+
|
| 349 |
+
# 中英混合预热
|
| 350 |
+
try:
|
| 351 |
+
warmup_start_mix = time.time()
|
| 352 |
+
warmup_text_mix = "这是一个test测试。"
|
| 353 |
+
_, _, _, _, _ = get_text_for_tts_infer(warmup_text_mix, self.tts_language, symbol_to_id=self.symbol_to_id)
|
| 354 |
+
print(f" Mixed ZH-EN warm-up: {(time.time() - warmup_start_mix)*1000:.2f}ms")
|
| 355 |
+
except Exception as e:
|
| 356 |
+
print(f" Warning: Mixed warm-up failed: {e}")
|
| 357 |
+
|
| 358 |
+
total_init_time = (time.time() - init_start) * 1000
|
| 359 |
+
print(f"MeloTTS models initialized successfully. Total init time: {total_init_time:.2f}ms ({total_init_time/1000:.2f}s)")
|
| 360 |
+
|
| 361 |
+
def _validate_files(self):
|
| 362 |
+
"""验证所有必需的文件都存在"""
|
| 363 |
+
# 检查TTS相关文件
|
| 364 |
+
for key, filename in self.tts_model_files.items():
|
| 365 |
+
filepath = os.path.join(self.tts_model_dir, filename)
|
| 366 |
+
if not os.path.exists(filepath):
|
| 367 |
+
raise FileNotFoundError(f"TTS模型文件不存在: {filepath}")
|
| 368 |
+
|
| 369 |
+
# 检查API服务是否可用(可选)
|
| 370 |
+
try:
|
| 371 |
+
response = requests.get(f"{self.qwen_api_url}/api/generate_provider", timeout=5)
|
| 372 |
+
print("[API检查] 千问API服务���接正常")
|
| 373 |
+
except:
|
| 374 |
+
print("[API警告] 无法连接到千问API服务,请确保已启动API服务")
|
| 375 |
+
|
| 376 |
+
def speech_recognition(self, speech, fs):
|
| 377 |
+
"""
|
| 378 |
+
第一步:语音识别(ASR)
|
| 379 |
+
"""
|
| 380 |
+
speech_lengths = len(speech)
|
| 381 |
+
|
| 382 |
+
# VAD处理
|
| 383 |
+
print("Running VAD...")
|
| 384 |
+
vad_start_time = time.time()
|
| 385 |
+
res_vad = self.model_vad(speech)[0]
|
| 386 |
+
vad_segments = merge_vad(res_vad, 15 * 1000)
|
| 387 |
+
vad_time_cost = time.time() - vad_start_time
|
| 388 |
+
print(f"VAD processing time: {vad_time_cost:.2f} seconds")
|
| 389 |
+
print(f"VAD segments detected: {len(vad_segments)}")
|
| 390 |
+
|
| 391 |
+
# ASR处理
|
| 392 |
+
print("Running ASR...")
|
| 393 |
+
asr_start_time = time.time()
|
| 394 |
+
all_results = ""
|
| 395 |
+
|
| 396 |
+
# 遍历每个VAD片段并处理
|
| 397 |
+
for i, segment in enumerate(vad_segments):
|
| 398 |
+
segment_start, segment_end = segment
|
| 399 |
+
start_sample = int(segment_start / 1000 * fs)
|
| 400 |
+
end_sample = min(int(segment_end / 1000 * fs), speech_lengths)
|
| 401 |
+
segment_speech = speech[start_sample:end_sample]
|
| 402 |
+
|
| 403 |
+
# 为当前片段创建临时文件
|
| 404 |
+
segment_filename = f"temp_segment_{i}.wav"
|
| 405 |
+
sf.write(segment_filename, segment_speech, fs)
|
| 406 |
+
|
| 407 |
+
# 对当前片段进行识别
|
| 408 |
+
try:
|
| 409 |
+
segment_res = self.model_bin(
|
| 410 |
+
segment_filename,
|
| 411 |
+
"auto", # 语言自动检测
|
| 412 |
+
True, # withitn
|
| 413 |
+
self.position_encoding,
|
| 414 |
+
tokenizer=self.tokenizer,
|
| 415 |
+
)
|
| 416 |
+
|
| 417 |
+
all_results += segment_res
|
| 418 |
+
|
| 419 |
+
# 清理临时文件
|
| 420 |
+
if os.path.exists(segment_filename):
|
| 421 |
+
os.remove(segment_filename)
|
| 422 |
+
|
| 423 |
+
except Exception as e:
|
| 424 |
+
if os.path.exists(segment_filename):
|
| 425 |
+
os.remove(segment_filename)
|
| 426 |
+
print(f"Error processing segment {i}: {e}")
|
| 427 |
+
continue
|
| 428 |
+
|
| 429 |
+
asr_time_cost = time.time() - asr_start_time
|
| 430 |
+
print(f"ASR processing time: {asr_time_cost:.2f} seconds")
|
| 431 |
+
print(f"ASR Result: {all_results}")
|
| 432 |
+
|
| 433 |
+
return all_results.strip()
|
| 434 |
+
|
| 435 |
+
def run_translation(self, text_content):
|
| 436 |
+
"""
|
| 437 |
+
第二步:调用Qwen大模型API处理
|
| 438 |
+
"""
|
| 439 |
+
print("Starting translation via API...")
|
| 440 |
+
translation_start_time = time.time()
|
| 441 |
+
|
| 442 |
+
# 使用API进行处理
|
| 443 |
+
translate_content = self.translator.translate(text_content)
|
| 444 |
+
|
| 445 |
+
translation_time_cost = time.time() - translation_start_time
|
| 446 |
+
print(f"Translation processing time: {translation_time_cost:.2f} seconds")
|
| 447 |
+
print(f"Translation Result: {translate_content}")
|
| 448 |
+
|
| 449 |
+
return translate_content
|
| 450 |
+
|
| 451 |
+
def run_tts(self, translate_content, output_dir, output_wav=None):
    """Step 3: synthesize *translate_content* into a WAV file with the TTS models.

    Args:
        translate_content: Text to synthesize (the LLM answer).
        output_dir: Directory the WAV file is written into.
        output_wav: Output file name; defaults to "tts_output.wav" when None.

    Returns:
        The full path of the written WAV file.

    Raises:
        Exception: re-raises any synthesis failure after printing a traceback.
    """
    # Fix: the declared default output_wav=None used to crash immediately,
    # because os.path.join(output_dir, None) raises TypeError.
    if output_wav is None:
        output_wav = "tts_output.wav"
    output_path = os.path.join(output_dir, output_wav)

    try:
        # Convert Arabic numerals to Chinese numerals for Chinese text so the
        # TTS front-end can pronounce them.
        if lang_detect_with_regex(translate_content) == "chinese":
            translate_content = cn2an.transform(translate_content, "an2cn")

        print(f"TTS synthesis for text: {translate_content}")

        # Split the text into sentences for the configured TTS language.
        sens = split_sentence(translate_content, language_str=self.tts_language)
        print(f"Text split into {len(sens)} sentences")

        # Per-sentence waveforms, concatenated at the end.
        audio_list = []

        for n, se in enumerate(sens):
            # Insert a space at lowercase->uppercase boundaries so glued
            # English words ("helloWorld") are read as two words.
            if self.tts_language in ['EN', 'ZH_MIX_EN']:
                se = re.sub(r'([a-z])([A-Z])', r'\1 \2', se)

            print(f"Processing sentence[{n}]: {se}")

            # Convert text to phonemes / tones / language ids for the encoder.
            phones, tones, lang_ids, norm_text, word2ph = get_text_for_tts_infer(
                se, self.tts_language, symbol_to_id=self.symbol_to_id)

            # Run the encoder session; all noise scales are 0, so the
            # synthesis is deterministic.
            encoder_start = time.time()
            z_p, pronoun_lens, audio_len = self.sess_enc.run(None, input_feed={
                'phone': phones, 'g': self.tts_g,
                'tone': tones, 'language': lang_ids,
                'noise_scale': np.array([0], dtype=np.float32),
                'length_scale': np.array([1.0 / self.tts_speed], dtype=np.float32),
                'noise_scale_w': np.array([0], dtype=np.float32),
                'sdp_ratio': np.array([0], dtype=np.float32)})
            print(f"Encoder run time: {1000 * (time.time() - encoder_start):.2f}ms")

            # Per-word pronunciation lengths, then fixed-size decoder slices.
            word2pronoun = calc_word2pronoun(word2ph, pronoun_lens)
            pn_slices, zp_slices = generate_slices(word2pronoun, self.tts_dec_len)

            audio_len = audio_len[0]
            sub_audio_list = []

            for i, (ps, zs) in enumerate(zip(pn_slices, zp_slices)):
                zp_slice = z_p[..., zs]

                # Slice length before padding; 512 output samples per decoder
                # frame (model upsampling factor — TODO confirm).
                sub_dec_len = zp_slice.shape[-1]
                sub_audio_len = 512 * sub_dec_len

                # Zero-pad up to the fixed decoder input length.
                if zp_slice.shape[-1] < self.tts_dec_len:
                    zp_slice = np.concatenate((zp_slice, np.zeros((*zp_slice.shape[:-1], self.tts_dec_len - zp_slice.shape[-1]), dtype=np.float32)), axis=-1)

                decoder_start = time.time()
                audio = self.sess_dec.run(None, input_feed={"z_p": zp_slice, "g": self.tts_g})[0].flatten()

                # Trim the overlap with the previous slice: drop the first
                # word's audio when the two slices share it.
                audio_start = 0
                if len(sub_audio_list) > 0:
                    if pn_slices[i - 1].stop > ps.start:
                        audio_start = 512 * word2pronoun[ps.start]

                # Likewise drop the last word when it overlaps the next slice.
                audio_end = sub_audio_len
                if i < len(pn_slices) - 1:
                    if ps.stop > pn_slices[i + 1].start:
                        audio_end = sub_audio_len - 512 * word2pronoun[ps.stop - 1]

                audio = audio[audio_start:audio_end]
                print(f"Decode slice[{i}]: decoder run time {1000 * (time.time() - decoder_start):.2f}ms")
                sub_audio_list.append(audio)

            # Stitch the decoded slices back into one sentence waveform.
            sub_audio = merge_sub_audio(sub_audio_list, 0, audio_len)
            audio_list.append(sub_audio)

        # Concatenate all sentence waveforms (pacing depends on tts_speed).
        audio = audio_numpy_concat(audio_list, sr=self.sample_rate, speed=self.tts_speed)

        # Write the final waveform to disk.
        sf.write(output_path, audio, self.sample_rate)
        print(f"TTS audio saved to {output_path}")

        return output_path

    except Exception as e:
        print(f"TTS synthesis failed: {e}")
        import traceback
        traceback.print_exc()
        raise e
|
| 552 |
+
|
| 553 |
+
def full_pipeline(self, speech, fs, output_dir=None, output_tts=None):
    """Run the whole chain: ASR -> Qwen answer -> TTS synthesis.

    Args:
        speech: waveform samples.
        fs: sample rate of *speech*.
        output_dir: directory for the synthesized WAV.
        output_tts: file name for the synthesized WAV.

    Returns:
        dict with "original_text", "translated_text" and "audio_path".

    Raises:
        ValueError: when ASR produces no usable text.
    """
    # --- Step 1: VAD + ASR -------------------------------------------------
    print("\n----------------------VAD+ASR----------------------------\n")
    t0 = time.time()
    text_content = self.speech_recognition(speech, fs)
    asr_time = time.time() - t0
    print(f"语音识别耗时: {asr_time:.2f} 秒")

    # Nothing recognized -> the rest of the chain would be meaningless.
    if not text_content or text_content.strip() == "":
        raise ValueError("ASR未能识别出有效文本")

    # --- Step 2: Qwen ------------------------------------------------------
    print("\n---------------------Qwen---------------------------\n")
    t0 = time.time()
    translate_content = self.run_translation(text_content)
    translate_time = time.time() - t0
    print(f"qwen耗时: {translate_time:.2f} 秒")

    # --- Step 3: TTS -------------------------------------------------------
    print("-------------------------TTS-------------------------------\n")
    t0 = time.time()
    output_path = self.run_tts(translate_content, output_dir, output_tts)
    tts_time = time.time() - t0
    print(f"TTS合成耗时: {tts_time:.2f} 秒")

    return {
        "original_text": text_content,
        "translated_text": translate_content,
        "audio_path": output_path,
    }
|
| 587 |
+
|
| 588 |
+
def main():
    """Batch entry point: run ASR -> Qwen -> TTS over every audio file in a directory."""
    parser = argparse.ArgumentParser(description="Speech Recognition, Translation and TTS Pipeline")
    parser.add_argument("--audio_dir", type=str, default="./input_question", help="Input audio directory path")
    parser.add_argument("--output_dir", type=str, default="./output_answer", help="Output directory")
    parser.add_argument("--api_url", type=str, default="http://10.126.29.158:8000", help="Qwen API server URL")
    args = parser.parse_args()

    print("-------------------START------------------------\n")
    os.makedirs(args.output_dir, exist_ok=True)

    # Bail out early when the input directory is missing.
    if not os.path.exists(args.audio_dir):
        print(f"错误: 音频目录不存在: {args.audio_dir}")
        return

    # Collect every .wav/.mp3 file under the input directory.
    audio_files = [
        os.path.join(args.audio_dir, name)
        for name in os.listdir(args.audio_dir)
        if name.lower().endswith(('.wav', '.mp3'))
    ]

    if not audio_files:
        print(f"错误: 在目录 {args.audio_dir} 中没有找到音频文件")
        return

    # Deterministic processing order.
    audio_files.sort()
    print(f"找到 {len(audio_files)} 个音频文件: {[os.path.basename(f) for f in audio_files]}")

    # Models, inference sessions and the API client are built once and reused.
    pipeline = SpeechTranslationPipeline(
        tts_model_dir=TTS_MODEL_DIR,
        tts_model_files=TTS_MODEL_FILES,
        asr_model_dir="ax_model",
        seq_len=132,
        tts_dec_len=128,
        sample_rate=44100,
        tts_speed=0.8,
        qwen_api_url=args.api_url
    )

    all_results = []
    total_start_time = time.time()

    for i, audio_file in enumerate(audio_files):
        print(f"\n{'='*60}")
        print(f"处理第 {i+1}/{len(audio_files)} 个音频文件: {os.path.basename(audio_file)}")
        print(f"{'='*60}")

        file_start_time = time.time()

        try:
            # Load and, if needed, resample to the 16 kHz expected by the ASR model.
            speech, fs = librosa.load(audio_file, sr=None)
            if fs != 16000:
                print(f"重采样音频从 {fs}Hz 到 16000Hz")
                speech = librosa.resample(y=speech, orig_sr=fs, target_sr=16000)
                fs = 16000
            audio_duration = librosa.get_duration(y=speech, sr=fs)  # NOTE(review): currently unused

            # Output WAV named after the input file.
            base_name = os.path.splitext(os.path.basename(audio_file))[0]
            output_tts = f"{base_name}_answer.wav"

            result = pipeline.full_pipeline(speech, fs, args.output_dir, output_tts)

            file_time_cost = time.time() - file_start_time

            # Reload the generated WAV to compute the real-time factor.
            out_wav = os.path.join(args.output_dir, output_tts)
            speech, fs = librosa.load(out_wav, sr=None)
            output_duration = librosa.get_duration(y=speech, sr=fs)
            rtf = file_time_cost / output_duration

            result.update({
                "audio_file": audio_file,
                "processing_time": file_time_cost,
                "output_duration": output_duration,
                "rtf": rtf
            })
            all_results.append(result)

            print(f"\n文件处理完成: {os.path.basename(audio_file)}")
            print(f"原始文本: {result['original_text']}")
            print(f"回答文本: {result['translated_text']}")
            print(f"生成音频: {result['audio_path']}")
            print(f"处理时间: {file_time_cost:.2f} 秒")
            print(f"音频时长: {output_duration:.2f} 秒")
            print(f"RTF: {rtf:.2f}")

        except Exception as e:
            # A failure on one file must not stop the batch.
            print(f"处理文件 {audio_file} 时出错: {e}")
            import traceback
            traceback.print_exc()
            continue

    total_time_cost = time.time() - total_start_time
    print(f"\n{'='*80}")
    print("所有文件处理完成!")
    print(f"{'='*80}")
    print(f"总共处理了 {len(all_results)} 个文件")
    print(f"总处理时间: {total_time_cost:.2f} 秒")

    # Persist a human-readable summary next to the generated audio.
    summary_file = os.path.join(args.output_dir, "processing_summary.txt")
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("批量处理结果汇总\n")
        f.write("=" * 50 + "\n\n")
        for i, result in enumerate(all_results):
            f.write(f"文件 {i+1}: {os.path.basename(result['audio_file'])}\n")
            f.write(f" 原始文本: {result['original_text']}\n")
            f.write(f" 回答结果: {result['translated_text']}\n")
            f.write(f" 合成音频: {os.path.basename(result['audio_path'])}\n")
            f.write(f" 处理时间: {result['processing_time']:.2f} 秒\n")
            f.write(f" 音频时长: {result['output_duration']:.2f} 秒\n")
            f.write(f" RTF: {result['rtf']:.2f}\n")
            f.write("-" * 50 + "\n")
        f.write(f"\n总计: {len(all_results)} 个文件\n")
        f.write(f"总处理时间: {total_time_cost:.2f} 秒\n")

    print(f"详细结果已保存到: {summary_file}")


if __name__ == "__main__":
    main()
|
config.json
ADDED
|
File without changes
|
input_question/Q1.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8991bc8a91bc377ad8ba3e9962edebdceadb7d1d468eb28881fef83738f4c4d1
|
| 3 |
+
size 177644
|
input_question/Q2.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9d817c1110392680cf4873e97f373229d29449c62fd551dc7fde2a360960c61
|
| 3 |
+
size 235244
|
input_question/Q3.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:121fefb370b53aab86072cafc55ab54ed3ff487d3c9955063e0cda9fae7bf5b8
|
| 3 |
+
size 132044
|
libaxllm/main_api_ax650
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e800cd6e00dd2ad7303cb6fb6b867a33704665bded213fe4bd3be3df025c0821
|
| 3 |
+
size 1064760
|
libaxllm/main_api_axcl_aarch64
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3558444d93ce7459db247421128aca6ba3fdbde5932eff6aea66653fa7370cdf
|
| 3 |
+
size 1816560
|
libaxllm/main_api_axcl_x86
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8cbbde225235ace328ae230320d7f9b2d6a9321a8dca3179f4d770edc65a2e0
|
| 3 |
+
size 8811440
|
libaxllm/post_config.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"enable_temperature" : true,
|
| 3 |
+
"temperature" : 0.9,
|
| 4 |
+
|
| 5 |
+
"enable_repetition_penalty" : false,
|
| 6 |
+
"repetition_penalty" : 1.2,
|
| 7 |
+
"penalty_window" : 20,
|
| 8 |
+
|
| 9 |
+
"enable_top_p_sampling" : false,
|
| 10 |
+
"top_p" : 0.8,
|
| 11 |
+
|
| 12 |
+
"enable_top_k_sampling" : true,
|
| 13 |
+
"top_k" : 10
|
| 14 |
+
}
|
libaxllm/qwen2.5_tokenizer/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libaxllm/qwen2.5_tokenizer/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libaxllm/qwen2.5_tokenizer/tokenizer_config.json
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"151643": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"151644": {
|
| 13 |
+
"content": "<|im_start|>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"151645": {
|
| 21 |
+
"content": "<|im_end|>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"151646": {
|
| 29 |
+
"content": "<|object_ref_start|>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": false,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"151647": {
|
| 37 |
+
"content": "<|object_ref_end|>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
},
|
| 44 |
+
"151648": {
|
| 45 |
+
"content": "<|box_start|>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false,
|
| 50 |
+
"special": true
|
| 51 |
+
},
|
| 52 |
+
"151649": {
|
| 53 |
+
"content": "<|box_end|>",
|
| 54 |
+
"lstrip": false,
|
| 55 |
+
"normalized": false,
|
| 56 |
+
"rstrip": false,
|
| 57 |
+
"single_word": false,
|
| 58 |
+
"special": true
|
| 59 |
+
},
|
| 60 |
+
"151650": {
|
| 61 |
+
"content": "<|quad_start|>",
|
| 62 |
+
"lstrip": false,
|
| 63 |
+
"normalized": false,
|
| 64 |
+
"rstrip": false,
|
| 65 |
+
"single_word": false,
|
| 66 |
+
"special": true
|
| 67 |
+
},
|
| 68 |
+
"151651": {
|
| 69 |
+
"content": "<|quad_end|>",
|
| 70 |
+
"lstrip": false,
|
| 71 |
+
"normalized": false,
|
| 72 |
+
"rstrip": false,
|
| 73 |
+
"single_word": false,
|
| 74 |
+
"special": true
|
| 75 |
+
},
|
| 76 |
+
"151652": {
|
| 77 |
+
"content": "<|vision_start|>",
|
| 78 |
+
"lstrip": false,
|
| 79 |
+
"normalized": false,
|
| 80 |
+
"rstrip": false,
|
| 81 |
+
"single_word": false,
|
| 82 |
+
"special": true
|
| 83 |
+
},
|
| 84 |
+
"151653": {
|
| 85 |
+
"content": "<|vision_end|>",
|
| 86 |
+
"lstrip": false,
|
| 87 |
+
"normalized": false,
|
| 88 |
+
"rstrip": false,
|
| 89 |
+
"single_word": false,
|
| 90 |
+
"special": true
|
| 91 |
+
},
|
| 92 |
+
"151654": {
|
| 93 |
+
"content": "<|vision_pad|>",
|
| 94 |
+
"lstrip": false,
|
| 95 |
+
"normalized": false,
|
| 96 |
+
"rstrip": false,
|
| 97 |
+
"single_word": false,
|
| 98 |
+
"special": true
|
| 99 |
+
},
|
| 100 |
+
"151655": {
|
| 101 |
+
"content": "<|image_pad|>",
|
| 102 |
+
"lstrip": false,
|
| 103 |
+
"normalized": false,
|
| 104 |
+
"rstrip": false,
|
| 105 |
+
"single_word": false,
|
| 106 |
+
"special": true
|
| 107 |
+
},
|
| 108 |
+
"151656": {
|
| 109 |
+
"content": "<|video_pad|>",
|
| 110 |
+
"lstrip": false,
|
| 111 |
+
"normalized": false,
|
| 112 |
+
"rstrip": false,
|
| 113 |
+
"single_word": false,
|
| 114 |
+
"special": true
|
| 115 |
+
},
|
| 116 |
+
"151657": {
|
| 117 |
+
"content": "<tool_call>",
|
| 118 |
+
"lstrip": false,
|
| 119 |
+
"normalized": false,
|
| 120 |
+
"rstrip": false,
|
| 121 |
+
"single_word": false,
|
| 122 |
+
"special": false
|
| 123 |
+
},
|
| 124 |
+
"151658": {
|
| 125 |
+
"content": "</tool_call>",
|
| 126 |
+
"lstrip": false,
|
| 127 |
+
"normalized": false,
|
| 128 |
+
"rstrip": false,
|
| 129 |
+
"single_word": false,
|
| 130 |
+
"special": false
|
| 131 |
+
},
|
| 132 |
+
"151659": {
|
| 133 |
+
"content": "<|fim_prefix|>",
|
| 134 |
+
"lstrip": false,
|
| 135 |
+
"normalized": false,
|
| 136 |
+
"rstrip": false,
|
| 137 |
+
"single_word": false,
|
| 138 |
+
"special": false
|
| 139 |
+
},
|
| 140 |
+
"151660": {
|
| 141 |
+
"content": "<|fim_middle|>",
|
| 142 |
+
"lstrip": false,
|
| 143 |
+
"normalized": false,
|
| 144 |
+
"rstrip": false,
|
| 145 |
+
"single_word": false,
|
| 146 |
+
"special": false
|
| 147 |
+
},
|
| 148 |
+
"151661": {
|
| 149 |
+
"content": "<|fim_suffix|>",
|
| 150 |
+
"lstrip": false,
|
| 151 |
+
"normalized": false,
|
| 152 |
+
"rstrip": false,
|
| 153 |
+
"single_word": false,
|
| 154 |
+
"special": false
|
| 155 |
+
},
|
| 156 |
+
"151662": {
|
| 157 |
+
"content": "<|fim_pad|>",
|
| 158 |
+
"lstrip": false,
|
| 159 |
+
"normalized": false,
|
| 160 |
+
"rstrip": false,
|
| 161 |
+
"single_word": false,
|
| 162 |
+
"special": false
|
| 163 |
+
},
|
| 164 |
+
"151663": {
|
| 165 |
+
"content": "<|repo_name|>",
|
| 166 |
+
"lstrip": false,
|
| 167 |
+
"normalized": false,
|
| 168 |
+
"rstrip": false,
|
| 169 |
+
"single_word": false,
|
| 170 |
+
"special": false
|
| 171 |
+
},
|
| 172 |
+
"151664": {
|
| 173 |
+
"content": "<|file_sep|>",
|
| 174 |
+
"lstrip": false,
|
| 175 |
+
"normalized": false,
|
| 176 |
+
"rstrip": false,
|
| 177 |
+
"single_word": false,
|
| 178 |
+
"special": false
|
| 179 |
+
}
|
| 180 |
+
},
|
| 181 |
+
"additional_special_tokens": [
|
| 182 |
+
"<|im_start|>",
|
| 183 |
+
"<|im_end|>",
|
| 184 |
+
"<|object_ref_start|>",
|
| 185 |
+
"<|object_ref_end|>",
|
| 186 |
+
"<|box_start|>",
|
| 187 |
+
"<|box_end|>",
|
| 188 |
+
"<|quad_start|>",
|
| 189 |
+
"<|quad_end|>",
|
| 190 |
+
"<|vision_start|>",
|
| 191 |
+
"<|vision_end|>",
|
| 192 |
+
"<|vision_pad|>",
|
| 193 |
+
"<|image_pad|>",
|
| 194 |
+
"<|video_pad|>"
|
| 195 |
+
],
|
| 196 |
+
"bos_token": null,
|
| 197 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
| 198 |
+
"clean_up_tokenization_spaces": false,
|
| 199 |
+
"eos_token": "<|im_end|>",
|
| 200 |
+
"errors": "replace",
|
| 201 |
+
"model_max_length": 131072,
|
| 202 |
+
"pad_token": "<|endoftext|>",
|
| 203 |
+
"split_special_tokens": false,
|
| 204 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 205 |
+
"unk_token": null,
|
| 206 |
+
"add_bos_token": false
|
| 207 |
+
}
|
libaxllm/qwen2.5_tokenizer/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libaxllm/qwen2.5_tokenizer_uid.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoTokenizer, PreTrainedTokenizerFast
|
| 2 |
+
from http.server import HTTPServer, BaseHTTPRequestHandler
|
| 3 |
+
import json
|
| 4 |
+
import argparse
|
| 5 |
+
import uuid
|
| 6 |
+
|
| 7 |
+
# 全局字典:存储 uid 到 Tokenizer_Http 实例的映射
|
| 8 |
+
tokenizers = {}
|
| 9 |
+
|
| 10 |
+
class Tokenizer_Http():
    """Per-session wrapper around the Qwen2.5 chat tokenizer.

    Keeps the running chat history (``self.messages``) and the token ids of
    the last rendered prompt (``self.token_ids``) so that ``encode`` can
    report only the newly added tokens.
    """

    def __init__(self):
        model_id = "qwen2.5_tokenizer"
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Chat history, seeded with the default system prompt.
        self.messages = [
            {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        ]
        # Token ids of the most recently rendered prompt.
        self.token_ids = []

    def _render(self):
        # Serialize the current history through the model's chat template,
        # appending the generation prompt for the assistant turn.
        return self.tokenizer.apply_chat_template(
            self.messages,
            tokenize=False,
            add_generation_prompt=True
        )

    def encode(self, prompt, last_reply=None):
        """Append *prompt* (and optionally *last_reply*) to the history.

        Returns ``(token_ids, diff)`` where *diff* holds only the tokens
        added since the previous render.
        """
        if last_reply is not None:
            self.messages.append({"role": "assistant", "content": last_reply})
        # Baseline: tokens of the history *before* the new user message.
        # The last 3 tokens are dropped so the diff below starts exactly at
        # the new content — presumably the generation-prompt suffix is
        # always 3 tokens for this template; TODO confirm.
        self.token_ids = self.tokenizer.encode(self._render())[:-3]
        self.messages.append({"role": "user", "content": prompt})

        text = self._render()
        print("生成的文本:\n============\n", text, "============\n")
        token_ids = self.tokenizer.encode(text)
        # Only the part that was not present in the baseline.
        diff = token_ids[len(self.token_ids):]
        self.token_ids = token_ids
        print(self.decode(diff))
        return token_ids, diff

    def decode(self, token_ids):
        """Decode a list of token ids back to text."""
        return self.tokenizer.decode(token_ids)

    @property
    def bos_id(self):
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        return self.tokenizer.eos_token

    def reset(self, system_prompt="You are Qwen, created by Alibaba Cloud. You are a helpful assistant."):
        """Restart the conversation with *system_prompt* and return its token ids."""
        self.messages = [
            {"role": "system", "content": system_prompt},
        ]
        # Same truncation as in encode(): strip the 3-token template suffix.
        token_ids = self.tokenizer.encode(self._render())[:-3]
        self.token_ids = token_ids
        print(self.decode(token_ids))
        return token_ids
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
class Request(BaseHTTPRequestHandler):
    """HTTP endpoints exposing per-uid tokenizer instances.

    GET  /get_uid          -> allocate a new uid plus its Tokenizer_Http
    GET  /bos_id?uid=...   -> BOS token id (-1 when the model has none)
    GET  /eos_id?uid=...   -> EOS token id (-1 when the model has none)
    POST /encode | /decode | /reset with a JSON body carrying the uid
    """
    timeout = 5
    server_version = 'Apache'

    def do_GET(self):
        print("GET 请求路径:", self.path)
        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()

        if '/get_uid' in self.path:
            # Allocate a fresh session: one tokenizer instance per uid.
            new_uid = str(uuid.uuid4())
            print("新 uid:", new_uid)
            tokenizers[new_uid] = Tokenizer_Http()
            msg = json.dumps({'uid': new_uid})
        elif '/bos_id' in self.path:
            uid = self.get_query_param("uid")
            instance: Tokenizer_Http = tokenizers.get(uid)
            if instance is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                bos_id = instance.bos_id
                # -1 signals "token undefined" to the client.
                msg = json.dumps({'bos_id': bos_id if bos_id is not None else -1})
        elif '/eos_id' in self.path:
            uid = self.get_query_param("uid")
            instance: Tokenizer_Http = tokenizers.get(uid)
            if instance is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                eos_id = instance.eos_id
                msg = json.dumps({'eos_id': eos_id if eos_id is not None else -1})
        else:
            msg = json.dumps({'error': 'Invalid GET endpoint'})

        print("响应消息:", msg)
        self.wfile.write(msg.encode())

    def do_POST(self):
        content_length = int(self.headers.get('content-length', 0))
        data = self.rfile.read(content_length).decode()
        print("POST 请求路径:", self.path)
        print("接收到的数据:", data)
        req = json.loads(data)

        self.send_response(200)
        self.send_header("Content-Type", "application/json")
        self.end_headers()

        if '/encode' in self.path:
            # Body must contain uid and text; last_reply is optional.
            uid = req.get('uid')
            prompt = req.get('text')
            last_reply = req.get('last_reply')
            instance: Tokenizer_Http = tokenizers.get(uid)
            if instance is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                token_ids, diff = instance.encode(prompt, last_reply)
                msg = json.dumps({'token_ids': token_ids, 'diff': diff})
        elif '/decode' in self.path:
            uid = req.get('uid')
            token_ids = req.get('token_ids')
            instance: Tokenizer_Http = tokenizers.get(uid)
            if instance is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                msg = json.dumps({'text': instance.decode(token_ids)})
        elif '/reset' in self.path:
            uid = req.get("uid")
            system_prompt = req.get("system_prompt")
            instance: Tokenizer_Http = tokenizers.get(uid)
            if instance is None:
                msg = json.dumps({'error': 'Invalid uid'})
            else:
                if system_prompt is not None:
                    print("system_prompt:", system_prompt)
                    token_ids = instance.reset(system_prompt)
                else:
                    token_ids = instance.reset()
                msg = json.dumps({'token_ids': token_ids})
        else:
            msg = json.dumps({'error': 'Invalid POST endpoint'})

        print("响应消息:", msg)
        self.wfile.write(msg.encode())

    def get_query_param(self, key):
        """Return the value of *key* from the request's query string, or None.

        Example: for ``/bos_id?uid=xxx``, ``get_query_param("uid")`` -> "xxx".
        """
        from urllib.parse import urlparse, parse_qs
        values = parse_qs(urlparse(self.path).query).get(key)
        return values[0] if values else None
|
| 179 |
+
|
| 180 |
+
if __name__ == "__main__":
    # Stand-alone tokenizer service; the native LLM runtime talks to it
    # over HTTP (see the run_*.sh launch scripts).
    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='0.0.0.0')
    parser.add_argument('--port', type=int, default=12345)
    args = parser.parse_args()

    host = (args.host, args.port)
    print('Server running at http://%s:%s' % host)
    httpd = HTTPServer(host, Request)
    httpd.serve_forever()
|
libaxllm/run_qwen2.5_1.5b_ctx_ax650_api.sh
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch the Qwen2.5-1.5B chat runtime natively on AX650, pointing it at the
# HTTP tokenizer service (qwen2.5_tokenizer_uid.py) listening on port 12345.
./main_api_ax650 \
--template_filename_axmodel "./qwen2.5-1.5b-ctx-ax650/qwen2_p128_l%d_together.axmodel" \
--axmodel_num 28 \
--url_tokenizer_model "http://0.0.0.0:12345" \
--filename_post_axmodel "./qwen2.5-1.5b-ctx-ax650/qwen2_post.axmodel" \
--filename_tokens_embed "./qwen2.5-1.5b-ctx-ax650/model.embed_tokens.weight.bfloat16.bin" \
--tokens_embed_num 151936 \
--tokens_embed_size 1536
# Optional flags kept for reference:
#--use_mmap_load_embed 1
#--live_print 1


#--system_prompt "你的名字叫小智(allen),你是一个人畜无害的AI助手。深圳市今天(4月1日)阴天,愚人节,气温在14°C至19°C之间,微风。" \
#--kvcache_path "./kvcache" \
|
| 15 |
+
|
libaxllm/run_qwen2.5_1.5b_ctx_axcl_aarch64_api.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch the Qwen2.5-1.5B LLM API server via AXCL on an aarch64 host.
# Model assets are resolved relative to the current directory; the tokenizer
# service must already be listening on 127.0.0.1:12345.
./main_api_axcl_aarch64 \
--system_prompt "你的名字叫小智(allen),你是一个人畜无害的AI助手。深圳市今天(4月1日)阴天,愚人节,气温在14°C至19°C之间,微风。" \
--template_filename_axmodel "qwen2.5-1.5b-ctx-ax650/qwen2_p128_l%d_together.axmodel" \
--axmodel_num 28 \
--url_tokenizer_model "http://127.0.0.1:12345" \
--filename_post_axmodel "qwen2.5-1.5b-ctx-ax650/qwen2_post.axmodel" \
--filename_tokens_embed "qwen2.5-1.5b-ctx-ax650/model.embed_tokens.weight.bfloat16.bin" \
--tokens_embed_num 151936 \
--tokens_embed_size 1536 \
--use_mmap_load_embed 1 \
--devices 0

# --kvcache_path "./kvcache" \
|
libaxllm/run_qwen2.5_1.5b_ctx_axcl_x86_api.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch the Qwen2.5-1.5B LLM API server via AXCL on an x86 host.
# Mirrors the aarch64 launcher except for the binary name; the tokenizer
# service must already be listening on 127.0.0.1:12345.
./main_api_axcl_x86 \
--system_prompt "你的名字叫小智(allen),你是一个人畜无害的AI助手。深圳市今天(4月1日)阴天,愚人节,气温在14°C至19°C之间,微风。" \
--template_filename_axmodel "qwen2.5-1.5b-ctx-ax650/qwen2_p128_l%d_together.axmodel" \
--axmodel_num 28 \
--url_tokenizer_model "http://127.0.0.1:12345" \
--filename_post_axmodel "qwen2.5-1.5b-ctx-ax650/qwen2_post.axmodel" \
--filename_tokens_embed "qwen2.5-1.5b-ctx-ax650/model.embed_tokens.weight.bfloat16.bin" \
--tokens_embed_num 151936 \
--tokens_embed_size 1536 \
--use_mmap_load_embed 1 \
--devices 0

# --kvcache_path "./kvcache" \
|
libmelotts/models/decoder-en.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90c93c0fa978cc1c68fbac6a78707dd75b8b9069cb01a1ade6846e2435aa1eb1
|
| 3 |
+
size 44093802
|
libmelotts/models/decoder-zh.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37ea2d8401f18dd371eec50b90bd39dcadf9684aaf3543dace8ce1a9499ef253
|
| 3 |
+
size 44092592
|
libmelotts/models/encoder-en.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6cc51185fb81934c7490c5f9ac993fff7efa98ab41c08cd3753c96abcb297582
|
| 3 |
+
size 31488385
|
libmelotts/models/encoder-zh.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2b0a5bc2789faef16b4bfc56ab4905364f8163a59f2db3d071b4a14792bfee5
|
| 3 |
+
size 31397760
|
libmelotts/models/g-en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:094bf0dbe1cd6c9408707209b2b7261b9df2cd5917d310bfac5945a15a31821a
|
| 3 |
+
size 1024
|
libmelotts/models/g-jp.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c01dd0961bbe1effca4ed378d2969d6fbd9b579133b722f6968db5cf4d22281e
|
| 3 |
+
size 1024
|
libmelotts/models/g-zh_mix_en.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c70d897674847882bd35e780aee696ddaff8d04d5c57e4f9cf37611b6821879f
|
| 3 |
+
size 1024
|
libmelotts/models/lexicon.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libmelotts/models/tokens.txt
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_ 0
|
| 2 |
+
AA 1
|
| 3 |
+
E 2
|
| 4 |
+
EE 3
|
| 5 |
+
En 4
|
| 6 |
+
N 5
|
| 7 |
+
OO 6
|
| 8 |
+
V 7
|
| 9 |
+
a 8
|
| 10 |
+
a: 9
|
| 11 |
+
aa 10
|
| 12 |
+
ae 11
|
| 13 |
+
ah 12
|
| 14 |
+
ai 13
|
| 15 |
+
an 14
|
| 16 |
+
ang 15
|
| 17 |
+
ao 16
|
| 18 |
+
aw 17
|
| 19 |
+
ay 18
|
| 20 |
+
b 19
|
| 21 |
+
by 20
|
| 22 |
+
c 21
|
| 23 |
+
ch 22
|
| 24 |
+
d 23
|
| 25 |
+
dh 24
|
| 26 |
+
dy 25
|
| 27 |
+
e 26
|
| 28 |
+
e: 27
|
| 29 |
+
eh 28
|
| 30 |
+
ei 29
|
| 31 |
+
en 30
|
| 32 |
+
eng 31
|
| 33 |
+
er 32
|
| 34 |
+
ey 33
|
| 35 |
+
f 34
|
| 36 |
+
g 35
|
| 37 |
+
gy 36
|
| 38 |
+
h 37
|
| 39 |
+
hh 38
|
| 40 |
+
hy 39
|
| 41 |
+
i 40
|
| 42 |
+
i0 41
|
| 43 |
+
i: 42
|
| 44 |
+
ia 43
|
| 45 |
+
ian 44
|
| 46 |
+
iang 45
|
| 47 |
+
iao 46
|
| 48 |
+
ie 47
|
| 49 |
+
ih 48
|
| 50 |
+
in 49
|
| 51 |
+
ing 50
|
| 52 |
+
iong 51
|
| 53 |
+
ir 52
|
| 54 |
+
iu 53
|
| 55 |
+
iy 54
|
| 56 |
+
j 55
|
| 57 |
+
jh 56
|
| 58 |
+
k 57
|
| 59 |
+
ky 58
|
| 60 |
+
l 59
|
| 61 |
+
m 60
|
| 62 |
+
my 61
|
| 63 |
+
n 62
|
| 64 |
+
ng 63
|
| 65 |
+
ny 64
|
| 66 |
+
o 65
|
| 67 |
+
o: 66
|
| 68 |
+
ong 67
|
| 69 |
+
ou 68
|
| 70 |
+
ow 69
|
| 71 |
+
oy 70
|
| 72 |
+
p 71
|
| 73 |
+
py 72
|
| 74 |
+
q 73
|
| 75 |
+
r 74
|
| 76 |
+
ry 75
|
| 77 |
+
s 76
|
| 78 |
+
sh 77
|
| 79 |
+
t 78
|
| 80 |
+
th 79
|
| 81 |
+
ts 80
|
| 82 |
+
ty 81
|
| 83 |
+
u 82
|
| 84 |
+
u: 83
|
| 85 |
+
ua 84
|
| 86 |
+
uai 85
|
| 87 |
+
uan 86
|
| 88 |
+
uang 87
|
| 89 |
+
uh 88
|
| 90 |
+
ui 89
|
| 91 |
+
un 90
|
| 92 |
+
uo 91
|
| 93 |
+
uw 92
|
| 94 |
+
v 93
|
| 95 |
+
van 94
|
| 96 |
+
ve 95
|
| 97 |
+
vn 96
|
| 98 |
+
w 97
|
| 99 |
+
x 98
|
| 100 |
+
y 99
|
| 101 |
+
z 100
|
| 102 |
+
zh 101
|
| 103 |
+
zy 102
|
| 104 |
+
! 103
|
| 105 |
+
? 104
|
| 106 |
+
… 105
|
| 107 |
+
, 106
|
| 108 |
+
. 107
|
| 109 |
+
' 108
|
| 110 |
+
- 109
|
| 111 |
+
SP 110
|
| 112 |
+
UNK 111
|
libmelotts/python/split_utils.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import os
|
| 3 |
+
import glob
|
| 4 |
+
import numpy as np
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
import re
|
| 7 |
+
|
| 8 |
+
def split_sentence(text, min_len=10, language_str='EN'):
    """Split *text* into sentence chunks using the splitter for its language.

    Latin-script languages (EN/FR/ES/SP) use the Latin splitter; every other
    language code falls back to the Chinese splitter.
    """
    latin_languages = {'EN', 'FR', 'ES', 'SP'}
    splitter = split_sentences_latin if language_str in latin_languages else split_sentences_zh
    return splitter(text, min_len=min_len)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def split_sentences_latin(text, min_len=10):
    """Split Latin-script text into chunks after normalizing punctuation.

    Fullwidth punctuation and curly quotes are mapped to ASCII equivalents,
    bracket-like characters are dropped, then the text is chunked with
    ``txtsplit`` and empty chunks are filtered out.
    """
    normalizations = [
        ('[。!?;]', '.'),
        ('[,]', ','),
        ('[“”]', '"'),
        ('[‘’]', "'"),
        (r"[\<\>\(\)\[\]\"\«\»]+", ""),
    ]
    for pattern, replacement in normalizations:
        text = re.sub(pattern, replacement, text)
    chunks = txtsplit(text, 256, 512)
    return [chunk.strip() for chunk in chunks if chunk.strip()]
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def split_sentences_zh(text, min_len=10):
    """Split Chinese (or mixed) text into chunks of more than *min_len* chars.

    Punctuation is normalized, a sentinel marker is inserted after each
    sentence-ending mark, and the resulting pieces are greedily merged until
    each chunk exceeds *min_len* characters; trailing short chunks are folded
    by ``merge_short_sentences_zh``.
    """
    text = re.sub('[。!?;]', '.', text)
    text = re.sub('[,]', ',', text)
    # Collapse newlines, tabs and space runs into single spaces.
    text = re.sub('[\n\t ]+', ' ', text)
    # Insert a sentinel marker after every punctuation mark.
    text = re.sub('([,.!?;])', r'\1 $#!', text)
    # Split on the sentinel and trim surrounding whitespace.
    # sentences = [s.strip() for s in re.split('(。|!|?|;)', text)]
    pieces = [piece.strip() for piece in text.split('$#!')]
    if pieces and not pieces[-1]:
        pieces.pop()

    merged = []
    buffer = []
    buffered_len = 0
    last_index = len(pieces) - 1
    for idx, piece in enumerate(pieces):
        buffer.append(piece)
        buffered_len += len(piece)
        # Flush once the buffer is long enough, or at the final piece.
        if buffered_len > min_len or idx == last_index:
            merged.append(' '.join(buffer))
            buffer = []
            buffered_len = 0
    return merge_short_sentences_zh(merged)
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def merge_short_sentences_en(sens):
    """Avoid short sentences by merging them with the following sentence.

    A sentence of at most two whitespace-separated words is merged into its
    neighbor so the output never contains tiny fragments.

    Args:
        sens: List[str] of input sentences.

    Returns:
        List[str]: list of output sentences.
    """
    sens_out = []
    for s in sens:
        # If the previous sentence is too short, merge it with the
        # current sentence.
        if len(sens_out) > 0 and len(sens_out[-1].split(" ")) <= 2:
            sens_out[-1] = sens_out[-1] + " " + s
        else:
            sens_out.append(s)
    # Fold a trailing short sentence back into its predecessor. Catch
    # IndexError specifically (empty or single-element result) instead of a
    # bare `except`, which would also hide unrelated bugs.
    try:
        if len(sens_out[-1].split(" ")) <= 2:
            sens_out[-2] = sens_out[-2] + " " + sens_out[-1]
            sens_out.pop(-1)
    except IndexError:
        pass
    return sens_out
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def merge_short_sentences_zh(sens):
    """Avoid short sentences by merging them with the following sentence.

    A sentence of at most two characters is merged into its neighbor so the
    output never contains tiny fragments.

    Args:
        sens: List[str] of input sentences.

    Returns:
        List[str]: list of output sentences.
    """
    # return sens
    sens_out = []
    for s in sens:
        # If the previous sentence is too short, merge it with the
        # current sentence.
        if len(sens_out) > 0 and len(sens_out[-1]) <= 2:
            sens_out[-1] = sens_out[-1] + " " + s
        else:
            sens_out.append(s)
    # Fold a trailing short sentence back into its predecessor. Catch
    # IndexError specifically (empty or single-element result) instead of a
    # bare `except`, which would also hide unrelated bugs.
    try:
        if len(sens_out[-1]) <= 2:
            sens_out[-2] = sens_out[-2] + " " + sens_out[-1]
            sens_out.pop(-1)
    except IndexError:
        pass
    return sens_out
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def txtsplit(text, desired_length=100, max_length=200):
    """Split text into chunks of a desired length trying to keep sentences intact.

    Scans the text character by character with a small cursor state machine:
    chunks are committed at sentence-ending punctuation once they reach
    *desired_length*, and force-split (backtracking to the last good split
    point where possible) once they reach *max_length*. Double-quoted spans
    are kept together.
    """
    # Normalize whitespace and punctuation spacing before scanning.
    text = re.sub(r'\n\n+', '\n', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[""]', '"', text)
    text = re.sub(r'([,.?!])', r'\1 ', text)
    text = re.sub(r'\s+', ' ', text)

    rv = []                # committed chunks
    in_quote = False       # True while the cursor is inside a quoted span
    current = ""           # chunk being accumulated
    split_pos = []         # candidate split positions within `current`
    pos = -1               # cursor index into `text`
    end_pos = len(text) - 1
    def seek(delta):
        # Move the cursor by `delta` (negative = backtrack), keeping
        # `current` and the quote state in sync; returns the char at the
        # new position.
        nonlocal pos, in_quote, current
        is_neg = delta < 0
        for _ in range(abs(delta)):
            if is_neg:
                pos -= 1
                current = current[:-1]
            else:
                pos += 1
                current += text[pos]
            if text[pos] == '"':
                in_quote = not in_quote
        return text[pos]
    def peek(delta):
        # Look at the char `delta` positions ahead without moving the cursor.
        p = pos + delta
        return text[p] if p < end_pos and p >= 0 else ""
    def commit():
        # Finish the current chunk and reset the accumulation state.
        nonlocal rv, current, split_pos
        rv.append(current)
        current = ""
        split_pos = []
    while pos < end_pos:
        c = seek(1)
        if len(current) >= max_length:
            # Hard limit reached: prefer backtracking to the last recorded
            # split point if the remaining chunk would still be big enough.
            if len(split_pos) > 0 and len(current) > (desired_length / 2):
                d = pos - split_pos[-1]
                seek(-d)
            else:
                # Otherwise walk back to the nearest break character.
                while c not in '!?.\n ' and pos > 0 and len(current) > desired_length:
                    c = seek(-1)
            commit()
        elif not in_quote and (c in '!?\n' or (c in '.,' and peek(1) in '\n ')):
            # Sentence boundary outside quotes: absorb any run of trailing
            # punctuation ("?!", "...") before recording the split point.
            while pos < len(text) - 1 and len(current) < max_length and peek(1) in '!?.':
                c = seek(1)
            split_pos.append(pos)
            if len(current) >= desired_length:
                commit()
        elif in_quote and peek(1) == '"' and peek(2) in '\n ':
            # Closing quote followed by whitespace: step past it and record
            # a split point so quoted spans are not cut mid-quote.
            seek(2)
            split_pos.append(pos)
    rv.append(current)
    # Trim chunks and drop empty / punctuation-only remnants.
    rv = [s.strip() for s in rv]
    rv = [s for s in rv if len(s) > 0 and not re.match(r'^[\s\.,;:!?]*$', s)]
    return rv
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
if __name__ == '__main__':
    # Manual smoke test: print the chunking result for each supported language.
    zh_text = "好的,我来给你讲一个故事吧。从前有一个小姑娘,她叫做小红。小红非常喜欢在森林里玩耍,她经常会和她的小伙伴们一起去探险。有一天,小红和她的小伙伴们走到了森林深处,突然遇到了一只凶猛的野兽。小红的小伙伴们都吓得不敢动弹,但是小红并没有被吓倒,她勇敢地走向野兽,用她的智慧和勇气成功地制服了野兽,保护了她的小伙伴们。从那以后,小红变得更加勇敢和自信,成为了她小伙伴们心中的英雄。"
    en_text = "I didn’t know what to do. I said please kill her because it would be better than being kidnapped,” Ben, whose surname CNN is not using for security concerns, said on Wednesday. “It’s a nightmare. I said ‘please kill her, don’t take her there.’"
    sp_text = "¡Claro! ¿En qué tema te gustaría que te hable en español? Puedo proporcionarte información o conversar contigo sobre una amplia variedad de temas, desde cultura y comida hasta viajes y tecnología. ¿Tienes alguna preferencia en particular?"
    fr_text = "Bien sûr ! En quelle matière voudriez-vous que je vous parle en français ? Je peux vous fournir des informations ou discuter avec vous sur une grande variété de sujets, que ce soit la culture, la nourriture, les voyages ou la technologie. Avez-vous une préférence particulière ?"

    print(split_sentence(zh_text, language_str='ZH'))
    print(split_sentence(en_text, language_str='EN'))
    print(split_sentence(sp_text, language_str='SP'))
    print(split_sentence(fr_text, language_str='FR'))
|
libmelotts/python/symbols.py
ADDED
|
@@ -0,0 +1,1237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Symbol inventory for the zh_mix_en (Chinese with embedded English) voice:
# the index of each entry is the token id fed to the TTS model.
# NOTE(review): order must match the model's embedding table — do not reorder.
zh_mix_en_symbols = [
    "_",
    "AA", "E", "EE", "En", "N", "OO", "V",
    "a", "a:", "aa", "ae", "ah", "ai", "an", "ang", "ao", "aw", "ay",
    "b", "by", "c", "ch", "d", "dh", "dy",
    "e", "e:", "eh", "ei", "en", "eng", "er", "ey",
    "f", "g", "gy", "h", "hh", "hy",
    "i", "i0", "i:", "ia", "ian", "iang", "iao", "ie", "ih", "in", "ing",
    "iong", "ir", "iu", "iy",
    "j", "jh", "k", "ky", "l", "m", "my", "n", "ng", "ny",
    "o", "o:", "ong", "ou", "ow", "oy",
    "p", "py", "q", "r", "ry", "s", "sh", "t", "th", "ts", "ty",
    "u", "u:", "ua", "uai", "uan", "uang", "uh", "ui", "un", "uo", "uw",
    "v", "van", "ve", "vn", "w", "x", "y", "z", "zh", "zy",
    # Punctuation and special tokens (SP = pause, UNK = unknown symbol).
    "!", "?", "…", ",", ".", "'", "-",
    "SP", "UNK"
]
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
# Symbol inventory for the Japanese voice: the index of each entry is the
# token id fed to the TTS model.
# NOTE(review): order must match the model's embedding table — do not reorder.
jp_symbols = [
    "_",
    "\"", "(", ")", "*", "/", ":",
    "AA", "E", "EE", "En", "N", "OO", "Q", "V",
    "[", "\\", "]", "^",
    "a", "a:", "aa", "ae", "ah", "ai", "an", "ang", "ao", "aw", "ay",
    "b", "by", "c", "ch", "d", "dh", "dy",
    "e", "e:", "eh", "ei", "en", "eng", "er", "ey",
    "f", "g", "gy", "h", "hh", "hy",
    "i", "i0", "i:", "ia", "ian", "iang", "iao", "ie", "ih", "in", "ing",
    "iong", "ir", "iu", "iy",
    "j", "jh", "k", "ky", "l", "m", "my", "n", "ng", "ny",
    "o", "o:", "ong", "ou", "ow", "oy",
    "p", "py", "q", "r", "ry", "s", "sh", "t", "th", "ts", "ty",
    "u", "u:", "ua", "uai", "uan", "uang", "uh", "ui", "un", "uo", "uw",
    "v", "van", "ve", "vn", "w", "x", "y", "z", "zh", "zy",
    "~",
    # IPA symbols (last two entries before β are combining diacritics).
    "æ", "ç", "ð", "ø", "ŋ", "œ", "ɐ", "ɑ", "ɒ", "ɔ", "ɕ", "ə", "ɛ", "ɜ",
    "ɡ", "ɣ", "ɥ", "ɦ", "ɪ", "ɫ", "ɬ", "ɭ", "ɯ", "ɲ", "ɵ", "ɸ", "ɹ", "ɾ",
    "ʁ", "ʃ", "ʊ", "ʌ", "ʎ", "ʏ", "ʑ", "ʒ", "ʝ", "ʲ", "ˈ", "ˌ", "ː",
    "̃", "̩", "β", "θ",
    # Korean jamo.
    "ᄀ", "ᄁ", "ᄂ", "ᄃ", "ᄄ", "ᄅ", "ᄆ", "ᄇ", "ᄈ", "ᄉ", "ᄊ", "ᄋ",
    "ᄌ", "ᄍ", "ᄎ", "ᄏ", "ᄐ", "ᄑ", "ᄒ",
    "ᅡ", "ᅢ", "ᅣ", "ᅤ", "ᅥ", "ᅦ", "ᅧ", "ᅨ", "ᅩ", "ᅪ", "ᅫ", "ᅬ",
    "ᅭ", "ᅮ", "ᅯ", "ᅰ", "ᅱ", "ᅲ", "ᅳ", "ᅴ", "ᅵ",
    "ᆨ", "ᆫ", "ᆮ", "ᆯ", "ᆷ", "ᆸ", "ᆼ", "ㄸ",
    # Punctuation and special tokens (SP = pause, UNK = unknown symbol).
    "!", "?", "…", ",", ".", "'", "-", "¿", "¡",
    "SP", "UNK"
]
|
| 339 |
+
|
| 340 |
+
# Symbol inventory for the English voice: the index of each entry is the
# token id fed to the TTS model. Same alphabet as jp_symbols but with
# "¡"/"¿" placed after "~" rather than at the tail.
# NOTE(review): order must match the model's embedding table — do not reorder.
en_symbols = [
    "_",
    "\"", "(", ")", "*", "/", ":",
    "AA", "E", "EE", "En", "N", "OO", "Q", "V",
    "[", "\\", "]", "^",
    "a", "a:", "aa", "ae", "ah", "ai", "an", "ang", "ao", "aw", "ay",
    "b", "by", "c", "ch", "d", "dh", "dy",
    "e", "e:", "eh", "ei", "en", "eng", "er", "ey",
    "f", "g", "gy", "h", "hh", "hy",
    "i", "i0", "i:", "ia", "ian", "iang", "iao", "ie", "ih", "in", "ing",
    "iong", "ir", "iu", "iy",
    "j", "jh", "k", "ky", "l", "m", "my", "n", "ng", "ny",
    "o", "o:", "ong", "ou", "ow", "oy",
    "p", "py", "q", "r", "ry", "s", "sh", "t", "th", "ts", "ty",
    "u", "u:", "ua", "uai", "uan", "uang", "uh", "ui", "un", "uo", "uw",
    "v", "van", "ve", "vn", "w", "x", "y", "z", "zh", "zy",
    "~", "¡", "¿",
    # IPA symbols (last two entries before β are combining diacritics).
    "æ", "ç", "ð", "ø", "ŋ", "œ", "ɐ", "ɑ", "ɒ", "ɔ", "ɕ", "ə", "ɛ", "ɜ",
    "ɡ", "ɣ", "ɥ", "ɦ", "ɪ", "ɫ", "ɬ", "ɭ", "ɯ", "ɲ", "ɵ", "ɸ", "ɹ", "ɾ",
    "ʁ", "ʃ", "ʊ", "ʌ", "ʎ", "ʏ", "ʑ", "ʒ", "ʝ", "ʲ", "ˈ", "ˌ", "ː",
    "̃", "̩", "β", "θ",
    # Korean jamo.
    "ᄀ", "ᄁ", "ᄂ", "ᄃ", "ᄄ", "ᄅ", "ᄆ", "ᄇ", "ᄈ", "ᄉ", "ᄊ", "ᄋ",
    "ᄌ", "ᄍ", "ᄎ", "ᄏ", "ᄐ", "ᄑ", "ᄒ",
    "ᅡ", "ᅢ", "ᅣ", "ᅤ", "ᅥ", "ᅦ", "ᅧ", "ᅨ", "ᅩ", "ᅪ", "ᅫ", "ᅬ",
    "ᅭ", "ᅮ", "ᅯ", "ᅰ", "ᅱ", "ᅲ", "ᅳ", "ᅴ", "ᅵ",
    "ᆨ", "ᆫ", "ᆮ", "ᆯ", "ᆷ", "ᆸ", "ᆼ", "ㄸ",
    # Punctuation and special tokens (SP = pause, UNK = unknown symbol).
    "!", "?", "…", ",", ".", "'", "-",
    "SP", "UNK"
]
|
| 561 |
+
|
| 562 |
+
kr_symbols = [
|
| 563 |
+
"_",
|
| 564 |
+
"\"",
|
| 565 |
+
"(",
|
| 566 |
+
")",
|
| 567 |
+
"*",
|
| 568 |
+
"/",
|
| 569 |
+
":",
|
| 570 |
+
"AA",
|
| 571 |
+
"E",
|
| 572 |
+
"EE",
|
| 573 |
+
"En",
|
| 574 |
+
"N",
|
| 575 |
+
"OO",
|
| 576 |
+
"Q",
|
| 577 |
+
"V",
|
| 578 |
+
"[",
|
| 579 |
+
"\\",
|
| 580 |
+
"]",
|
| 581 |
+
"^",
|
| 582 |
+
"a",
|
| 583 |
+
"a:",
|
| 584 |
+
"aa",
|
| 585 |
+
"ae",
|
| 586 |
+
"ah",
|
| 587 |
+
"ai",
|
| 588 |
+
"an",
|
| 589 |
+
"ang",
|
| 590 |
+
"ao",
|
| 591 |
+
"aw",
|
| 592 |
+
"ay",
|
| 593 |
+
"b",
|
| 594 |
+
"by",
|
| 595 |
+
"c",
|
| 596 |
+
"ch",
|
| 597 |
+
"d",
|
| 598 |
+
"dh",
|
| 599 |
+
"dy",
|
| 600 |
+
"e",
|
| 601 |
+
"e:",
|
| 602 |
+
"eh",
|
| 603 |
+
"ei",
|
| 604 |
+
"en",
|
| 605 |
+
"eng",
|
| 606 |
+
"er",
|
| 607 |
+
"ey",
|
| 608 |
+
"f",
|
| 609 |
+
"g",
|
| 610 |
+
"gy",
|
| 611 |
+
"h",
|
| 612 |
+
"hh",
|
| 613 |
+
"hy",
|
| 614 |
+
"i",
|
| 615 |
+
"i0",
|
| 616 |
+
"i:",
|
| 617 |
+
"ia",
|
| 618 |
+
"ian",
|
| 619 |
+
"iang",
|
| 620 |
+
"iao",
|
| 621 |
+
"ie",
|
| 622 |
+
"ih",
|
| 623 |
+
"in",
|
| 624 |
+
"ing",
|
| 625 |
+
"iong",
|
| 626 |
+
"ir",
|
| 627 |
+
"iu",
|
| 628 |
+
"iy",
|
| 629 |
+
"j",
|
| 630 |
+
"jh",
|
| 631 |
+
"k",
|
| 632 |
+
"ky",
|
| 633 |
+
"l",
|
| 634 |
+
"m",
|
| 635 |
+
"my",
|
| 636 |
+
"n",
|
| 637 |
+
"ng",
|
| 638 |
+
"ny",
|
| 639 |
+
"o",
|
| 640 |
+
"o:",
|
| 641 |
+
"ong",
|
| 642 |
+
"ou",
|
| 643 |
+
"ow",
|
| 644 |
+
"oy",
|
| 645 |
+
"p",
|
| 646 |
+
"py",
|
| 647 |
+
"q",
|
| 648 |
+
"r",
|
| 649 |
+
"ry",
|
| 650 |
+
"s",
|
| 651 |
+
"sh",
|
| 652 |
+
"t",
|
| 653 |
+
"th",
|
| 654 |
+
"ts",
|
| 655 |
+
"ty",
|
| 656 |
+
"u",
|
| 657 |
+
"u:",
|
| 658 |
+
"ua",
|
| 659 |
+
"uai",
|
| 660 |
+
"uan",
|
| 661 |
+
"uang",
|
| 662 |
+
"uh",
|
| 663 |
+
"ui",
|
| 664 |
+
"un",
|
| 665 |
+
"uo",
|
| 666 |
+
"uw",
|
| 667 |
+
"v",
|
| 668 |
+
"van",
|
| 669 |
+
"ve",
|
| 670 |
+
"vn",
|
| 671 |
+
"w",
|
| 672 |
+
"x",
|
| 673 |
+
"y",
|
| 674 |
+
"z",
|
| 675 |
+
"zh",
|
| 676 |
+
"zy",
|
| 677 |
+
"~",
|
| 678 |
+
"¡",
|
| 679 |
+
"¿",
|
| 680 |
+
"æ",
|
| 681 |
+
"ç",
|
| 682 |
+
"ð",
|
| 683 |
+
"ø",
|
| 684 |
+
"ŋ",
|
| 685 |
+
"œ",
|
| 686 |
+
"ɐ",
|
| 687 |
+
"ɑ",
|
| 688 |
+
"ɒ",
|
| 689 |
+
"ɔ",
|
| 690 |
+
"ɕ",
|
| 691 |
+
"ə",
|
| 692 |
+
"ɛ",
|
| 693 |
+
"ɜ",
|
| 694 |
+
"ɡ",
|
| 695 |
+
"ɣ",
|
| 696 |
+
"ɥ",
|
| 697 |
+
"ɦ",
|
| 698 |
+
"ɪ",
|
| 699 |
+
"ɫ",
|
| 700 |
+
"ɬ",
|
| 701 |
+
"ɭ",
|
| 702 |
+
"ɯ",
|
| 703 |
+
"ɲ",
|
| 704 |
+
"ɵ",
|
| 705 |
+
"ɸ",
|
| 706 |
+
"ɹ",
|
| 707 |
+
"ɾ",
|
| 708 |
+
"ʁ",
|
| 709 |
+
"ʃ",
|
| 710 |
+
"ʊ",
|
| 711 |
+
"ʌ",
|
| 712 |
+
"ʎ",
|
| 713 |
+
"ʏ",
|
| 714 |
+
"ʑ",
|
| 715 |
+
"ʒ",
|
| 716 |
+
"ʝ",
|
| 717 |
+
"ʲ",
|
| 718 |
+
"ˈ",
|
| 719 |
+
"ˌ",
|
| 720 |
+
"ː",
|
| 721 |
+
"̃",
|
| 722 |
+
"̩",
|
| 723 |
+
"β",
|
| 724 |
+
"θ",
|
| 725 |
+
"ᄀ",
|
| 726 |
+
"ᄁ",
|
| 727 |
+
"ᄂ",
|
| 728 |
+
"ᄃ",
|
| 729 |
+
"ᄄ",
|
| 730 |
+
"ᄅ",
|
| 731 |
+
"ᄆ",
|
| 732 |
+
"ᄇ",
|
| 733 |
+
"ᄈ",
|
| 734 |
+
"ᄉ",
|
| 735 |
+
"ᄊ",
|
| 736 |
+
"ᄋ",
|
| 737 |
+
"ᄌ",
|
| 738 |
+
"ᄍ",
|
| 739 |
+
"ᄎ",
|
| 740 |
+
"ᄏ",
|
| 741 |
+
"ᄐ",
|
| 742 |
+
"ᄑ",
|
| 743 |
+
"ᄒ",
|
| 744 |
+
"ᅡ",
|
| 745 |
+
"ᅢ",
|
| 746 |
+
"ᅣ",
|
| 747 |
+
"ᅤ",
|
| 748 |
+
"ᅥ",
|
| 749 |
+
"ᅦ",
|
| 750 |
+
"ᅧ",
|
| 751 |
+
"ᅨ",
|
| 752 |
+
"ᅩ",
|
| 753 |
+
"ᅪ",
|
| 754 |
+
"ᅫ",
|
| 755 |
+
"ᅬ",
|
| 756 |
+
"ᅭ",
|
| 757 |
+
"ᅮ",
|
| 758 |
+
"ᅯ",
|
| 759 |
+
"ᅰ",
|
| 760 |
+
"ᅱ",
|
| 761 |
+
"ᅲ",
|
| 762 |
+
"ᅳ",
|
| 763 |
+
"ᅴ",
|
| 764 |
+
"ᅵ",
|
| 765 |
+
"ᆨ",
|
| 766 |
+
"ᆫ",
|
| 767 |
+
"ᆮ",
|
| 768 |
+
"ᆯ",
|
| 769 |
+
"ᆷ",
|
| 770 |
+
"ᆸ",
|
| 771 |
+
"ᆼ",
|
| 772 |
+
"ㄸ",
|
| 773 |
+
"!",
|
| 774 |
+
"?",
|
| 775 |
+
"…",
|
| 776 |
+
",",
|
| 777 |
+
".",
|
| 778 |
+
"'",
|
| 779 |
+
"-",
|
| 780 |
+
"SP",
|
| 781 |
+
"UNK"
|
| 782 |
+
]
|
| 783 |
+
|
| 784 |
+
es_symbols = [
|
| 785 |
+
"_",
|
| 786 |
+
"\"",
|
| 787 |
+
"(",
|
| 788 |
+
")",
|
| 789 |
+
"*",
|
| 790 |
+
"/",
|
| 791 |
+
":",
|
| 792 |
+
"AA",
|
| 793 |
+
"E",
|
| 794 |
+
"EE",
|
| 795 |
+
"En",
|
| 796 |
+
"N",
|
| 797 |
+
"OO",
|
| 798 |
+
"Q",
|
| 799 |
+
"V",
|
| 800 |
+
"[",
|
| 801 |
+
"\\",
|
| 802 |
+
"]",
|
| 803 |
+
"^",
|
| 804 |
+
"a",
|
| 805 |
+
"a:",
|
| 806 |
+
"aa",
|
| 807 |
+
"ae",
|
| 808 |
+
"ah",
|
| 809 |
+
"ai",
|
| 810 |
+
"an",
|
| 811 |
+
"ang",
|
| 812 |
+
"ao",
|
| 813 |
+
"aw",
|
| 814 |
+
"ay",
|
| 815 |
+
"b",
|
| 816 |
+
"by",
|
| 817 |
+
"c",
|
| 818 |
+
"ch",
|
| 819 |
+
"d",
|
| 820 |
+
"dh",
|
| 821 |
+
"dy",
|
| 822 |
+
"e",
|
| 823 |
+
"e:",
|
| 824 |
+
"eh",
|
| 825 |
+
"ei",
|
| 826 |
+
"en",
|
| 827 |
+
"eng",
|
| 828 |
+
"er",
|
| 829 |
+
"ey",
|
| 830 |
+
"f",
|
| 831 |
+
"g",
|
| 832 |
+
"gy",
|
| 833 |
+
"h",
|
| 834 |
+
"hh",
|
| 835 |
+
"hy",
|
| 836 |
+
"i",
|
| 837 |
+
"i0",
|
| 838 |
+
"i:",
|
| 839 |
+
"ia",
|
| 840 |
+
"ian",
|
| 841 |
+
"iang",
|
| 842 |
+
"iao",
|
| 843 |
+
"ie",
|
| 844 |
+
"ih",
|
| 845 |
+
"in",
|
| 846 |
+
"ing",
|
| 847 |
+
"iong",
|
| 848 |
+
"ir",
|
| 849 |
+
"iu",
|
| 850 |
+
"iy",
|
| 851 |
+
"j",
|
| 852 |
+
"jh",
|
| 853 |
+
"k",
|
| 854 |
+
"ky",
|
| 855 |
+
"l",
|
| 856 |
+
"m",
|
| 857 |
+
"my",
|
| 858 |
+
"n",
|
| 859 |
+
"ng",
|
| 860 |
+
"ny",
|
| 861 |
+
"o",
|
| 862 |
+
"o:",
|
| 863 |
+
"ong",
|
| 864 |
+
"ou",
|
| 865 |
+
"ow",
|
| 866 |
+
"oy",
|
| 867 |
+
"p",
|
| 868 |
+
"py",
|
| 869 |
+
"q",
|
| 870 |
+
"r",
|
| 871 |
+
"ry",
|
| 872 |
+
"s",
|
| 873 |
+
"sh",
|
| 874 |
+
"t",
|
| 875 |
+
"th",
|
| 876 |
+
"ts",
|
| 877 |
+
"ty",
|
| 878 |
+
"u",
|
| 879 |
+
"u:",
|
| 880 |
+
"ua",
|
| 881 |
+
"uai",
|
| 882 |
+
"uan",
|
| 883 |
+
"uang",
|
| 884 |
+
"uh",
|
| 885 |
+
"ui",
|
| 886 |
+
"un",
|
| 887 |
+
"uo",
|
| 888 |
+
"uw",
|
| 889 |
+
"v",
|
| 890 |
+
"van",
|
| 891 |
+
"ve",
|
| 892 |
+
"vn",
|
| 893 |
+
"w",
|
| 894 |
+
"x",
|
| 895 |
+
"y",
|
| 896 |
+
"z",
|
| 897 |
+
"zh",
|
| 898 |
+
"zy",
|
| 899 |
+
"~",
|
| 900 |
+
"¡",
|
| 901 |
+
"¿",
|
| 902 |
+
"æ",
|
| 903 |
+
"ç",
|
| 904 |
+
"ð",
|
| 905 |
+
"ø",
|
| 906 |
+
"ŋ",
|
| 907 |
+
"œ",
|
| 908 |
+
"ɐ",
|
| 909 |
+
"ɑ",
|
| 910 |
+
"ɒ",
|
| 911 |
+
"ɔ",
|
| 912 |
+
"ɕ",
|
| 913 |
+
"ə",
|
| 914 |
+
"ɛ",
|
| 915 |
+
"ɜ",
|
| 916 |
+
"ɡ",
|
| 917 |
+
"ɣ",
|
| 918 |
+
"ɥ",
|
| 919 |
+
"ɦ",
|
| 920 |
+
"ɪ",
|
| 921 |
+
"ɫ",
|
| 922 |
+
"ɬ",
|
| 923 |
+
"ɭ",
|
| 924 |
+
"ɯ",
|
| 925 |
+
"ɲ",
|
| 926 |
+
"ɵ",
|
| 927 |
+
"ɸ",
|
| 928 |
+
"ɹ",
|
| 929 |
+
"ɾ",
|
| 930 |
+
"ʁ",
|
| 931 |
+
"ʃ",
|
| 932 |
+
"ʊ",
|
| 933 |
+
"ʌ",
|
| 934 |
+
"ʎ",
|
| 935 |
+
"ʏ",
|
| 936 |
+
"ʑ",
|
| 937 |
+
"ʒ",
|
| 938 |
+
"ʝ",
|
| 939 |
+
"ʲ",
|
| 940 |
+
"ˈ",
|
| 941 |
+
"ˌ",
|
| 942 |
+
"ː",
|
| 943 |
+
"̃",
|
| 944 |
+
"̩",
|
| 945 |
+
"β",
|
| 946 |
+
"θ",
|
| 947 |
+
"ᄀ",
|
| 948 |
+
"ᄁ",
|
| 949 |
+
"ᄂ",
|
| 950 |
+
"ᄃ",
|
| 951 |
+
"ᄄ",
|
| 952 |
+
"ᄅ",
|
| 953 |
+
"ᄆ",
|
| 954 |
+
"ᄇ",
|
| 955 |
+
"ᄈ",
|
| 956 |
+
"ᄉ",
|
| 957 |
+
"ᄊ",
|
| 958 |
+
"ᄋ",
|
| 959 |
+
"ᄌ",
|
| 960 |
+
"ᄍ",
|
| 961 |
+
"ᄎ",
|
| 962 |
+
"ᄏ",
|
| 963 |
+
"ᄐ",
|
| 964 |
+
"ᄑ",
|
| 965 |
+
"ᄒ",
|
| 966 |
+
"ᅡ",
|
| 967 |
+
"ᅢ",
|
| 968 |
+
"ᅣ",
|
| 969 |
+
"ᅤ",
|
| 970 |
+
"ᅥ",
|
| 971 |
+
"ᅦ",
|
| 972 |
+
"ᅧ",
|
| 973 |
+
"ᅨ",
|
| 974 |
+
"ᅩ",
|
| 975 |
+
"ᅪ",
|
| 976 |
+
"ᅫ",
|
| 977 |
+
"ᅬ",
|
| 978 |
+
"ᅭ",
|
| 979 |
+
"ᅮ",
|
| 980 |
+
"ᅯ",
|
| 981 |
+
"ᅰ",
|
| 982 |
+
"ᅱ",
|
| 983 |
+
"ᅲ",
|
| 984 |
+
"ᅳ",
|
| 985 |
+
"ᅴ",
|
| 986 |
+
"ᅵ",
|
| 987 |
+
"ᆨ",
|
| 988 |
+
"ᆫ",
|
| 989 |
+
"ᆮ",
|
| 990 |
+
"ᆯ",
|
| 991 |
+
"ᆷ",
|
| 992 |
+
"ᆸ",
|
| 993 |
+
"ᆼ",
|
| 994 |
+
"ㄸ",
|
| 995 |
+
"!",
|
| 996 |
+
"?",
|
| 997 |
+
"…",
|
| 998 |
+
",",
|
| 999 |
+
".",
|
| 1000 |
+
"'",
|
| 1001 |
+
"-",
|
| 1002 |
+
"SP",
|
| 1003 |
+
"UNK"
|
| 1004 |
+
]
|
| 1005 |
+
|
| 1006 |
+
fr_symbols = [
|
| 1007 |
+
"_",
|
| 1008 |
+
"\"",
|
| 1009 |
+
"(",
|
| 1010 |
+
")",
|
| 1011 |
+
"*",
|
| 1012 |
+
"/",
|
| 1013 |
+
":",
|
| 1014 |
+
"AA",
|
| 1015 |
+
"E",
|
| 1016 |
+
"EE",
|
| 1017 |
+
"En",
|
| 1018 |
+
"N",
|
| 1019 |
+
"OO",
|
| 1020 |
+
"Q",
|
| 1021 |
+
"V",
|
| 1022 |
+
"[",
|
| 1023 |
+
"\\",
|
| 1024 |
+
"]",
|
| 1025 |
+
"^",
|
| 1026 |
+
"a",
|
| 1027 |
+
"a:",
|
| 1028 |
+
"aa",
|
| 1029 |
+
"ae",
|
| 1030 |
+
"ah",
|
| 1031 |
+
"ai",
|
| 1032 |
+
"an",
|
| 1033 |
+
"ang",
|
| 1034 |
+
"ao",
|
| 1035 |
+
"aw",
|
| 1036 |
+
"ay",
|
| 1037 |
+
"b",
|
| 1038 |
+
"by",
|
| 1039 |
+
"c",
|
| 1040 |
+
"ch",
|
| 1041 |
+
"d",
|
| 1042 |
+
"dh",
|
| 1043 |
+
"dy",
|
| 1044 |
+
"e",
|
| 1045 |
+
"e:",
|
| 1046 |
+
"eh",
|
| 1047 |
+
"ei",
|
| 1048 |
+
"en",
|
| 1049 |
+
"eng",
|
| 1050 |
+
"er",
|
| 1051 |
+
"ey",
|
| 1052 |
+
"f",
|
| 1053 |
+
"g",
|
| 1054 |
+
"gy",
|
| 1055 |
+
"h",
|
| 1056 |
+
"hh",
|
| 1057 |
+
"hy",
|
| 1058 |
+
"i",
|
| 1059 |
+
"i0",
|
| 1060 |
+
"i:",
|
| 1061 |
+
"ia",
|
| 1062 |
+
"ian",
|
| 1063 |
+
"iang",
|
| 1064 |
+
"iao",
|
| 1065 |
+
"ie",
|
| 1066 |
+
"ih",
|
| 1067 |
+
"in",
|
| 1068 |
+
"ing",
|
| 1069 |
+
"iong",
|
| 1070 |
+
"ir",
|
| 1071 |
+
"iu",
|
| 1072 |
+
"iy",
|
| 1073 |
+
"j",
|
| 1074 |
+
"jh",
|
| 1075 |
+
"k",
|
| 1076 |
+
"ky",
|
| 1077 |
+
"l",
|
| 1078 |
+
"m",
|
| 1079 |
+
"my",
|
| 1080 |
+
"n",
|
| 1081 |
+
"ng",
|
| 1082 |
+
"ny",
|
| 1083 |
+
"o",
|
| 1084 |
+
"o:",
|
| 1085 |
+
"ong",
|
| 1086 |
+
"ou",
|
| 1087 |
+
"ow",
|
| 1088 |
+
"oy",
|
| 1089 |
+
"p",
|
| 1090 |
+
"py",
|
| 1091 |
+
"q",
|
| 1092 |
+
"r",
|
| 1093 |
+
"ry",
|
| 1094 |
+
"s",
|
| 1095 |
+
"sh",
|
| 1096 |
+
"t",
|
| 1097 |
+
"th",
|
| 1098 |
+
"ts",
|
| 1099 |
+
"ty",
|
| 1100 |
+
"u",
|
| 1101 |
+
"u:",
|
| 1102 |
+
"ua",
|
| 1103 |
+
"uai",
|
| 1104 |
+
"uan",
|
| 1105 |
+
"uang",
|
| 1106 |
+
"uh",
|
| 1107 |
+
"ui",
|
| 1108 |
+
"un",
|
| 1109 |
+
"uo",
|
| 1110 |
+
"uw",
|
| 1111 |
+
"v",
|
| 1112 |
+
"van",
|
| 1113 |
+
"ve",
|
| 1114 |
+
"vn",
|
| 1115 |
+
"w",
|
| 1116 |
+
"x",
|
| 1117 |
+
"y",
|
| 1118 |
+
"z",
|
| 1119 |
+
"zh",
|
| 1120 |
+
"zy",
|
| 1121 |
+
"~",
|
| 1122 |
+
"¡",
|
| 1123 |
+
"¿",
|
| 1124 |
+
"æ",
|
| 1125 |
+
"ç",
|
| 1126 |
+
"ð",
|
| 1127 |
+
"ø",
|
| 1128 |
+
"ŋ",
|
| 1129 |
+
"œ",
|
| 1130 |
+
"ɐ",
|
| 1131 |
+
"ɑ",
|
| 1132 |
+
"ɒ",
|
| 1133 |
+
"ɔ",
|
| 1134 |
+
"ɕ",
|
| 1135 |
+
"ə",
|
| 1136 |
+
"ɛ",
|
| 1137 |
+
"ɜ",
|
| 1138 |
+
"ɡ",
|
| 1139 |
+
"ɣ",
|
| 1140 |
+
"ɥ",
|
| 1141 |
+
"ɦ",
|
| 1142 |
+
"ɪ",
|
| 1143 |
+
"ɫ",
|
| 1144 |
+
"ɬ",
|
| 1145 |
+
"ɭ",
|
| 1146 |
+
"ɯ",
|
| 1147 |
+
"ɲ",
|
| 1148 |
+
"ɵ",
|
| 1149 |
+
"ɸ",
|
| 1150 |
+
"ɹ",
|
| 1151 |
+
"ɾ",
|
| 1152 |
+
"ʁ",
|
| 1153 |
+
"ʃ",
|
| 1154 |
+
"ʊ",
|
| 1155 |
+
"ʌ",
|
| 1156 |
+
"ʎ",
|
| 1157 |
+
"ʏ",
|
| 1158 |
+
"ʑ",
|
| 1159 |
+
"ʒ",
|
| 1160 |
+
"ʝ",
|
| 1161 |
+
"ʲ",
|
| 1162 |
+
"ˈ",
|
| 1163 |
+
"ˌ",
|
| 1164 |
+
"ː",
|
| 1165 |
+
"̃",
|
| 1166 |
+
"̩",
|
| 1167 |
+
"β",
|
| 1168 |
+
"θ",
|
| 1169 |
+
"ᄀ",
|
| 1170 |
+
"ᄁ",
|
| 1171 |
+
"ᄂ",
|
| 1172 |
+
"ᄃ",
|
| 1173 |
+
"ᄄ",
|
| 1174 |
+
"ᄅ",
|
| 1175 |
+
"ᄆ",
|
| 1176 |
+
"ᄇ",
|
| 1177 |
+
"ᄈ",
|
| 1178 |
+
"ᄉ",
|
| 1179 |
+
"ᄊ",
|
| 1180 |
+
"ᄋ",
|
| 1181 |
+
"ᄌ",
|
| 1182 |
+
"ᄍ",
|
| 1183 |
+
"ᄎ",
|
| 1184 |
+
"ᄏ",
|
| 1185 |
+
"ᄐ",
|
| 1186 |
+
"ᄑ",
|
| 1187 |
+
"ᄒ",
|
| 1188 |
+
"ᅡ",
|
| 1189 |
+
"ᅢ",
|
| 1190 |
+
"ᅣ",
|
| 1191 |
+
"ᅤ",
|
| 1192 |
+
"ᅥ",
|
| 1193 |
+
"ᅦ",
|
| 1194 |
+
"ᅧ",
|
| 1195 |
+
"ᅨ",
|
| 1196 |
+
"ᅩ",
|
| 1197 |
+
"ᅪ",
|
| 1198 |
+
"ᅫ",
|
| 1199 |
+
"ᅬ",
|
| 1200 |
+
"ᅭ",
|
| 1201 |
+
"ᅮ",
|
| 1202 |
+
"ᅯ",
|
| 1203 |
+
"ᅰ",
|
| 1204 |
+
"ᅱ",
|
| 1205 |
+
"ᅲ",
|
| 1206 |
+
"ᅳ",
|
| 1207 |
+
"ᅴ",
|
| 1208 |
+
"ᅵ",
|
| 1209 |
+
"ᆨ",
|
| 1210 |
+
"ᆫ",
|
| 1211 |
+
"ᆮ",
|
| 1212 |
+
"ᆯ",
|
| 1213 |
+
"ᆷ",
|
| 1214 |
+
"ᆸ",
|
| 1215 |
+
"ᆼ",
|
| 1216 |
+
"ㄸ",
|
| 1217 |
+
"!",
|
| 1218 |
+
"?",
|
| 1219 |
+
"…",
|
| 1220 |
+
",",
|
| 1221 |
+
".",
|
| 1222 |
+
"'",
|
| 1223 |
+
"-",
|
| 1224 |
+
"SP",
|
| 1225 |
+
"UNK"
|
| 1226 |
+
]
|
| 1227 |
+
|
| 1228 |
+
LANG_TO_SYMBOL_MAP = {
|
| 1229 |
+
"ZH": zh_mix_en_symbols,
|
| 1230 |
+
"ZH_MIX_EN": zh_mix_en_symbols,
|
| 1231 |
+
"JP": jp_symbols,
|
| 1232 |
+
"EN": en_symbols,
|
| 1233 |
+
"KR": kr_symbols,
|
| 1234 |
+
"ES": es_symbols,
|
| 1235 |
+
"SP": es_symbols,
|
| 1236 |
+
"FR": fr_symbols
|
| 1237 |
+
}
|
libmelotts/python/text/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .symbols import *
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def cleaned_text_to_sequence(cleaned_text, tones, language, symbol_to_id=None):
|
| 8 |
+
"""Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
|
| 9 |
+
Args:
|
| 10 |
+
text: string to convert to a sequence
|
| 11 |
+
Returns:
|
| 12 |
+
List of integers corresponding to the symbols in the text
|
| 13 |
+
"""
|
| 14 |
+
symbol_to_id_map = symbol_to_id if symbol_to_id else _symbol_to_id
|
| 15 |
+
phones = [symbol_to_id_map[symbol] for symbol in cleaned_text]
|
| 16 |
+
tone_start = language_tone_start_map[language]
|
| 17 |
+
tones = [i + tone_start for i in tones]
|
| 18 |
+
lang_id = language_id_map[language]
|
| 19 |
+
lang_ids = [lang_id for i in phones]
|
| 20 |
+
return phones, tones, lang_ids
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_bert(norm_text, word2ph, language, device):
|
| 24 |
+
from .chinese_bert import get_bert_feature as zh_bert
|
| 25 |
+
from .english_bert import get_bert_feature as en_bert
|
| 26 |
+
from .japanese_bert import get_bert_feature as jp_bert
|
| 27 |
+
from .chinese_mix import get_bert_feature as zh_mix_en_bert
|
| 28 |
+
from .spanish_bert import get_bert_feature as sp_bert
|
| 29 |
+
from .french_bert import get_bert_feature as fr_bert
|
| 30 |
+
from .korean import get_bert_feature as kr_bert
|
| 31 |
+
|
| 32 |
+
lang_bert_func_map = {"ZH": zh_bert, "EN": en_bert, "JP": jp_bert, 'ZH_MIX_EN': zh_mix_en_bert,
|
| 33 |
+
'FR': fr_bert, 'SP': sp_bert, 'ES': sp_bert, "KR": kr_bert}
|
| 34 |
+
bert = lang_bert_func_map[language](norm_text, word2ph, device)
|
| 35 |
+
return bert
|
libmelotts/python/text/bert-base-multilingual-uncased/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
libmelotts/python/text/bert-base-multilingual-uncased/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libmelotts/python/text/bert-base-multilingual-uncased/tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"do_lower_case": true,
|
| 4 |
+
"mask_token": "[MASK]",
|
| 5 |
+
"model_max_length": 512,
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"special_tokens_map_file": null,
|
| 9 |
+
"strip_accents": null,
|
| 10 |
+
"tokenize_chinese_chars": true,
|
| 11 |
+
"tokenizer_class": "BertTokenizer",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
libmelotts/python/text/bert-base-multilingual-uncased/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libmelotts/python/text/bert-base-uncased/special_tokens_map.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"mask_token": "[MASK]",
|
| 4 |
+
"pad_token": "[PAD]",
|
| 5 |
+
"sep_token": "[SEP]",
|
| 6 |
+
"unk_token": "[UNK]"
|
| 7 |
+
}
|
libmelotts/python/text/bert-base-uncased/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libmelotts/python/text/bert-base-uncased/tokenizer_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": "[CLS]",
|
| 3 |
+
"do_lower_case": true,
|
| 4 |
+
"mask_token": "[MASK]",
|
| 5 |
+
"model_max_length": 512,
|
| 6 |
+
"pad_token": "[PAD]",
|
| 7 |
+
"sep_token": "[SEP]",
|
| 8 |
+
"special_tokens_map_file": null,
|
| 9 |
+
"strip_accents": null,
|
| 10 |
+
"tokenize_chinese_chars": true,
|
| 11 |
+
"tokenizer_class": "BertTokenizer",
|
| 12 |
+
"unk_token": "[UNK]"
|
| 13 |
+
}
|
libmelotts/python/text/bert-base-uncased/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
libmelotts/python/text/chinese.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
import cn2an
|
| 5 |
+
from pypinyin import lazy_pinyin, Style
|
| 6 |
+
|
| 7 |
+
from .symbols import punctuation
|
| 8 |
+
from .tone_sandhi import ToneSandhi
|
| 9 |
+
|
| 10 |
+
current_file_path = os.path.dirname(__file__)
|
| 11 |
+
pinyin_to_symbol_map = {
|
| 12 |
+
line.split("\t")[0]: line.strip().split("\t")[1]
|
| 13 |
+
for line in open(os.path.join(current_file_path, "opencpop-strict.txt")).readlines()
|
| 14 |
+
}
|
| 15 |
+
|
| 16 |
+
rep_map = {
|
| 17 |
+
":": ",",
|
| 18 |
+
";": ",",
|
| 19 |
+
",": ",",
|
| 20 |
+
"。": ".",
|
| 21 |
+
"!": "!",
|
| 22 |
+
"?": "?",
|
| 23 |
+
"\n": ".",
|
| 24 |
+
"·": ",",
|
| 25 |
+
"、": ",",
|
| 26 |
+
"...": "…",
|
| 27 |
+
"$": ".",
|
| 28 |
+
"“": "'",
|
| 29 |
+
"”": "'",
|
| 30 |
+
"‘": "'",
|
| 31 |
+
"’": "'",
|
| 32 |
+
"(": "'",
|
| 33 |
+
")": "'",
|
| 34 |
+
"(": "'",
|
| 35 |
+
")": "'",
|
| 36 |
+
"《": "'",
|
| 37 |
+
"》": "'",
|
| 38 |
+
"【": "'",
|
| 39 |
+
"】": "'",
|
| 40 |
+
"[": "'",
|
| 41 |
+
"]": "'",
|
| 42 |
+
"—": "-",
|
| 43 |
+
"~": "-",
|
| 44 |
+
"~": "-",
|
| 45 |
+
"「": "'",
|
| 46 |
+
"」": "'",
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
tone_modifier = ToneSandhi()
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def replace_punctuation(text):
|
| 53 |
+
text = text.replace("嗯", "恩").replace("呣", "母")
|
| 54 |
+
pattern = re.compile("|".join(re.escape(p) for p in rep_map.keys()))
|
| 55 |
+
|
| 56 |
+
replaced_text = pattern.sub(lambda x: rep_map[x.group()], text)
|
| 57 |
+
|
| 58 |
+
replaced_text = re.sub(
|
| 59 |
+
r"[^\u4e00-\u9fa5" + "".join(punctuation) + r"]+", "", replaced_text
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
return replaced_text
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def g2p(text):
|
| 66 |
+
pattern = r"(?<=[{0}])\s*".format("".join(punctuation))
|
| 67 |
+
sentences = [i for i in re.split(pattern, text) if i.strip() != ""]
|
| 68 |
+
phones, tones, word2ph = _g2p(sentences)
|
| 69 |
+
assert sum(word2ph) == len(phones)
|
| 70 |
+
assert len(word2ph) == len(text) # Sometimes it will crash,you can add a try-catch.
|
| 71 |
+
phones = ["_"] + phones + ["_"]
|
| 72 |
+
tones = [0] + tones + [0]
|
| 73 |
+
word2ph = [1] + word2ph + [1]
|
| 74 |
+
return phones, tones, word2ph
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _get_initials_finals(word):
|
| 78 |
+
initials = []
|
| 79 |
+
finals = []
|
| 80 |
+
orig_initials = lazy_pinyin(word, neutral_tone_with_five=True, style=Style.INITIALS)
|
| 81 |
+
orig_finals = lazy_pinyin(
|
| 82 |
+
word, neutral_tone_with_five=True, style=Style.FINALS_TONE3
|
| 83 |
+
)
|
| 84 |
+
for c, v in zip(orig_initials, orig_finals):
|
| 85 |
+
initials.append(c)
|
| 86 |
+
finals.append(v)
|
| 87 |
+
return initials, finals
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def _g2p(segments):
|
| 91 |
+
import jieba.posseg as psg
|
| 92 |
+
|
| 93 |
+
phones_list = []
|
| 94 |
+
tones_list = []
|
| 95 |
+
word2ph = []
|
| 96 |
+
for seg in segments:
|
| 97 |
+
# Replace all English words in the sentence
|
| 98 |
+
seg = re.sub("[a-zA-Z]+", "", seg)
|
| 99 |
+
seg_cut = psg.lcut(seg)
|
| 100 |
+
initials = []
|
| 101 |
+
finals = []
|
| 102 |
+
seg_cut = tone_modifier.pre_merge_for_modify(seg_cut)
|
| 103 |
+
for word, pos in seg_cut:
|
| 104 |
+
if pos == "eng":
|
| 105 |
+
import pdb; pdb.set_trace()
|
| 106 |
+
continue
|
| 107 |
+
sub_initials, sub_finals = _get_initials_finals(word)
|
| 108 |
+
sub_finals = tone_modifier.modified_tone(word, pos, sub_finals)
|
| 109 |
+
initials.append(sub_initials)
|
| 110 |
+
finals.append(sub_finals)
|
| 111 |
+
|
| 112 |
+
# assert len(sub_initials) == len(sub_finals) == len(word)
|
| 113 |
+
initials = sum(initials, [])
|
| 114 |
+
finals = sum(finals, [])
|
| 115 |
+
#
|
| 116 |
+
for c, v in zip(initials, finals):
|
| 117 |
+
raw_pinyin = c + v
|
| 118 |
+
# NOTE: post process for pypinyin outputs
|
| 119 |
+
# we discriminate i, ii and iii
|
| 120 |
+
if c == v:
|
| 121 |
+
assert c in punctuation
|
| 122 |
+
phone = [c]
|
| 123 |
+
tone = "0"
|
| 124 |
+
word2ph.append(1)
|
| 125 |
+
else:
|
| 126 |
+
v_without_tone = v[:-1]
|
| 127 |
+
tone = v[-1]
|
| 128 |
+
|
| 129 |
+
pinyin = c + v_without_tone
|
| 130 |
+
assert tone in "12345"
|
| 131 |
+
|
| 132 |
+
if c:
|
| 133 |
+
# 多音节
|
| 134 |
+
v_rep_map = {
|
| 135 |
+
"uei": "ui",
|
| 136 |
+
"iou": "iu",
|
| 137 |
+
"uen": "un",
|
| 138 |
+
}
|
| 139 |
+
if v_without_tone in v_rep_map.keys():
|
| 140 |
+
pinyin = c + v_rep_map[v_without_tone]
|
| 141 |
+
else:
|
| 142 |
+
# 单音节
|
| 143 |
+
pinyin_rep_map = {
|
| 144 |
+
"ing": "ying",
|
| 145 |
+
"i": "yi",
|
| 146 |
+
"in": "yin",
|
| 147 |
+
"u": "wu",
|
| 148 |
+
}
|
| 149 |
+
if pinyin in pinyin_rep_map.keys():
|
| 150 |
+
pinyin = pinyin_rep_map[pinyin]
|
| 151 |
+
else:
|
| 152 |
+
single_rep_map = {
|
| 153 |
+
"v": "yu",
|
| 154 |
+
"e": "e",
|
| 155 |
+
"i": "y",
|
| 156 |
+
"u": "w",
|
| 157 |
+
}
|
| 158 |
+
if pinyin[0] in single_rep_map.keys():
|
| 159 |
+
pinyin = single_rep_map[pinyin[0]] + pinyin[1:]
|
| 160 |
+
|
| 161 |
+
assert pinyin in pinyin_to_symbol_map.keys(), (pinyin, seg, raw_pinyin)
|
| 162 |
+
phone = pinyin_to_symbol_map[pinyin].split(" ")
|
| 163 |
+
word2ph.append(len(phone))
|
| 164 |
+
|
| 165 |
+
phones_list += phone
|
| 166 |
+
tones_list += [int(tone)] * len(phone)
|
| 167 |
+
return phones_list, tones_list, word2ph
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def text_normalize(text):
|
| 171 |
+
numbers = re.findall(r"\d+(?:\.?\d+)?", text)
|
| 172 |
+
for number in numbers:
|
| 173 |
+
text = text.replace(number, cn2an.an2cn(number), 1)
|
| 174 |
+
text = replace_punctuation(text)
|
| 175 |
+
return text
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def get_bert_feature(text, word2ph, device=None):
|
| 179 |
+
from text import chinese_bert
|
| 180 |
+
|
| 181 |
+
return chinese_bert.get_bert_feature(text, word2ph, device=device)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
if __name__ == "__main__":
|
| 185 |
+
from text.chinese_bert import get_bert_feature
|
| 186 |
+
|
| 187 |
+
text = "啊!chemistry 但是《原神》是由,米哈\游自主, [研发]的一款全.新开放世界.冒险游戏"
|
| 188 |
+
text = text_normalize(text)
|
| 189 |
+
print(text)
|
| 190 |
+
phones, tones, word2ph = g2p(text)
|
| 191 |
+
bert = get_bert_feature(text, word2ph)
|
| 192 |
+
|
| 193 |
+
print(phones, tones, word2ph, bert.shape)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# # 示例用法
|
| 197 |
+
# text = "这是一个示例文本:,你好!这是一个测试...."
|
| 198 |
+
# print(g2p_paddle(text)) # 输出: 这是一个示例文本你好这是一个测试
|