lihongjie committed on
Commit
08f99bf
·
1 Parent(s): b13170e

first commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +76 -0
  2. README.md +59 -3
  3. VoxCPM-0.5B/config.json +52 -0
  4. VoxCPM-0.5B/special_tokens_map.json +81 -0
  5. VoxCPM-0.5B/tokenizer.json +0 -0
  6. VoxCPM-0.5B/tokenizer_config.json +212 -0
  7. assets/en_man1.mp3 +3 -0
  8. assets/en_man1.txt +3 -0
  9. assets/en_woman1.mp3 +3 -0
  10. assets/en_woman1.txt +3 -0
  11. assets/zh_man1.txt +3 -0
  12. assets/zh_man1.wav +3 -0
  13. assets/zh_man2.mp3 +3 -0
  14. assets/zh_man2.txt +3 -0
  15. assets/zh_woman1.txt +3 -0
  16. assets/zh_woman1.wav +3 -0
  17. axmodels/audio_vae.decoder.onnx +3 -0
  18. axmodels/audio_vae.encoder.onnx +3 -0
  19. axmodels/enc_to_lm_proj.axmodel +3 -0
  20. axmodels/feat_encoder.in_proj.axmodel +3 -0
  21. axmodels/feat_encoder.in_proj.onnx +3 -0
  22. axmodels/feat_encoder.special_token.npy +3 -0
  23. axmodels/fsq_layer.axmodel +3 -0
  24. axmodels/lm_to_dit_proj.axmodel +3 -0
  25. axmodels/locdit.part1.axmodel +3 -0
  26. axmodels/locdit.part3.axmodel +3 -0
  27. axmodels/res_to_dit_proj.axmodel +3 -0
  28. axmodels/stop_predictor.axmodel +3 -0
  29. base_lm-axmodels/MiniCPMForCausalLM_p64_l0_together.axmodel +3 -0
  30. base_lm-axmodels/MiniCPMForCausalLM_p64_l10_together.axmodel +3 -0
  31. base_lm-axmodels/MiniCPMForCausalLM_p64_l11_together.axmodel +3 -0
  32. base_lm-axmodels/MiniCPMForCausalLM_p64_l12_together.axmodel +3 -0
  33. base_lm-axmodels/MiniCPMForCausalLM_p64_l13_together.axmodel +3 -0
  34. base_lm-axmodels/MiniCPMForCausalLM_p64_l14_together.axmodel +3 -0
  35. base_lm-axmodels/MiniCPMForCausalLM_p64_l15_together.axmodel +3 -0
  36. base_lm-axmodels/MiniCPMForCausalLM_p64_l16_together.axmodel +3 -0
  37. base_lm-axmodels/MiniCPMForCausalLM_p64_l17_together.axmodel +3 -0
  38. base_lm-axmodels/MiniCPMForCausalLM_p64_l18_together.axmodel +3 -0
  39. base_lm-axmodels/MiniCPMForCausalLM_p64_l19_together.axmodel +3 -0
  40. base_lm-axmodels/MiniCPMForCausalLM_p64_l1_together.axmodel +3 -0
  41. base_lm-axmodels/MiniCPMForCausalLM_p64_l20_together.axmodel +3 -0
  42. base_lm-axmodels/MiniCPMForCausalLM_p64_l21_together.axmodel +3 -0
  43. base_lm-axmodels/MiniCPMForCausalLM_p64_l22_together.axmodel +3 -0
  44. base_lm-axmodels/MiniCPMForCausalLM_p64_l23_together.axmodel +3 -0
  45. base_lm-axmodels/MiniCPMForCausalLM_p64_l2_together.axmodel +3 -0
  46. base_lm-axmodels/MiniCPMForCausalLM_p64_l3_together.axmodel +3 -0
  47. base_lm-axmodels/MiniCPMForCausalLM_p64_l4_together.axmodel +3 -0
  48. base_lm-axmodels/MiniCPMForCausalLM_p64_l5_together.axmodel +3 -0
  49. base_lm-axmodels/MiniCPMForCausalLM_p64_l6_together.axmodel +3 -0
  50. base_lm-axmodels/MiniCPMForCausalLM_p64_l7_together.axmodel +3 -0
.gitattributes CHANGED
@@ -33,3 +33,79 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ axmodels/feat_encoder.in_proj.axmodel filter=lfs diff=lfs merge=lfs -text
37
+ axmodels/lm_to_dit_proj.axmodel filter=lfs diff=lfs merge=lfs -text
38
+ axmodels/stop_predictor.axmodel filter=lfs diff=lfs merge=lfs -text
39
+ axmodels/audio_vae.decoder.onnx filter=lfs diff=lfs merge=lfs -text
40
+ axmodels/audio_vae.encoder.onnx filter=lfs diff=lfs merge=lfs -text
41
+ axmodels/enc_to_lm_proj.axmodel filter=lfs diff=lfs merge=lfs -text
42
+ axmodels/locdit.part1.axmodel filter=lfs diff=lfs merge=lfs -text
43
+ axmodels/locdit.part3.axmodel filter=lfs diff=lfs merge=lfs -text
44
+ axmodels/res_to_dit_proj.axmodel filter=lfs diff=lfs merge=lfs -text
45
+ axmodels/feat_encoder.in_proj.onnx filter=lfs diff=lfs merge=lfs -text
46
+ axmodels/feat_encoder.special_token.npy filter=lfs diff=lfs merge=lfs -text
47
+ axmodels/fsq_layer.axmodel filter=lfs diff=lfs merge=lfs -text
48
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
49
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
50
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
51
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
52
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
53
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
54
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
55
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
56
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
57
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
58
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
59
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
60
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
61
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
62
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
63
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
64
+ base_lm-axmodels/model.embed_tokens.weight.npy filter=lfs diff=lfs merge=lfs -text
65
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
66
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
67
+ base_lm-axmodels/MiniCPMForCausalLM_post.axmodel filter=lfs diff=lfs merge=lfs -text
68
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
69
+ base_lm-axmodels/model.embed_tokens.weight.float32.bin filter=lfs diff=lfs merge=lfs -text
70
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
71
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
72
+ base_lm-axmodels/model.embed_tokens.weight.bfloat16.bin filter=lfs diff=lfs merge=lfs -text
73
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
74
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
75
+ base_lm-axmodels/MiniCPMForCausalLM_p64_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
76
+ feat_decoder_estimator_decoder-axmodels/MiniCPMForCausalLM_p64_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
77
+ feat_decoder_estimator_decoder-axmodels/MiniCPMForCausalLM_post.axmodel filter=lfs diff=lfs merge=lfs -text
78
+ feat_decoder_estimator_decoder-axmodels/model.embed_tokens.weight.bfloat16.bin filter=lfs diff=lfs merge=lfs -text
79
+ feat_decoder_estimator_decoder-axmodels/model.embed_tokens.weight.float32.bin filter=lfs diff=lfs merge=lfs -text
80
+ feat_decoder_estimator_decoder-axmodels/model.embed_tokens.weight.npy filter=lfs diff=lfs merge=lfs -text
81
+ feat_decoder_estimator_decoder-axmodels/MiniCPMForCausalLM_p64_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
82
+ feat_decoder_estimator_decoder-axmodels/MiniCPMForCausalLM_p64_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
83
+ feat_decoder_estimator_decoder-axmodels/MiniCPMForCausalLM_p64_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
84
+ feat_encoder_encoder-axmodels/MiniCPMForCausalLM_p64_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
85
+ feat_encoder_encoder-axmodels/MiniCPMForCausalLM_p64_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
86
+ feat_encoder_encoder-axmodels/MiniCPMForCausalLM_post.axmodel filter=lfs diff=lfs merge=lfs -text
87
+ feat_encoder_encoder-axmodels/model.embed_tokens.weight.bfloat16.bin filter=lfs diff=lfs merge=lfs -text
88
+ feat_encoder_encoder-axmodels/model.embed_tokens.weight.float32.bin filter=lfs diff=lfs merge=lfs -text
89
+ feat_encoder_encoder-axmodels/model.embed_tokens.weight.npy filter=lfs diff=lfs merge=lfs -text
90
+ feat_encoder_encoder-axmodels/MiniCPMForCausalLM_p64_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
91
+ feat_encoder_encoder-axmodels/MiniCPMForCausalLM_p64_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
92
+ residual_lm-axmodels/MiniCPMForCausalLM_post.axmodel filter=lfs diff=lfs merge=lfs -text
93
+ residual_lm-axmodels/model.embed_tokens.weight.bfloat16.bin filter=lfs diff=lfs merge=lfs -text
94
+ residual_lm-axmodels/MiniCPMForCausalLM_p64_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
95
+ residual_lm-axmodels/MiniCPMForCausalLM_p64_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
96
+ residual_lm-axmodels/MiniCPMForCausalLM_p64_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
97
+ residual_lm-axmodels/MiniCPMForCausalLM_p64_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
98
+ residual_lm-axmodels/model.embed_tokens.weight.float32.bin filter=lfs diff=lfs merge=lfs -text
99
+ residual_lm-axmodels/model.embed_tokens.weight.npy filter=lfs diff=lfs merge=lfs -text
100
+ residual_lm-axmodels/MiniCPMForCausalLM_p64_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
101
+ residual_lm-axmodels/MiniCPMForCausalLM_p64_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
102
+ assets/en_woman1.txt filter=lfs diff=lfs merge=lfs -text
103
+ assets/zh_man1.wav filter=lfs diff=lfs merge=lfs -text
104
+ assets/zh_man2.mp3 filter=lfs diff=lfs merge=lfs -text
105
+ assets/zh_man2.txt filter=lfs diff=lfs merge=lfs -text
106
+ assets/zh_woman1.wav filter=lfs diff=lfs merge=lfs -text
107
+ assets/en_man1.txt filter=lfs diff=lfs merge=lfs -text
108
+ assets/en_woman1.mp3 filter=lfs diff=lfs merge=lfs -text
109
+ assets/zh_man1.txt filter=lfs diff=lfs merge=lfs -text
110
+ assets/zh_woman1.txt filter=lfs diff=lfs merge=lfs -text
111
+ assets/en_man1.mp3 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,59 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - en
5
+ - zh
6
+ base_model:
7
+ - VoxCPM
8
+ pipeline_tag: text-to-speech
9
+ library_name: transformers
10
+ tags:
11
+ - VoxCPM
12
+ - Speech
13
+ ---
14
+
15
+ # VoxCPM
16
+ This version of VoxCPM has been converted to run on the Axera NPU using **w8a16** quantization.
17
+ Compatible with Pulsar2 version: 4.2
18
+
19
+ ## Conversion tool links
20
+ If you are interested in model conversion, you can try exporting the axmodel yourself, starting from the original repository:
21
+ [VoxCPM official](https://github.com/OpenBMB/VoxCPM/)
22
+
23
+ [Pulsar2 Link, How to Convert LLM from Huggingface to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/appendix/build_llm.html)
24
+
25
+ [AXera NPU HOST LLM Runtime](https://github.com/AXERA-TECH/VoxCPM.Axera)
26
+
27
+ ## Supported Platforms
28
+
29
+ - AX650
30
+ - AX650N DEMO Board
31
+ - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
32
+ - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
33
+
34
+
35
+
36
+ ## How to use
37
+
38
+ Download all files from this repository to the device.
39
+ ### 1. Install packages
40
+
41
+ #### 1. Install voxcpm axinfer package
42
+ ```
43
+ git clone -b 1.0.4-axmode_infer https://github.com/techshoww/VoxCPM.git
44
+ cd VoxCPM
45
+ pip3 install .
46
+ ```
47
+
48
+ #### 2. Download zipenhancer
49
+ ```
50
+ pip3 install modelscope
51
+ modelscope download --model iic/speech_zipenhancer_ans_multiloss_16k_base --local_dir iic/speech_zipenhancer_ans_multiloss_16k_base
52
+ ```
53
+
54
+ ### 2. Run on Axera Device
55
+ Go to the root directory of this project and run:
56
+ ```
57
+ python3 run_ax650.py
58
+ ```
59
+
VoxCPM-0.5B/config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architecture": "voxcpm",
3
+ "lm_config": {
4
+ "bos_token_id": 1,
5
+ "eos_token_id": 2,
6
+ "hidden_size": 1024,
7
+ "intermediate_size": 4096,
8
+ "max_position_embeddings": 32768,
9
+ "num_attention_heads": 16,
10
+ "num_hidden_layers": 24,
11
+ "num_key_value_heads": 2,
12
+ "rms_norm_eps": 1e-05,
13
+ "rope_theta": 10000,
14
+ "rope_scaling": {
15
+ "type": "longrope",
16
+ "long_factor": [1.0004360675811768, 1.0668443441390991, 1.1631425619125366, 1.3025742769241333, 1.5040205717086792, 1.7941505908966064, 2.2101221084594727, 2.802666664123535, 3.6389970779418945, 4.804192543029785, 6.39855432510376, 8.527148246765137, 11.277542114257812, 14.684998512268066, 18.69317054748535, 23.13019371032715, 27.72362518310547, 32.1606559753418, 36.168827056884766, 39.57627868652344, 42.32667541503906, 44.45526885986328, 46.04962921142578, 47.21482849121094, 48.05115509033203, 48.64370346069336, 49.05967712402344, 49.34980392456055, 49.551246643066406, 49.69068145751953, 49.78697967529297, 49.85338592529297],
17
+ "short_factor": [1.0004360675811768, 1.0668443441390991, 1.1631425619125366, 1.3025742769241333, 1.5040205717086792, 1.7941505908966064, 2.2101221084594727, 2.802666664123535, 3.6389970779418945, 4.804192543029785, 6.39855432510376, 8.527148246765137, 11.277542114257812, 14.684998512268066, 18.69317054748535, 23.13019371032715, 27.72362518310547, 32.1606559753418, 36.168827056884766, 39.57627868652344, 42.32667541503906, 44.45526885986328, 46.04962921142578, 47.21482849121094, 48.05115509033203, 48.64370346069336, 49.05967712402344, 49.34980392456055, 49.551246643066406, 49.69068145751953, 49.78697967529297, 49.85338592529297],
18
+ "original_max_position_embeddings": 32768
19
+ },
20
+ "vocab_size": 73448,
21
+ "scale_emb": 12,
22
+ "dim_model_base": 256,
23
+ "scale_depth": 1.4,
24
+ "use_mup": false
25
+ },
26
+ "patch_size": 2,
27
+ "feat_dim": 64,
28
+ "scalar_quantization_latent_dim": 256,
29
+ "scalar_quantization_scale": 9,
30
+ "residual_lm_num_layers": 6,
31
+ "encoder_config": {
32
+ "hidden_dim": 1024,
33
+ "ffn_dim": 4096,
34
+ "num_heads": 16,
35
+ "num_layers": 4
36
+ },
37
+ "dit_config": {
38
+ "hidden_dim": 1024,
39
+ "ffn_dim": 4096,
40
+ "num_heads": 16,
41
+ "num_layers": 4,
42
+ "cfm_config": {
43
+ "sigma_min": 1e-06,
44
+ "solver": "euler",
45
+ "t_scheduler": "log-norm",
46
+ "inference_cfg_rate": 2.0
47
+ }
48
+ },
49
+ "max_length": 4096,
50
+ "device": "cuda",
51
+ "dtype": "bfloat16"
52
+ }
VoxCPM-0.5B/special_tokens_map.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_end|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_start|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<|tool_call|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "<|execute_start|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<|execute_end|>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "<|fim_prefix|>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<|fim_middle|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "<|fim_suffix|>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ }
59
+ ],
60
+ "bos_token": {
61
+ "content": "<s>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false
66
+ },
67
+ "eos_token": {
68
+ "content": "</s>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false
73
+ },
74
+ "unk_token": {
75
+ "content": "<unk>",
76
+ "lstrip": false,
77
+ "normalized": false,
78
+ "rstrip": false,
79
+ "single_word": false
80
+ }
81
+ }
VoxCPM-0.5B/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
VoxCPM-0.5B/tokenizer_config.json ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "101": {
30
+ "content": "<|audio_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "102": {
38
+ "content": "<|audio_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "103": {
46
+ "content": "<|audio_prompt_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "104": {
54
+ "content": "<|audio_prompt_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "105": {
62
+ "content": "<|background|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "106": {
70
+ "content": "<|/background|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "107": {
78
+ "content": "<|characters|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "108": {
86
+ "content": "<|/characters|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "109": {
94
+ "content": "<|speaker_id|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "110": {
102
+ "content": "<|/speaker_id|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "111": {
110
+ "content": "<|span|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "112": {
118
+ "content": "<|/span|>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": true
124
+ },
125
+ "73440": {
126
+ "content": "<|im_end|>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": true
132
+ },
133
+ "73441": {
134
+ "content": "<|im_start|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": true
140
+ },
141
+ "73442": {
142
+ "content": "<|tool_call|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": true
148
+ },
149
+ "73443": {
150
+ "content": "<|execute_start|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": true
156
+ },
157
+ "73444": {
158
+ "content": "<|execute_end|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": true
164
+ },
165
+ "73445": {
166
+ "content": "<|fim_prefix|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": true
172
+ },
173
+ "73446": {
174
+ "content": "<|fim_middle|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": true
180
+ },
181
+ "73447": {
182
+ "content": "<|fim_suffix|>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": true
188
+ }
189
+ },
190
+ "additional_special_tokens": [
191
+ "<|im_end|>",
192
+ "<|im_start|>",
193
+ "<|tool_call|>",
194
+ "<|execute_start|>",
195
+ "<|execute_end|>",
196
+ "<|fim_prefix|>",
197
+ "<|fim_middle|>",
198
+ "<|fim_suffix|>"
199
+ ],
200
+ "bos_token": "<s>",
201
+ "clean_up_tokenization_spaces": false,
202
+ "eos_token": "<|im_end|>",
203
+ "legacy": true,
204
+ "model_max_length": 1000000000000000019884624838656,
205
+ "pad_token": null,
206
+ "sp_model_kwargs": {},
207
+ "spaces_between_special_tokens": false,
208
+ "tokenizer_class": "LlamaTokenizer",
209
+ "unk_token": "<unk>",
210
+ "use_default_system_prompt": false,
211
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
212
+ }
assets/en_man1.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:461dd4cc9cf5bf6b774a9978cc9b7ca96033b214714b12413ecfe9eb1bf03ab9
3
+ size 15309
assets/en_man1.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce5d3c2b96bf649e61817fd44c913c9abfa2314b3265ad6f115fd5c2477cc017
3
+ size 66
assets/en_woman1.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:872ff69b74b37763cfc4a49bdd39d8a2acf51f428e42e1ab9fa3dfc0c4a2e3d4
3
+ size 16941
assets/en_woman1.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14383963cba5217b00603065c4c1fc4167155d5c8ae8d6b5b6b92c81b8eef6b
3
+ size 67
assets/zh_man1.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac9b54ab8e18581b2fce95bd8e4f8aa4e840beec28d56304b86359e095c57bce
3
+ size 57
assets/zh_man1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1153fca1303cd20470317a4ba93027cc5e172214b777747215add36f41109e
3
+ size 1536044
assets/zh_man2.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd012ac30fe1ffb5bc3e356a84f4f668a25a62c72f810ffae218f83cbcfdf53e
3
+ size 31761
assets/zh_man2.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c68ca97e76f6a966fbee90d95ba2210dad6f1c07fcae0f445282b0035823472
3
+ size 69
assets/zh_woman1.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff1a7dd8cb643e4f769735733e7547ff66aa5b29d99f674131f3fb448446efa
3
+ size 45
assets/zh_woman1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd199eb7109fd6ce9943cb297e3cf350c1073af014063dfadbdc100230526243
3
+ size 111496
axmodels/audio_vae.decoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eea4adaeec41736b99647b9a6522d5f4e6bb739b2704457a9629d9849876d55
3
+ size 107734268
axmodels/audio_vae.encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba12c6ef452a0203eac6a5114e03fa568788695603bcbf391245595dc8fe5f28
3
+ size 192214193
axmodels/enc_to_lm_proj.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb1a23a19f1df319a888d3d264af1498ef67fa7d9e24d3e9ac1a600fd682a85
3
+ size 1136943
axmodels/feat_encoder.in_proj.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a74576c985e94b51446227a5e6613bb028c68cc5f9e4d85f49230ee600ce315f
3
+ size 88232
axmodels/feat_encoder.in_proj.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd32eeaef9f93e7ea5696aaa1a3ecd1f9f0897df8288808dff6678106c3fd84
3
+ size 266528
axmodels/feat_encoder.special_token.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7e829944d1bbc7953a9ebb763df77c198ab4fd985b789790dcfe92c3dc24f7a
3
+ size 4224
axmodels/fsq_layer.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77b4c8e0bb3c40dc7b3eea636bcb83223668ed6e73721b7955a237b35aa7b7c8
3
+ size 595889
axmodels/lm_to_dit_proj.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c90981d8b811c530c9180c579dd736ee054bbad213152730b6f99bc97e7fd5f1
3
+ size 1136860
axmodels/locdit.part1.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da8cd78fe6d4359657e6fdcd49a0c90ba3c984c19b061deed0341b711d5498c
3
+ size 2452671
axmodels/locdit.part3.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7bf4db83090ecbe8b69b63e2ac311324edc820fe7d8e640484af611bd1e5067
3
+ size 76964
axmodels/res_to_dit_proj.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f1b72b47814d65df2dfcb78be4db4cbed43312384616224f925e64efa8fa2b5
3
+ size 1136860
axmodels/stop_predictor.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4836478793e6117dd89cf833623b00a70c59e98d48a1116d79edaa41f50a9f97
3
+ size 1156587
base_lm-axmodels/MiniCPMForCausalLM_p64_l0_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c58274d695a0f23535ba5d6c1c53324f0946cb3067673111840e01a5425fe349
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l10_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2cd037cf37a8fc506f985d28137402b89bb07ea8e253fd54fc1e158dc5bbd07
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l11_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cde9371659061764d32821cd3e396a037afdb8246ea2b11c98d5baa403bf140
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l12_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dac1ee90898ee338062d5407b44b13d994c78331bccc256fa9b397df95aee230
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l13_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d18125dd6564abc00301f98276c4b460e9f04f249706e5900e54a9569b0e3d78
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l14_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7b167ca963e80c0521120c5137b2d387632b8c4d9fde947c3701b21d7a3d5c8
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l15_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6929c20e5f436df9fc1e9b1099127fedf59e377bce749f7fa2a3cc018fb387f5
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l16_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e2c3fa287bff986e0302c27497daf4eb7b33177cb08b0ab141faca88c858826
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l17_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b478b2331dcb799359d1c667573f3afe1e0554700571ed6b4fa9756b9d4b71
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l18_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00c5338e385f0603d82dc34a3d11154c446c5107fdc621756f9bf7cd76a1032d
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l19_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede8fc90abd1f99344880d9ec56d5086b46a2ff9f3843fd400409714f9165c17
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l1_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:927856f2de1d38a55266bed388ab9b1d57768b749c4a8cbffac7ab2b745aca50
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l20_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8cca37a72c959230e830b852c580efbf3b537f15fc2dbce6ad604ee148d4a07
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l21_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34fbf2c43bfbc2471f3e255a81907bb71d63b0f9ffc4151bb71163a0bdd60b86
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l22_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a523be7636d730cafa1aa22db929c7ffdf7876e4207d68cdbc2d04dc049bb2
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l23_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a4df1c1be20d45d0064e4aba003d830b8785e7ea62e8313455cf99d6e13ce5c
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l2_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f13e75f8c079b19a750b9c80784cdfd145bfcbb96c163d24a39be704f589302
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l3_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d05ef03272b545428d7f42dc2455a53e1d44844b300ab7771df24211309dc404
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l4_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b228835da8e1c49a91c7f1180ebddebfe49c43bc2434141228e6405598099f4
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l5_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24ca17b15f3e09cf1c5c204c081392889fdab3e15a3015002955fe9a0220a048
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l6_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a424a62352d28079c1554638aa1369918986f1f4e6d745480662a730c7f337
3
+ size 17555739
base_lm-axmodels/MiniCPMForCausalLM_p64_l7_together.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f79c8caf897df24f2760a7dc1e7796f191a1fa1c403172358e24b87f82909e27
3
+ size 17555739