chua committed on
Commit
11bd624
·
1 Parent(s): 6564fb9

Upload GPT-TTS-int8-light

Browse files
Files changed (24) hide show
  1. .gitattributes +2 -0
  2. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/added_tokens.json +0 -0
  3. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/chat_template.jinja +85 -0
  4. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/genai_config.json +51 -0
  5. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/merges.txt +0 -0
  6. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/model.onnx +3 -0
  7. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/model.onnx.data +3 -0
  8. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/special_tokens_map.json +31 -0
  9. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/tokenizer.json +3 -0
  10. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/tokenizer_config.json +0 -0
  11. GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/vocab.json +0 -0
  12. GPA_TTS/GPA_TTS_INT8/model/reference/038142_global_tokens.npy +3 -0
  13. GPA_TTS/GPA_TTS_INT8/model/runtime_manifest.json +11 -0
  14. GPA_TTS/GPA_TTS_INT8/model/spark_detokenizer_int8.onnx +3 -0
  15. GPA_TTS/GPA_TTS_INT8/model/spark_detokenizer_int8.onnx.data +3 -0
  16. GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/config.json +83 -0
  17. GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/config.yaml +66 -0
  18. GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/model.safetensors +3 -0
  19. GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/wav2vec2-large-xlsr-53/config.json +83 -0
  20. GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/wav2vec2-large-xlsr-53/preprocessor_config.json +9 -0
  21. GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/wav2vec2-large-xlsr-53/pytorch_model.bin +3 -0
  22. GPA_TTS/GPA_TTS_INT8/voices/items/default/global_tokens.npy +3 -0
  23. GPA_TTS/GPA_TTS_INT8/voices/items/default/meta.json +9 -0
  24. GPA_TTS/GPA_TTS_INT8/voices/registry.json +59 -0
.gitattributes CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  figures/GPA.png filter=lfs diff=lfs merge=lfs -text
37
  figures/GPA_intro.png filter=lfs diff=lfs merge=lfs -text
38
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
36
  figures/GPA.png filter=lfs diff=lfs merge=lfs -text
37
  figures/GPA_intro.png filter=lfs diff=lfs merge=lfs -text
38
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+
40
+ *.onnx.data filter=lfs diff=lfs merge=lfs -text
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/added_tokens.json ADDED
The diff for this file is too large to render. See raw diff
 
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/chat_template.jinja ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
+ {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
+ {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
+ {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
+ {%- endfor %}
66
+ {%- endif %}
67
+ {{- '<|im_end|>\n' }}
68
+ {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
+ {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
+ {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
+ {{- '<|im_end|>\n' }}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if add_generation_prompt %}
81
+ {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/genai_config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 1,
4
+ "context_length": 32768,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "provider_options": []
9
+ },
10
+ "filename": "model.onnx",
11
+ "head_size": 128,
12
+ "hidden_size": 512,
13
+ "inputs": {
14
+ "input_ids": "input_ids",
15
+ "attention_mask": "attention_mask",
16
+ "past_key_names": "past_key_values.%d.key",
17
+ "past_value_names": "past_key_values.%d.value"
18
+ },
19
+ "outputs": {
20
+ "logits": "logits",
21
+ "present_key_names": "present.%d.key",
22
+ "present_value_names": "present.%d.value"
23
+ },
24
+ "num_attention_heads": 16,
25
+ "num_hidden_layers": 28,
26
+ "num_key_value_heads": 8
27
+ },
28
+ "eos_token_id": [
29
+ 151645
30
+ ],
31
+ "pad_token_id": 151643,
32
+ "type": "qwen3",
33
+ "vocab_size": 180445
34
+ },
35
+ "search": {
36
+ "diversity_penalty": 0.0,
37
+ "do_sample": false,
38
+ "early_stopping": true,
39
+ "length_penalty": 1.0,
40
+ "max_length": 32768,
41
+ "min_length": 0,
42
+ "no_repeat_ngram_size": 0,
43
+ "num_beams": 1,
44
+ "num_return_sequences": 1,
45
+ "past_present_share_buffer": true,
46
+ "repetition_penalty": 1.0,
47
+ "temperature": 1.0,
48
+ "top_k": 50,
49
+ "top_p": 1.0
50
+ }
51
+ }
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf3ce04936a4722cc907faa16ee9f1391bd88f5a345f1394933ac3ffe3f837ff
3
+ size 327027
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/model.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e00ce27dd0d8aff161c3b9f98399bc2817a6478fae29f05397d4e58d25e6473e
3
+ size 212393216
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01db113d2ddc9192eaed1145c5caced436dbac3e60ee7238b662caf426ecc9f3
3
+ size 17114145
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
GPA_TTS/GPA_TTS_INT8/model/qwen_int4_ort/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
GPA_TTS/GPA_TTS_INT8/model/reference/038142_global_tokens.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af085e80826c70ba3580c9355ad65084dbd6234c1f9dece87c114e1d3cfe269c
3
+ size 384
GPA_TTS/GPA_TTS_INT8/model/runtime_manifest.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sample_rate": 16000,
3
+ "latent_hop_length": 320,
4
+ "global_token_offset": 168111,
5
+ "global_tokens_shape": [
6
+ 1,
7
+ 1,
8
+ 32
9
+ ],
10
+ "eos_token": "<|im_end|>"
11
+ }
GPA_TTS/GPA_TTS_INT8/model/spark_detokenizer_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2715eb8f0906f7cabbe73eb74c61c532897bf1d64221c79a79e7e82fafa6fb8
3
+ size 1896712
GPA_TTS/GPA_TTS_INT8/model/spark_detokenizer_int8.onnx.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04a04ad987486c9affb70af17a2b44f7ca62d14acf00fa512eeb0aff3b297b0
3
+ size 165224448
GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "apply_spec_augment": true,
4
+ "architectures": [
5
+ "Wav2Vec2ForPreTraining"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 1,
9
+ "codevector_dim": 768,
10
+ "contrastive_logits_temperature": 0.1,
11
+ "conv_bias": true,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "diversity_loss_weight": 0.1,
42
+ "do_stable_layer_norm": true,
43
+ "eos_token_id": 2,
44
+ "feat_extract_activation": "gelu",
45
+ "feat_extract_dropout": 0.0,
46
+ "feat_extract_norm": "layer",
47
+ "feat_proj_dropout": 0.1,
48
+ "feat_quantizer_dropout": 0.0,
49
+ "final_dropout": 0.0,
50
+ "gradient_checkpointing": false,
51
+ "hidden_act": "gelu",
52
+ "hidden_dropout": 0.1,
53
+ "hidden_size": 1024,
54
+ "initializer_range": 0.02,
55
+ "intermediate_size": 4096,
56
+ "layer_norm_eps": 1e-05,
57
+ "layerdrop": 0.1,
58
+ "mask_channel_length": 10,
59
+ "mask_channel_min_space": 1,
60
+ "mask_channel_other": 0.0,
61
+ "mask_channel_prob": 0.0,
62
+ "mask_channel_selection": "static",
63
+ "mask_feature_length": 10,
64
+ "mask_feature_prob": 0.0,
65
+ "mask_time_length": 10,
66
+ "mask_time_min_space": 1,
67
+ "mask_time_other": 0.0,
68
+ "mask_time_prob": 0.075,
69
+ "mask_time_selection": "static",
70
+ "model_type": "wav2vec2",
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 24,
78
+ "num_negatives": 100,
79
+ "pad_token_id": 0,
80
+ "proj_codevector_dim": 768,
81
+ "transformers_version": "4.7.0.dev0",
82
+ "vocab_size": 32
83
+ }
GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/config.yaml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio_tokenizer:
2
+ mel_params:
3
+ sample_rate: 16000
4
+ n_fft: 1024
5
+ win_length: 640
6
+ hop_length: 320
7
+ mel_fmin: 10
8
+ mel_fmax: null
9
+ num_mels: 128
10
+
11
+ encoder:
12
+ input_channels: 1024
13
+ vocos_dim: 384
14
+ vocos_intermediate_dim: 2048
15
+ vocos_num_layers: 12
16
+ out_channels: 1024
17
+ sample_ratios: [1,1]
18
+
19
+ decoder:
20
+ input_channel: 1024
21
+ channels: 1536
22
+ rates: [8, 5, 4, 2]
23
+ kernel_sizes: [16,11,8,4]
24
+
25
+ quantizer:
26
+ input_dim: 1024
27
+ codebook_size: 8192
28
+ codebook_dim: 8
29
+ commitment: 0.25
30
+ codebook_loss_weight: 2.0
31
+ use_l2_normlize: True
32
+ threshold_ema_dead_code: 0.2
33
+
34
+ speaker_encoder:
35
+ input_dim: 128
36
+ out_dim: 1024
37
+ latent_dim: 128
38
+ token_num: 32
39
+ fsq_levels: [4, 4, 4, 4, 4, 4]
40
+ fsq_num_quantizers: 1
41
+
42
+ prenet:
43
+ input_channels: 1024
44
+ vocos_dim: 384
45
+ vocos_intermediate_dim: 2048
46
+ vocos_num_layers: 12
47
+ out_channels: 1024
48
+ condition_dim: 1024
49
+ sample_ratios: [1,1]
50
+ use_tanh_at_final: False
51
+
52
+ postnet:
53
+ input_channels: 1024
54
+ vocos_dim: 384
55
+ vocos_intermediate_dim: 2048
56
+ vocos_num_layers: 6
57
+ out_channels: 1024
58
+ use_tanh_at_final: False
59
+ highpass_cutoff_freq: 40
60
+ sample_rate: 16000
61
+ segment_duration: 2.4 # (s)
62
+ max_val_duration: 12 # (s)
63
+ latent_hop_length: 320
64
+ ref_segment_duration: 6
65
+ volume_normalize: true
66
+
GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9940cd48d4446e4340ced82d234bf5618350dd9f5db900ebe47a4fdb03867ec
3
+ size 625518756
GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/wav2vec2-large-xlsr-53/config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "apply_spec_augment": true,
4
+ "architectures": [
5
+ "Wav2Vec2ForPreTraining"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "bos_token_id": 1,
9
+ "codevector_dim": 768,
10
+ "contrastive_logits_temperature": 0.1,
11
+ "conv_bias": true,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "diversity_loss_weight": 0.1,
42
+ "do_stable_layer_norm": true,
43
+ "eos_token_id": 2,
44
+ "feat_extract_activation": "gelu",
45
+ "feat_extract_dropout": 0.0,
46
+ "feat_extract_norm": "layer",
47
+ "feat_proj_dropout": 0.1,
48
+ "feat_quantizer_dropout": 0.0,
49
+ "final_dropout": 0.0,
50
+ "gradient_checkpointing": false,
51
+ "hidden_act": "gelu",
52
+ "hidden_dropout": 0.1,
53
+ "hidden_size": 1024,
54
+ "initializer_range": 0.02,
55
+ "intermediate_size": 4096,
56
+ "layer_norm_eps": 1e-05,
57
+ "layerdrop": 0.1,
58
+ "mask_channel_length": 10,
59
+ "mask_channel_min_space": 1,
60
+ "mask_channel_other": 0.0,
61
+ "mask_channel_prob": 0.0,
62
+ "mask_channel_selection": "static",
63
+ "mask_feature_length": 10,
64
+ "mask_feature_prob": 0.0,
65
+ "mask_time_length": 10,
66
+ "mask_time_min_space": 1,
67
+ "mask_time_other": 0.0,
68
+ "mask_time_prob": 0.075,
69
+ "mask_time_selection": "static",
70
+ "model_type": "wav2vec2",
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 24,
78
+ "num_negatives": 100,
79
+ "pad_token_id": 0,
80
+ "proj_codevector_dim": 768,
81
+ "transformers_version": "4.7.0.dev0",
82
+ "vocab_size": 32
83
+ }
GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/wav2vec2-large-xlsr-53/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
GPA_TTS/GPA_TTS_INT8/voice/spark_tokenizer_model/wav2vec2-large-xlsr-53/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:314340227371a608f71adcd5f0de5933824fe77e55822aa4b24dba9c1c364dcb
3
+ size 1269737156
GPA_TTS/GPA_TTS_INT8/voices/items/default/global_tokens.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af085e80826c70ba3580c9355ad65084dbd6234c1f9dece87c114e1d3cfe269c
3
+ size 384
GPA_TTS/GPA_TTS_INT8/voices/items/default/meta.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "default",
3
+ "voice_id": "default",
4
+ "created_at": "2026-03-28T15:35:57.789814+00:00",
5
+ "updated_at": "2026-03-28T15:35:57.790329+00:00",
6
+ "source_kind": "bundled_reference",
7
+ "source_label": "038142_global_tokens.npy",
8
+ "is_default": true
9
+ }
GPA_TTS/GPA_TTS_INT8/voices/registry.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 1,
3
+ "voices": [
4
+ {
5
+ "name": "default",
6
+ "voice_id": "default",
7
+ "created_at": "2026-03-28T15:35:57.789814+00:00",
8
+ "updated_at": "2026-03-28T15:35:57.790329+00:00",
9
+ "source_kind": "bundled_reference",
10
+ "source_label": "038142_global_tokens.npy",
11
+ "is_default": true
12
+ },
13
+ {
14
+ "name": "linyu-demo",
15
+ "voice_id": "linyu-demo",
16
+ "created_at": "2026-03-28T15:40:45.507448+00:00",
17
+ "updated_at": "2026-03-28T15:40:45.507475+00:00",
18
+ "source_kind": "upload",
19
+ "source_label": "dufu.wav",
20
+ "is_default": false
21
+ },
22
+ {
23
+ "name": "relative_path_check",
24
+ "voice_id": "relative_path_check",
25
+ "created_at": "2026-03-28T15:51:09.978360+00:00",
26
+ "updated_at": "2026-03-28T15:51:09.978384+00:00",
27
+ "source_kind": "path",
28
+ "source_label": "038142.wav",
29
+ "is_default": false
30
+ },
31
+ {
32
+ "name": "smoke_voice",
33
+ "voice_id": "smoke_voice",
34
+ "created_at": "2026-03-28T15:36:07.497354+00:00",
35
+ "updated_at": "2026-03-28T15:36:07.497372+00:00",
36
+ "source_kind": "path",
37
+ "source_label": "038142.wav",
38
+ "is_default": false
39
+ },
40
+ {
41
+ "name": "upload_voice",
42
+ "voice_id": "upload_voice",
43
+ "created_at": "2026-03-28T15:38:30.827871+00:00",
44
+ "updated_at": "2026-03-28T15:38:30.827889+00:00",
45
+ "source_kind": "upload",
46
+ "source_label": "038142.wav",
47
+ "is_default": false
48
+ },
49
+ {
50
+ "name": "曹操",
51
+ "voice_id": "voice-0e1d3735",
52
+ "created_at": "2026-03-28T15:52:06.364216+00:00",
53
+ "updated_at": "2026-03-28T15:52:06.364233+00:00",
54
+ "source_kind": "upload",
55
+ "source_label": "(曹操)-皮肤-【奸雄】:宁教我负天下人,休教天下人负我_爱给网_aigei_com.mp3",
56
+ "is_default": false
57
+ }
58
+ ]
59
+ }