hyoungjoon committed on
Commit
640909a
·
verified ·
1 Parent(s): 3f7ae78

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ decoder/rank0.engine filter=lfs diff=lfs merge=lfs -text
37
+ decoder/rank1.engine filter=lfs diff=lfs merge=lfs -text
38
+ decoder/rank2.engine filter=lfs diff=lfs merge=lfs -text
39
+ decoder/rank3.engine filter=lfs diff=lfs merge=lfs -text
40
+ encoder/rank0.engine filter=lfs diff=lfs merge=lfs -text
41
+ encoder/rank1.engine filter=lfs diff=lfs merge=lfs -text
42
+ encoder/rank2.engine filter=lfs diff=lfs merge=lfs -text
43
+ encoder/rank3.engine filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model Version
2
+ base: Text-Corrector-T5-base backbone model
3
+
4
+ v1.0: fine-tuned
5
+
6
+ v1.1: 한국어 커스텀 데이터셋 + 영어 맞춤법 교정 데이터셋
7
+
8
+ # 사용된 데이터셋
9
+ - 2021년 말뭉치 교정
10
+ - 2022년 말뭉치 교정
11
+ - Standard_Korean_GEC
12
+ - 자연어 분석 후처리용 과교정 검증 데이터
13
+ - 네이버 맞춤법 퀴즈 + 자체 QC 데이터셋
14
+ - <b>모든 데이터셋 합친 후 전처리 진행</b>
15
+
16
+ # 학습에 사용된 데이터셋 크기
17
+ - 2,652,241 문장 쌍 (구축 데이터셋 85%)
18
+
19
+ # 검증 468,011 문장 쌍
20
+ ## 정량 평가
21
+ <table>
22
+ <tr>
23
+ <th>Model</th>
24
+ <th>BLEU Score</th>
25
+ <th>EM Score</th>
26
+ </tr>
27
+ <tr>
28
+ <td>LDCC/Text-Corrector-T5-base</td>
29
+ <td>52.03</td>
30
+ <td>21.84</td>
31
+ </tr>
32
+ <tr>
33
+ <td>LDCC/Text-Corrector-T5-base-finetune</td>
34
+ <td>66.64</td>
35
+ <td>39.65</td>
36
+ </tr>
37
+ </table>
38
+ ## 정성 평가 (네이버 맞춤법 퀴즈 기준 100 문장 쌍)
39
+
40
+ <table>
41
+ <tr>
42
+ <th>Model</th>
43
+ <th>Correct Answers</th>
44
+ <th>Incorrect Answers</th>
45
+ </tr>
46
+ <tr>
47
+ <td>LDCC/Text-Corrector-T5-base</td>
48
+ <td>30</td>
49
+ <td>70</td>
50
+ </tr>
51
+ <tr>
52
+ <td>LDCC/Text-Corrector-T5-base-finetune</td>
53
+ <td>58</td>
54
+ <td>42</td>
55
+ </tr>
56
+ </table>
57
+ <table>
58
+ <tr>
59
+ <th>Input</th>
60
+ <th>Original</th>
61
+ <th>Finetune</th>
62
+ <th>Answer</th>
63
+ </tr>
64
+ <tr>
65
+ <td>내가 너의 뒤치닥거리만 하는 것 같다.</td>
66
+ <td><b>내가 너의 뒤치다꺼리만 하는 것 같다.</b></td>
67
+ <td><b>내가 너의 뒤치다꺼리만 하는 것 같다.</b></td>
68
+ <td><b>내가 너의 뒤치다꺼리만 하는 것 같다.</b></td>
69
+ </tr>
70
+ <tr>
71
+ <td>교실이 돗데기시장처럼 시끄럽다.</td>
72
+ <td>교실이 돗데기 시장처럼 시끄럽다.</td>
73
+ <td><b>교실이 도떼기시장처럼 시끄럽다.</b></td>
74
+ <td>교실이 도떼기시장처럼 시끄럽다.</td>
75
+ </tr>
76
+ <tr>
77
+ <td>친구가 모르는 문제를 갈켜 주다.</td>
78
+ <td>친구가 모르는 문제를 가르쳐 준다.</td>
79
+ <td><b>친구가 모르는 문제를 가르쳐 주다.</b></td>
80
+ <td>친구가 모르는 문제를 가르쳐 주다.</td>
81
+ </tr>
82
+ <tr>
83
+ <td>다리가 저려서 다리를 뻣었다.</td>
84
+ <td>다리가 저려서 다리를 뻣었다.</td>
85
+ <td><b>다리가 저려서 다리를 뻗었다.</b></td>
86
+ <td>다리가 저려서 다리를 뻗었다.</td>
87
+ </tr>
88
+ <tr>
89
+ <td>숫꿩과 같은 의미를 지닌 말은 '장끼'다.</td>
90
+ <td>숫자와 같은 의미를 지닌 말은 '장끼'다.</td>
91
+ <td>숫꿩과 같은 의미를 지닌 말은 '장끼'다.</td>
92
+ <td><b>수꿩과 같은 의미를 지닌 말은 '장끼'다.</b></td>
93
+ </tr>
94
+ <tr>
95
+ <td>내가 누누히 말했다.</td>
96
+ <td><b>내가 누누이 말했다.</b></td>
97
+ <td><b>내가 누누이 말했다.</b></td>
98
+ <td>내가 누누이 말했다.</td>
99
+ </tr>
100
+ <tr>
101
+ <td>그 병사들은 총알받기나 다름없는 운명이었다.</td>
102
+ <td>그 병사들은 총알 받기나 다름없는 운명이었다.</td>
103
+ <td>그 병사들은 총알 받기나 다름없는 운명이었다.</td>
104
+ <td><b>그 병사들은 총알받이나 다름없는 운명이었다.</b></td>
105
+ </tr>
106
+ <tr>
107
+ <td>이슈란이 분리되었다.</td>
108
+ <td>이슈란이 분리되었다.</td>
109
+ <td>이슈란이 분리되었다.</td>
110
+ <td><b>이슈난이 분리되었다.</b></td>
111
+ </tr>
112
+ <tr>
113
+ <td>이유 없는 괄세를 받았다.</td>
114
+ <td>이유 없는 괄세를 받았다.</td>
115
+ <td>이유 없는 괄세를 받았다.</td>
116
+ <td><b>이유 없는 괄시를 받았다.</b></td>
117
+ </tr>
118
+ <tr>
119
+ <td>눈물이 주루룩 흘렀다.</td>
120
+ <td>눈물이 주룩 흘렀다.</td>
121
+ <td><b>눈물이 주르륵 흘렀다.</b></td>
122
+ <td>눈물이 주르륵 흘렀다.</td>
123
+ </tr>
124
+ </table>
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "runs/checkpoint-6697",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "gelu_new",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "gated-gelu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": true,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "num_decoder_layers": 12,
21
+ "num_heads": 12,
22
+ "num_layers": 12,
23
+ "pad_token_id": 0,
24
+ "relative_attention_max_distance": 128,
25
+ "relative_attention_num_buckets": 32,
26
+ "tie_word_embeddings": false,
27
+ "tokenizer_class": "T5Tokenizer",
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.38.2",
30
+ "use_cache": true,
31
+ "vocab_size": 45100
32
+ }
decoder/config.json ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.10.0",
3
+ "pretrained_config": {
4
+ "architecture": "DecoderModel",
5
+ "dtype": "bfloat16",
6
+ "logits_dtype": "float32",
7
+ "vocab_size": 45100,
8
+ "max_position_embeddings": 512,
9
+ "hidden_size": 768,
10
+ "num_hidden_layers": 12,
11
+ "num_attention_heads": 12,
12
+ "num_key_value_heads": 12,
13
+ "head_size": 64,
14
+ "qk_layernorm": false,
15
+ "hidden_act": "gelu_new",
16
+ "intermediate_size": null,
17
+ "norm_epsilon": 1e-06,
18
+ "position_embedding_type": "relative",
19
+ "use_parallel_embedding": false,
20
+ "embedding_sharding_dim": 0,
21
+ "share_embedding_table": false,
22
+ "mapping": {
23
+ "world_size": 1,
24
+ "tp_size": 1,
25
+ "pp_size": 1,
26
+ "gpus_per_node": 8
27
+ },
28
+ "quantization": {
29
+ "quant_algo": null,
30
+ "kv_cache_quant_algo": null,
31
+ "group_size": 128,
32
+ "smoothquant_val": null,
33
+ "has_zero_point": false,
34
+ "pre_quant_scale": false,
35
+ "exclude_modules": null
36
+ },
37
+ "kv_dtype": "bfloat16",
38
+ "use_prompt_tuning": false,
39
+ "has_position_embedding": false,
40
+ "layernorm_type": 1,
41
+ "has_attention_qkvo_bias": false,
42
+ "has_mlp_bias": false,
43
+ "has_model_final_layernorm": true,
44
+ "has_embedding_layernorm": false,
45
+ "has_embedding_scale": false,
46
+ "ffn_hidden_size": 3072,
47
+ "q_scaling": 0.125,
48
+ "layernorm_position": 0,
49
+ "mlp_type": 1,
50
+ "relative_attention": true,
51
+ "max_distance": 128,
52
+ "num_buckets": 32,
53
+ "model_type": "t5",
54
+ "rescale_before_lm_head": false,
55
+ "encoder_hidden_size": 768,
56
+ "encoder_num_heads": 12,
57
+ "encoder_head_size": 64,
58
+ "skip_cross_qkv": false,
59
+ "gated_act": true
60
+ },
61
+ "build_config": {
62
+ "max_input_len": 1,
63
+ "max_output_len": 1024,
64
+ "opt_batch_size": null,
65
+ "max_batch_size": 8,
66
+ "max_beam_width": 5,
67
+ "max_num_tokens": 8,
68
+ "opt_num_tokens": 8,
69
+ "max_prompt_embedding_table_size": 0,
70
+ "gather_context_logits": false,
71
+ "gather_generation_logits": false,
72
+ "strongly_typed": false,
73
+ "builder_opt": null,
74
+ "profiling_verbosity": "layer_names_only",
75
+ "enable_debug_output": false,
76
+ "max_draft_len": 0,
77
+ "speculative_decoding_mode": 1,
78
+ "use_refit": false,
79
+ "input_timing_cache": null,
80
+ "output_timing_cache": "model.cache",
81
+ "lora_config": {
82
+ "lora_dir": [],
83
+ "lora_ckpt_source": "hf",
84
+ "max_lora_rank": 64,
85
+ "lora_target_modules": [],
86
+ "trtllm_modules_to_hf_modules": {}
87
+ },
88
+ "auto_parallel_config": {
89
+ "world_size": 1,
90
+ "gpus_per_node": 8,
91
+ "cluster_key": "A100-SXM-40GB",
92
+ "cluster_info": null,
93
+ "sharding_cost_model": "alpha_beta",
94
+ "comm_cost_model": "alpha_beta",
95
+ "enable_pipeline_parallelism": false,
96
+ "enable_shard_unbalanced_shape": false,
97
+ "enable_shard_dynamic_shape": false,
98
+ "enable_reduce_scatter": true,
99
+ "builder_flags": null,
100
+ "debug_mode": false,
101
+ "infer_shape": true,
102
+ "validation_mode": false,
103
+ "same_buffer_io": {
104
+ "past_key_value_(\\d+)": "present_key_value_\\1"
105
+ },
106
+ "same_spec_io": {},
107
+ "sharded_io_allowlist": [
108
+ "past_key_value_\\d+",
109
+ "present_key_value_\\d*"
110
+ ],
111
+ "fast_reduce": true,
112
+ "fill_weights": false,
113
+ "parallel_config_cache": null,
114
+ "profile_cache": null,
115
+ "dump_path": null,
116
+ "debug_outputs": []
117
+ },
118
+ "weight_sparsity": false,
119
+ "weight_streaming": false,
120
+ "use_strip_plan": false,
121
+ "max_encoder_input_len": 1024,
122
+ "use_fused_mlp": false,
123
+ "plugin_config": {
124
+ "bert_attention_plugin": "bfloat16",
125
+ "gpt_attention_plugin": "bfloat16",
126
+ "gemm_plugin": "bfloat16",
127
+ "smooth_quant_gemm_plugin": null,
128
+ "identity_plugin": null,
129
+ "layernorm_quantization_plugin": null,
130
+ "rmsnorm_quantization_plugin": null,
131
+ "nccl_plugin": null,
132
+ "lookup_plugin": null,
133
+ "lora_plugin": null,
134
+ "weight_only_groupwise_quant_matmul_plugin": null,
135
+ "weight_only_quant_matmul_plugin": null,
136
+ "quantize_per_token_plugin": false,
137
+ "quantize_tensor_plugin": false,
138
+ "moe_plugin": null,
139
+ "mamba_conv1d_plugin": "float16",
140
+ "context_fmha": false,
141
+ "context_fmha_fp32_acc": false,
142
+ "paged_kv_cache": true,
143
+ "remove_input_padding": true,
144
+ "use_custom_all_reduce": true,
145
+ "multi_block_mode": false,
146
+ "enable_xqa": true,
147
+ "attention_qk_half_accumulation": false,
148
+ "tokens_per_block": 64,
149
+ "use_paged_context_fmha": false,
150
+ "use_fp8_context_fmha": false,
151
+ "use_context_fmha_for_generation": false,
152
+ "multiple_profiles": false,
153
+ "paged_state": true,
154
+ "streamingllm": false
155
+ }
156
+ }
157
+ }
decoder/rank0.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff91653d82ad25c1aa2317c0c815206c4fcea3ea2dc2c83aa3bca26132383d0e
3
+ size 423611156
decoder/rank1.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b937a66c1de6a12b07cb54d1bfed5a799fb27bdd26bfe218788334bbabef3f8f
3
+ size 159743020
decoder/rank2.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b2635d9a8cfc8b307cf43b9d4d9fd1e74447ad3beb5a0eefe68fca1b986850b
3
+ size 159641460
decoder/rank3.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d28b5d8c4e02c7a36e782f4678f2ce2a0f0f05596e1c931032a9613e708a1537
3
+ size 159733060
encoder/config.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.10.0",
3
+ "pretrained_config": {
4
+ "architecture": "EncoderModel",
5
+ "dtype": "bfloat16",
6
+ "logits_dtype": "float32",
7
+ "vocab_size": 45100,
8
+ "max_position_embeddings": 512,
9
+ "hidden_size": 768,
10
+ "num_hidden_layers": 12,
11
+ "num_attention_heads": 12,
12
+ "num_key_value_heads": 12,
13
+ "head_size": 64,
14
+ "qk_layernorm": false,
15
+ "hidden_act": "gelu_new",
16
+ "intermediate_size": null,
17
+ "norm_epsilon": 1e-06,
18
+ "position_embedding_type": "relative",
19
+ "use_parallel_embedding": false,
20
+ "embedding_sharding_dim": 0,
21
+ "share_embedding_table": false,
22
+ "mapping": {
23
+ "world_size": 1,
24
+ "tp_size": 1,
25
+ "pp_size": 1,
26
+ "gpus_per_node": 8
27
+ },
28
+ "quantization": {
29
+ "quant_algo": null,
30
+ "kv_cache_quant_algo": null,
31
+ "group_size": 128,
32
+ "smoothquant_val": null,
33
+ "has_zero_point": false,
34
+ "pre_quant_scale": false,
35
+ "exclude_modules": null
36
+ },
37
+ "kv_dtype": "bfloat16",
38
+ "use_prompt_tuning": false,
39
+ "has_position_embedding": false,
40
+ "layernorm_type": 1,
41
+ "has_attention_qkvo_bias": false,
42
+ "has_mlp_bias": false,
43
+ "has_model_final_layernorm": true,
44
+ "has_embedding_layernorm": false,
45
+ "has_embedding_scale": false,
46
+ "ffn_hidden_size": 3072,
47
+ "q_scaling": 0.125,
48
+ "layernorm_position": 0,
49
+ "mlp_type": 1,
50
+ "relative_attention": true,
51
+ "max_distance": 128,
52
+ "num_buckets": 32,
53
+ "model_type": "t5",
54
+ "gated_act": true
55
+ },
56
+ "build_config": {
57
+ "max_input_len": 1024,
58
+ "max_output_len": 1024,
59
+ "opt_batch_size": null,
60
+ "max_batch_size": 8,
61
+ "max_beam_width": 5,
62
+ "max_num_tokens": 8192,
63
+ "opt_num_tokens": 40,
64
+ "max_prompt_embedding_table_size": 0,
65
+ "gather_context_logits": false,
66
+ "gather_generation_logits": false,
67
+ "strongly_typed": false,
68
+ "builder_opt": null,
69
+ "profiling_verbosity": "layer_names_only",
70
+ "enable_debug_output": false,
71
+ "max_draft_len": 0,
72
+ "speculative_decoding_mode": 1,
73
+ "use_refit": false,
74
+ "input_timing_cache": null,
75
+ "output_timing_cache": "model.cache",
76
+ "lora_config": {
77
+ "lora_dir": [],
78
+ "lora_ckpt_source": "hf",
79
+ "max_lora_rank": 64,
80
+ "lora_target_modules": [],
81
+ "trtllm_modules_to_hf_modules": {}
82
+ },
83
+ "auto_parallel_config": {
84
+ "world_size": 1,
85
+ "gpus_per_node": 8,
86
+ "cluster_key": "A100-SXM-40GB",
87
+ "cluster_info": null,
88
+ "sharding_cost_model": "alpha_beta",
89
+ "comm_cost_model": "alpha_beta",
90
+ "enable_pipeline_parallelism": false,
91
+ "enable_shard_unbalanced_shape": false,
92
+ "enable_shard_dynamic_shape": false,
93
+ "enable_reduce_scatter": true,
94
+ "builder_flags": null,
95
+ "debug_mode": false,
96
+ "infer_shape": true,
97
+ "validation_mode": false,
98
+ "same_buffer_io": {
99
+ "past_key_value_(\\d+)": "present_key_value_\\1"
100
+ },
101
+ "same_spec_io": {},
102
+ "sharded_io_allowlist": [
103
+ "past_key_value_\\d+",
104
+ "present_key_value_\\d*"
105
+ ],
106
+ "fast_reduce": true,
107
+ "fill_weights": false,
108
+ "parallel_config_cache": null,
109
+ "profile_cache": null,
110
+ "dump_path": null,
111
+ "debug_outputs": []
112
+ },
113
+ "weight_sparsity": false,
114
+ "weight_streaming": false,
115
+ "use_strip_plan": false,
116
+ "max_encoder_input_len": 1024,
117
+ "use_fused_mlp": false,
118
+ "plugin_config": {
119
+ "bert_attention_plugin": "bfloat16",
120
+ "gpt_attention_plugin": "bfloat16",
121
+ "gemm_plugin": "bfloat16",
122
+ "smooth_quant_gemm_plugin": null,
123
+ "identity_plugin": null,
124
+ "layernorm_quantization_plugin": null,
125
+ "rmsnorm_quantization_plugin": null,
126
+ "nccl_plugin": null,
127
+ "lookup_plugin": null,
128
+ "lora_plugin": null,
129
+ "weight_only_groupwise_quant_matmul_plugin": null,
130
+ "weight_only_quant_matmul_plugin": null,
131
+ "quantize_per_token_plugin": false,
132
+ "quantize_tensor_plugin": false,
133
+ "moe_plugin": null,
134
+ "mamba_conv1d_plugin": "float16",
135
+ "context_fmha": false,
136
+ "context_fmha_fp32_acc": false,
137
+ "paged_kv_cache": false,
138
+ "remove_input_padding": true,
139
+ "use_custom_all_reduce": true,
140
+ "multi_block_mode": false,
141
+ "enable_xqa": true,
142
+ "attention_qk_half_accumulation": false,
143
+ "tokens_per_block": 64,
144
+ "use_paged_context_fmha": false,
145
+ "use_fp8_context_fmha": false,
146
+ "use_context_fmha_for_generation": false,
147
+ "multiple_profiles": false,
148
+ "paged_state": true,
149
+ "streamingllm": false
150
+ }
151
+ }
152
+ }
encoder/rank0.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e585c29eb514d75d190954f9a3de86b2d2a85adffae0a8058a5fc16e53733834
3
+ size 297090020
encoder/rank1.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc15ae0f4ff5295428e5e3d25377bd038d7c32b33c078b241354d98bf2ae0875
3
+ size 127533828
encoder/rank2.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:803a87f1e9565437352ed3014d2f9a08d33db3e1755c794561da6405cfc1aa8b
3
+ size 127533828
encoder/rank3.engine ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e7746e5a36e580bea09ada6d07b903dd47043b93131d6e1f974ec90b97d56a2
3
+ size 127533572
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.38.2"
7
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ebba0a7b0fadda80677fe980264027fd8c51f822c486201b39dfdfe8804a570
3
+ size 1196021
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<pad>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "</s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<unk>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [],
30
+ "clean_up_tokenization_spaces": true,
31
+ "eos_token": "</s>",
32
+ "extra_ids": 0,
33
+ "legacy": true,
34
+ "model_max_length": 1000000000000000019884624838656,
35
+ "pad_token": "<pad>",
36
+ "sp_model_kwargs": {},
37
+ "tokenizer_class": "T5Tokenizer",
38
+ "unk_token": "<unk>"
39
+ }