上传xyz模型

Browse files

Files changed (10) hide show

README.md +10 -575
config.json +32 -0
config_sentence_transformers.json +10 -0
modules.json +20 -0
pytorch_model.bin +3 -0
sentence_bert_config.json +4 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +64 -0
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -2,130 +2,12 @@
 model-index:
 - name: XYZ-embedding-zh-v2
   results:
-  - dataset:
-      config: default
-      name: MTEB AFQMC
-      revision: None
-      split: validation
-      type: C-MTEB/AFQMC
-    metrics:
-    - type: cos_sim_pearson
-      value: 55.51799059309076
-    - type: cos_sim_spearman
-      value: 58.407433584137806
-    - type: manhattan_pearson
-      value: 57.17473672145622
-    - type: manhattan_spearman
-      value: 58.389018054159955
-    - type: euclidean_pearson
-      value: 57.19483956761451
-    - type: euclidean_spearman
-      value: 58.407433584137806
-    - type: main_score
-      value: 58.407433584137806
-    task:
-      type: STS
-  - dataset:
-      config: default
-      name: MTEB ATEC
-      revision: None
-      split: test
-      type: C-MTEB/ATEC
-    metrics:
-    - type: cos_sim_pearson
-      value: 57.31078155367183
-    - type: cos_sim_spearman
-      value: 57.59782762324478
-    - type: manhattan_pearson
-      value: 62.525487007985035
-    - type: manhattan_spearman
-      value: 57.591139966303615
-    - type: euclidean_pearson
-      value: 62.53702437760052
-    - type: euclidean_spearman
-      value: 57.597828749091384
-    - type: main_score
-      value: 57.59782762324478
-    task:
-      type: STS
-  - dataset:
-      config: zh
-      name: MTEB AmazonReviewsClassification (zh)
-      revision: 1399c76144fd37290681b995c656ef9b2e06e26d
-      split: test
-      type: mteb/amazon_reviews_multi
-    metrics:
-    - type: accuracy
-      value: 49.374
-    - type: accuracy_stderr
-      value: 1.436636349254743
-    - type: f1
-      value: 47.115240601017774
-    - type: f1_stderr
-      value: 1.5642799356594534
-    - type: main_score
-      value: 49.374
-    task:
-      type: Classification
-  - dataset:
-      config: default
-      name: MTEB BQ
-      revision: None
-      split: test
-      type: C-MTEB/BQ
-    metrics:
-    - type: cos_sim_pearson
-      value: 71.49514309404829
-    - type: cos_sim_spearman
-      value: 72.66161713021279
-    - type: manhattan_pearson
-      value: 71.03443640254005
-    - type: manhattan_spearman
-      value: 72.63439621980275
-    - type: euclidean_pearson
-      value: 71.06830370642658
-    - type: euclidean_spearman
-      value: 72.66161713043078
-    - type: main_score
-      value: 72.66161713021279
-    task:
-      type: STS
-  - dataset:
-      config: default
-      name: MTEB CLSClusteringP2P
-      revision: None
-      split: test
-      type: C-MTEB/CLSClusteringP2P
-    metrics:
-    - type: v_measure
-      value: 57.237692641281
-    - type: v_measure_std
-      value: 1.2777768354339174
-    - type: main_score
-      value: 57.237692641281
-    task:
-      type: Clustering
-  - dataset:
-      config: default
-      name: MTEB CLSClusteringS2S
-      revision: None
-      split: test
-      type: C-MTEB/CLSClusteringS2S
-    metrics:
-    - type: v_measure
-      value: 48.41686666939331
-    - type: v_measure_std
-      value: 1.7663118461900793
-    - type: main_score
-      value: 48.41686666939331
-    task:
-      type: Clustering
   - dataset:
       config: default
       name: MTEB CMedQAv1
       revision: None
       split: test
-      type: C-MTEB/CMedQAv1-reranking
     metrics:
     - type: map
       value: 89.9766367822762
@@ -140,7 +22,7 @@ model-index:
       name: MTEB CMedQAv2
       revision: None
       split: test
-      type: C-MTEB/CMedQAv2-reranking
     metrics:
     - type: map
       value: 89.04628340075982
@@ -221,77 +103,6 @@ model-index:
       value: 48.294
     task:
       type: Retrieval
-  - dataset:
-      config: default
-      name: MTEB Cmnli
-      revision: None
-      split: validation
-      type: C-MTEB/CMNLI
-    metrics:
-    - type: cos_sim_accuracy
-      value: 82.8983764281419
-    - type: cos_sim_accuracy_threshold
-      value: 56.05731010437012
-    - type: cos_sim_ap
-      value: 90.23156362696572
-    - type: cos_sim_f1
-      value: 83.83207278307574
-    - type: cos_sim_f1_threshold
-      value: 52.05453634262085
-    - type: cos_sim_precision
-      value: 78.91044160132068
-    - type: cos_sim_recall
-      value: 89.40846387654898
-    - type: dot_accuracy
-      value: 82.8983764281419
-    - type: dot_accuracy_threshold
-      value: 56.05730414390564
-    - type: dot_ap
-      value: 90.20952356258861
-    - type: dot_f1
-      value: 83.83207278307574
-    - type: dot_f1_threshold
-      value: 52.054524421691895
-    - type: dot_precision
-      value: 78.91044160132068
-    - type: dot_recall
-      value: 89.40846387654898
-    - type: euclidean_accuracy
-      value: 82.8983764281419
-    - type: euclidean_accuracy_threshold
-      value: 93.74719858169556
-    - type: euclidean_ap
-      value: 90.23156283510565
-    - type: euclidean_f1
-      value: 83.83207278307574
-    - type: euclidean_f1_threshold
-      value: 97.92392253875732
-    - type: euclidean_precision
-      value: 78.91044160132068
-    - type: euclidean_recall
-      value: 89.40846387654898
-    - type: manhattan_accuracy
-      value: 82.85027059530968
-    - type: manhattan_accuracy_threshold
-      value: 3164.584159851074
-    - type: manhattan_ap
-      value: 90.23178004516869
-    - type: manhattan_f1
-      value: 83.82157123834887
-    - type: manhattan_f1_threshold
-      value: 3273.5992431640625
-    - type: manhattan_precision
-      value: 79.76768743400211
-    - type: manhattan_recall
-      value: 88.30956277764788
-    - type: max_accuracy
-      value: 82.8983764281419
-    - type: max_ap
-      value: 90.23178004516869
-    - type: max_f1
-      value: 83.83207278307574
-    task:
-      type: PairClassification
   - dataset:
       config: default
       name: MTEB CovidRetrieval
@@ -505,71 +316,6 @@ model-index:
       value: 70.294
     task:
       type: Retrieval
-  - dataset:
-      config: default
-      name: MTEB IFlyTek
-      revision: None
-      split: validation
-      type: C-MTEB/IFlyTek-classification
-    metrics:
-    - type: accuracy
-      value: 52.743362831858406
-    - type: accuracy_stderr
-      value: 0.23768288128480788
-    - type: f1
-      value: 41.1548855278405
-    - type: f1_stderr
-      value: 0.4088759842813554
-    - type: main_score
-      value: 52.743362831858406
-    task:
-      type: Classification
-  - dataset:
-      config: default
-      name: MTEB JDReview
-      revision: None
-      split: test
-      type: C-MTEB/JDReview-classification
-    metrics:
-    - type: accuracy
-      value: 89.08067542213884
-    - type: accuracy_stderr
-      value: 0.9559278951487445
-    - type: ap
-      value: 60.875320104586564
-    - type: ap_stderr
-      value: 2.137806661565934
-    - type: f1
-      value: 84.39314192399665
-    - type: f1_stderr
-      value: 1.132407155321657
-    - type: main_score
-      value: 89.08067542213884
-    task:
-      type: Classification
-  - dataset:
-      config: default
-      name: MTEB LCQMC
-      revision: None
-      split: test
-      type: C-MTEB/LCQMC
-    metrics:
-    - type: cos_sim_pearson
-      value: 73.3633875566899
-    - type: cos_sim_spearman
-      value: 79.27679599527615
-    - type: manhattan_pearson
-      value: 79.12061667088273
-    - type: manhattan_spearman
-      value: 79.26989882781706
-    - type: euclidean_pearson
-      value: 79.12871362068391
-    - type: euclidean_spearman
-      value: 79.27679377557219
-    - type: main_score
-      value: 79.27679599527615
-    task:
-      type: STS
   - dataset:
       config: default
       name: MTEB MMarcoReranking
@@ -656,44 +402,6 @@ model-index:
       value: 82.505
     task:
       type: Retrieval
-  - dataset:
-      config: zh-CN
-      name: MTEB MassiveIntentClassification (zh-CN)
-      revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
-      split: test
-      type: mteb/amazon_massive_intent
-    metrics:
-    - type: accuracy
-      value: 77.9388029589778
-    - type: accuracy_stderr
-      value: 1.416192788478398
-    - type: f1
-      value: 74.77765701086211
-    - type: f1_stderr
-      value: 1.254859698486085
-    - type: main_score
-      value: 77.9388029589778
-    task:
-      type: Classification
-  - dataset:
-      config: zh-CN
-      name: MTEB MassiveScenarioClassification (zh-CN)
-      revision: 7d571f92784cd94a019292a1f45445077d0ef634
-      split: test
-      type: mteb/amazon_massive_scenario
-    metrics:
-    - type: accuracy
-      value: 83.8231338264963
-    - type: accuracy_stderr
-      value: 0.6973305760755886
-    - type: f1
-      value: 83.13105322628088
-    - type: f1_stderr
-      value: 0.600506118139685
-    - type: main_score
-      value: 83.8231338264963
-    task:
-      type: Classification
   - dataset:
       config: default
       name: MTEB MedicalRetrieval
@@ -765,211 +473,6 @@ model-index:
       value: 68.041
     task:
       type: Retrieval
-  - dataset:
-      config: default
-      name: MTEB MultilingualSentiment
-      revision: None
-      split: validation
-      type: C-MTEB/MultilingualSentiment-classification
-    metrics:
-    - type: accuracy
-      value: 78.60333333333334
-    - type: accuracy_stderr
-      value: 0.3331499495555859
-    - type: f1
-      value: 78.4814340961856
-    - type: f1_stderr
-      value: 0.45721454672060496
-    - type: main_score
-      value: 78.60333333333334
-    task:
-      type: Classification
-  - dataset:
-      config: default
-      name: MTEB Ocnli
-      revision: None
-      split: validation
-      type: C-MTEB/OCNLI
-    metrics:
-    - type: cos_sim_accuracy
-      value: 80.5630752571738
-    - type: cos_sim_accuracy_threshold
-      value: 53.72971296310425
-    - type: cos_sim_ap
-      value: 85.61885910463258
-    - type: cos_sim_f1
-      value: 82.40469208211144
-    - type: cos_sim_f1_threshold
-      value: 50.07883310317993
-    - type: cos_sim_precision
-      value: 76.70609645131938
-    - type: cos_sim_recall
-      value: 89.01795142555439
-    - type: dot_accuracy
-      value: 80.5630752571738
-    - type: dot_accuracy_threshold
-      value: 53.7297248840332
-    - type: dot_ap
-      value: 85.61885910463258
-    - type: dot_f1
-      value: 82.40469208211144
-    - type: dot_f1_threshold
-      value: 50.07884502410889
-    - type: dot_precision
-      value: 76.70609645131938
-    - type: dot_recall
-      value: 89.01795142555439
-    - type: euclidean_accuracy
-      value: 80.5630752571738
-    - type: euclidean_accuracy_threshold
-      value: 96.19801044464111
-    - type: euclidean_ap
-      value: 85.61885910463258
-    - type: euclidean_f1
-      value: 82.40469208211144
-    - type: euclidean_f1_threshold
-      value: 99.92111921310425
-    - type: euclidean_precision
-      value: 76.70609645131938
-    - type: euclidean_recall
-      value: 89.01795142555439
-    - type: manhattan_accuracy
-      value: 80.67135896047645
-    - type: manhattan_accuracy_threshold
-      value: 3323.1739044189453
-    - type: manhattan_ap
-      value: 85.55348220886658
-    - type: manhattan_f1
-      value: 82.26744186046511
-    - type: manhattan_f1_threshold
-      value: 3389.273452758789
-    - type: manhattan_precision
-      value: 76.00716204118174
-    - type: manhattan_recall
-      value: 89.65153115100317
-    - type: max_accuracy
-      value: 80.67135896047645
-    - type: max_ap
-      value: 85.61885910463258
-    - type: max_f1
-      value: 82.40469208211144
-    task:
-      type: PairClassification
-  - dataset:
-      config: default
-      name: MTEB OnlineShopping
-      revision: None
-      split: test
-      type: C-MTEB/OnlineShopping-classification
-    metrics:
-    - type: accuracy
-      value: 94.94
-    - type: accuracy_stderr
-      value: 0.49030602688525093
-    - type: ap
-      value: 93.0785841977823
-    - type: ap_stderr
-      value: 0.5447383082750599
-    - type: f1
-      value: 94.92765777406245
-    - type: f1_stderr
-      value: 0.4891510966106189
-    - type: main_score
-      value: 94.94
-    task:
-      type: Classification
-  - dataset:
-      config: default
-      name: MTEB PAWSX
-      revision: None
-      split: test
-      type: C-MTEB/PAWSX
-    metrics:
-    - type: cos_sim_pearson
-      value: 36.564307811370654
-    - type: cos_sim_spearman
-      value: 42.44208208349051
-    - type: manhattan_pearson
-      value: 42.099358471578306
-    - type: manhattan_spearman
-      value: 42.50283181486304
-    - type: euclidean_pearson
-      value: 42.07954956675317
-    - type: euclidean_spearman
-      value: 42.453014115018554
-    - type: main_score
-      value: 42.44208208349051
-    task:
-      type: STS
-  - dataset:
-      config: default
-      name: MTEB QBQTC
-      revision: None
-      split: test
-      type: C-MTEB/QBQTC
-    metrics:
-    - type: cos_sim_pearson
-      value: 39.19092968089104
-    - type: cos_sim_spearman
-      value: 41.5174661348832
-    - type: manhattan_pearson
-      value: 37.91587646684523
-    - type: manhattan_spearman
-      value: 41.536668677987194
-    - type: euclidean_pearson
-      value: 37.91079973901135
-    - type: euclidean_spearman
-      value: 41.51833855501128
-    - type: main_score
-      value: 41.5174661348832
-    task:
-      type: STS
-  - dataset:
-      config: zh
-      name: MTEB STS22 (zh)
-      revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
-      split: test
-      type: mteb/sts22-crosslingual-sts
-    metrics:
-    - type: cos_sim_pearson
-      value: 62.029449510721605
-    - type: cos_sim_spearman
-      value: 66.31935471251364
-    - type: manhattan_pearson
-      value: 63.63179975157496
-    - type: manhattan_spearman
-      value: 66.3007950466125
-    - type: euclidean_pearson
-      value: 63.59752734041086
-    - type: euclidean_spearman
-      value: 66.31935471251364
-    - type: main_score
-      value: 66.31935471251364
-    task:
-      type: STS
-  - dataset:
-      config: default
-      name: MTEB STSB
-      revision: None
-      split: test
-      type: C-MTEB/STSB
-    metrics:
-    - type: cos_sim_pearson
-      value: 81.81459862563769
-    - type: cos_sim_spearman
-      value: 82.15323953301453
-    - type: manhattan_pearson
-      value: 81.61904305126016
-    - type: manhattan_spearman
-      value: 82.1361073852468
-    - type: euclidean_pearson
-      value: 81.60799063723992
-    - type: euclidean_spearman
-      value: 82.15405405083231
-    - type: main_score
-      value: 82.15323953301453
-    task:
-      type: STS
   - dataset:
       config: default
       name: MTEB T2Reranking
@@ -978,11 +481,11 @@ model-index:
       type: C-MTEB/T2Reranking
     metrics:
     - type: map
-      value: 69.13560834260383
     - type: mrr
-      value: 79.95749642669074
     - type: main_score
-      value: 69.13560834260383
     task:
       type: Reranking
   - dataset:
@@ -1056,55 +559,6 @@ model-index:
       value: 85.875
     task:
       type: Retrieval
-  - dataset:
-      config: default
-      name: MTEB TNews
-      revision: None
-      split: validation
-      type: C-MTEB/TNews-classification
-    metrics:
-    - type: accuracy
-      value: 54.309000000000005
-    - type: accuracy_stderr
-      value: 0.4694347665011627
-    - type: f1
-      value: 52.598803987889255
-    - type: f1_stderr
-      value: 0.5191189533227434
-    - type: main_score
-      value: 54.309000000000005
-    task:
-      type: Classification
-  - dataset:
-      config: default
-      name: MTEB ThuNewsClusteringP2P
-      revision: None
-      split: test
-      type: C-MTEB/ThuNewsClusteringP2P
-    metrics:
-    - type: v_measure
-      value: 76.64191229011249
-    - type: v_measure_std
-      value: 2.807206940615986
-    - type: main_score
-      value: 76.64191229011249
-    task:
-      type: Clustering
-  - dataset:
-      config: default
-      name: MTEB ThuNewsClusteringS2S
-      revision: None
-      split: test
-      type: C-MTEB/ThuNewsClusteringS2S
-    metrics:
-    - type: v_measure
-      value: 71.02529199411326
-    - type: v_measure_std
-      value: 2.0547855888165945
-    - type: main_score
-      value: 71.02529199411326
-    task:
-      type: Clustering
   - dataset:
       config: default
       name: MTEB VideoRetrieval
@@ -1176,32 +630,13 @@ model-index:
       value: 80.93599999999999
     task:
       type: Retrieval
-  - dataset:
-      config: default
-      name: MTEB Waimai
-      revision: None
-      split: test
-      type: C-MTEB/waimai-classification
-    metrics:
-    - type: accuracy
-      value: 89.47
-    - type: accuracy_stderr
-      value: 0.26476404589747476
-    - type: ap
-      value: 75.49555223825388
-    - type: ap_stderr
-      value: 0.596040511982105
-    - type: f1
-      value: 88.01797939221065
-    - type: f1_stderr
-      value: 0.27168216797281214
-    - type: main_score
-      value: 89.47
-    task:
-      type: Classification
 tags:
 - mteb
 ---
 <h2 align="left">XYZ-embedding-zh-v2</h2>
 ## Usage (Sentence Transformers)
@@ -1231,4 +666,4 @@ print(embeddings.shape)
 similarities = model.similarity(embeddings, embeddings)
 print(similarities.shape)
 # [3, 3]
-```

 model-index:
 - name: XYZ-embedding-zh-v2
   results:
   - dataset:
       config: default
       name: MTEB CMedQAv1
       revision: None
       split: test
+      type: C-MTEB/CMedQAv1
     metrics:
     - type: map
       value: 89.9766367822762
       name: MTEB CMedQAv2
       revision: None
       split: test
+      type: C-MTEB/CMedQAv2
     metrics:
     - type: map
       value: 89.04628340075982
       value: 48.294
     task:
       type: Retrieval
   - dataset:
       config: default
       name: MTEB CovidRetrieval
       value: 70.294
     task:
       type: Retrieval
   - dataset:
       config: default
       name: MTEB MMarcoReranking
       value: 82.505
     task:
       type: Retrieval
   - dataset:
       config: default
       name: MTEB MedicalRetrieval
       value: 68.041
     task:
       type: Retrieval
   - dataset:
       config: default
       name: MTEB T2Reranking
       type: C-MTEB/T2Reranking
     metrics:
     - type: map
+      value: 69.13287570713865
     - type: mrr
+      value: 79.95326487625066
     - type: main_score
+      value: 69.13287570713865
     task:
       type: Reranking
   - dataset:
       value: 85.875
     task:
       type: Retrieval
   - dataset:
       config: default
       name: MTEB VideoRetrieval
       value: 80.93599999999999
     task:
       type: Retrieval
 tags:
 - mteb
+language:
+- zh
 ---
 <h2 align="left">XYZ-embedding-zh-v2</h2>
 ## Usage (Sentence Transformers)
 similarities = model.similarity(embeddings, embeddings)
 print(similarities.shape)
 # [3, 3]
+```

config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_name_or_path": "",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "directionality": "bidi",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_fc_size": 768,
+  "pooler_num_attention_heads": 12,
+  "pooler_num_fc_layers": 3,
+  "pooler_size_per_head": 128,
+  "pooler_type": "first_token_transform",
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 21128
+}

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.0.1",
+    "transformers": "4.41.0",
+    "pytorch": "2.2.2+cu118"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": null
+}

modules.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Dense",
+    "type": "sentence_transformers.models.Dense"
+  }
+]

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8090436280027987a24ffb67f66976b4069d4812c580f271ef7fe4720a037bcf
+size 1302216550

sentence_bert_config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "max_seq_length": 512,
+  "do_lower_case": false
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "max_length": 512,
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_to_multiple_of": null,
+  "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "sep_token": "[SEP]",
+  "stride": 0,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff