KazBERT

Browse files

Files changed (13) hide show

config (2).json +38 -0
model (1).safetensors +3 -0
optimizer (1).pt +3 -0
rng_state_0.pth +3 -0
rng_state_1.pth +3 -0
scaler.pt +3 -0
scheduler (1).pt +3 -0
special_tokens_map (2).json +37 -0
tokenizer (2).json +0 -0
tokenizer_config (1).json +56 -0
trainer_state (1).json +83 -0
training_args (1).bin +3 -0
vocab (1).txt +0 -0

config (2).json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.51.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 32000
+}

model (1).safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:908ebe55ed5c4eca4563f7788dec382d067c0bfe7666573bf2e6938332ed8cdf
+size 442505216

optimizer (1).pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dcc3d84acdac896e33719dd7a3f081cba65285ce33043126e26756a72d15cb76
+size 885131514

rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c9b875719f5373e1190cb1905b35eb769c21abe2dae03070fca65c00f7c2417
+size 14512

rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:907a0362da6680f6ae54cd8f779b768155a32a9fa9c10003e8cbf28d9b6bda11
+size 14512

scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f175266b7432a0605c41f226523433c388cc3b556f9062fc9ba2f7d98faf874c
+size 988

scheduler (1).pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9764a5132838e65bdfdba49b236da5301ec634a6466ffbc0d064d35ee5d792ed
+size 1064

special_tokens_map (2).json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer (2).json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config (1).json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

trainer_state (1).json ADDED Viewed

	@@ -0,0 +1,83 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.3929121725731894,
+  "eval_steps": 500,
+  "global_step": 3500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.7704160246533128,
+      "grad_norm": 9.17220401763916,
+      "learning_rate": 1.7452491011813045e-05,
+      "loss": 0.6268,
+      "step": 500
+    },
+    {
+      "epoch": 1.5408320493066254,
+      "grad_norm": 1.130704641342163,
+      "learning_rate": 1.4884437596302003e-05,
+      "loss": 0.2416,
+      "step": 1000
+    },
+    {
+      "epoch": 2.3112480739599386,
+      "grad_norm": 10.34769058227539,
+      "learning_rate": 1.2321520287621984e-05,
+      "loss": 0.21,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0816640986132513,
+      "grad_norm": 8.762519836425781,
+      "learning_rate": 9.753466872110941e-06,
+      "loss": 0.1846,
+      "step": 2000
+    },
+    {
+      "epoch": 3.852080123266564,
+      "grad_norm": 3.5205228328704834,
+      "learning_rate": 7.185413456599898e-06,
+      "loss": 0.154,
+      "step": 2500
+    },
+    {
+      "epoch": 4.622496147919877,
+      "grad_norm": 4.912118434906006,
+      "learning_rate": 4.617360041088855e-06,
+      "loss": 0.1376,
+      "step": 3000
+    },
+    {
+      "epoch": 5.3929121725731894,
+      "grad_norm": 6.772336483001709,
+      "learning_rate": 2.0493066255778122e-06,
+      "loss": 0.1164,
+      "step": 3500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3894,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.9468968108425216e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args (1).bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cdbe8386357c1c19416188aa805631b83c2cc2cd653b9c74eb494600bea1936
+size 5240

vocab (1).txt ADDED Viewed

The diff for this file is too large to render. See raw diff