Training in progress, epoch 1, checkpoint

Files changed (6) hide show

checkpoint-7/config.json CHANGED Viewed

@@ -1,33 +1,41 @@
 {
   "architectures": [
-    "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 128,
   "id2label": {
     "LABEL_0": "negative",
     "LABEL_1": "positive"
   },
   "initializer_range": 0.02,
   "intermediate_size": 512,
   "label2id": {
     "negative": 0,
     "positive": 1
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 2,
-  "num_hidden_layers": 2,
   "pad_token_id": 0,
-  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.51.3",
   "type_vocab_size": 2,
-  "use_cache": true,
   "vocab_size": 30522
 }

 {
   "architectures": [
+    "MobileBertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "classifier_activation": false,
   "classifier_dropout": null,
+  "embedding_size": 128,
+  "hidden_act": "relu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 512,
   "id2label": {
     "LABEL_0": "negative",
     "LABEL_1": "positive"
   },
   "initializer_range": 0.02,
   "intermediate_size": 512,
+  "intra_bottleneck_size": 128,
+  "key_query_shared_bottleneck": true,
   "label2id": {
     "negative": 0,
     "positive": 1
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
+  "model_type": "mobilebert",
+  "normalization_type": "no_norm",
+  "num_attention_heads": 4,
+  "num_feedforward_networks": 4,
+  "num_hidden_layers": 24,
   "pad_token_id": 0,
   "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.51.3",
+  "trigram_input": true,
+  "true_hidden_size": 128,
   "type_vocab_size": 2,
+  "use_bottleneck": true,
+  "use_bottleneck_attention": false,
   "vocab_size": 30522
 }

checkpoint-7/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0053fef86ddfa093cdaccc6931a3dc51c243ed13d68523fc4d065afc87ab1604
-size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf83bce81e36c9be6ce3c489f440f7b2626836074b1f7b6963213bc7ebfabc36
+size 98470112

checkpoint-7/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a287d2025c941c456eac19e5cbb1c2227ecbce1472440672dc1d742c82c83eb
-size 35123898

 version https://git-lfs.github.com/spec/v1
+oid sha256:dcc0e7bf2ca72f2e8202e1b7f034ddd4a86824ada12c4deddcc567b259edefac
+size 197593757

checkpoint-7/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f42508f1fa811c01793b47e142274e8086f1d7bcec910eff64443ea1ee4491a6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:85d6eca64aa942b1fb6242ccacdb356b249b16a5213116b7a463df59ba8a0592
 size 14244

checkpoint-7/trainer_state.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "best_global_step": 7,
-  "best_metric": 0.30054421768707484,
   "best_model_checkpoint": "./results\\checkpoint-7",
   "epoch": 1.0,
   "eval_steps": 500,
@@ -11,12 +11,12 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.47,
-      "eval_f1": 0.30054421768707484,
-      "eval_loss": 0.7224627733230591,
-      "eval_runtime": 0.1369,
-      "eval_samples_per_second": 730.665,
-      "eval_steps_per_second": 51.147,
       "step": 7
     }
   ],
@@ -37,7 +37,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 127048704000.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_global_step": 7,
+  "best_metric": 0.3671895424836601,
   "best_model_checkpoint": "./results\\checkpoint-7",
   "epoch": 1.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.53,
+      "eval_f1": 0.3671895424836601,
+      "eval_loss": 390876.71875,
+      "eval_runtime": 2.5345,
+      "eval_samples_per_second": 39.456,
+      "eval_steps_per_second": 2.762,
       "step": 7
     }
   ],
       "attributes": {}
     }
   },
+  "total_flos": 6270852096000.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

checkpoint-7/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bda51bd81b8a1c7f9c3de07b155661dcbc50fd649940b4dedccc037a1028b31
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab0605f9ed33d05f165c2819fb33ed3f1442d423c372f77031105ed68bd9dbbb
 size 5304