jin3213 committed 19 days ago

Commit

d73ea91

verified ·

1 Parent(s): 129bce4

Upload 27 files

Browse files

Files changed (27) hide show

finetuned_distilbert/checkpoint-296/config.json +25 -0
finetuned_distilbert/checkpoint-296/model.safetensors +3 -0
finetuned_distilbert/checkpoint-296/optimizer.pt +3 -0
finetuned_distilbert/checkpoint-296/rng_state.pth +3 -0
finetuned_distilbert/checkpoint-296/scheduler.pt +3 -0
finetuned_distilbert/checkpoint-296/trainer_state.json +244 -0
finetuned_distilbert/checkpoint-296/training_args.bin +3 -0
finetuned_distilbert/checkpoint-592/config.json +25 -0
finetuned_distilbert/checkpoint-592/model.safetensors +3 -0
finetuned_distilbert/checkpoint-592/rng_state.pth +3 -0
finetuned_distilbert/checkpoint-592/scheduler.pt +3 -0
finetuned_distilbert/checkpoint-592/trainer_state.json +462 -0
finetuned_distilbert/checkpoint-592/training_args.bin +3 -0
finetuned_distilbert/checkpoint-888/config.json +25 -0
finetuned_distilbert/checkpoint-888/model.safetensors +3 -0
finetuned_distilbert/checkpoint-888/optimizer.pt +3 -0
finetuned_distilbert/checkpoint-888/rng_state.pth +3 -0
finetuned_distilbert/checkpoint-888/scheduler.pt +3 -0
finetuned_distilbert/checkpoint-888/trainer_state.json +673 -0
finetuned_distilbert/checkpoint-888/training_args.bin +3 -0
finetuned_distilbert/config.json +25 -0
finetuned_distilbert/model.safetensors +3 -0
finetuned_distilbert/special_tokens_map.json +7 -0
finetuned_distilbert/tokenizer.json +0 -0
finetuned_distilbert/tokenizer_config.json +55 -0
finetuned_distilbert/training_args.bin +3 -0
finetuned_distilbert/vocab.txt +0 -0

finetuned_distilbert/checkpoint-296/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-cased-distilled-squad",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForQuestionAnswering"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": true,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "vocab_size": 28996
+}

finetuned_distilbert/checkpoint-296/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af8a9df62f54bd4b92b3fdb569ebdb2b6e6a3b94ae6cab7e360b1e291049f869
+size 260782152

finetuned_distilbert/checkpoint-296/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc0edbc3aa5afe0dc6ba3a5a6e511fc60a9470d7ddea2976bc663e615d22e88d
+size 521625611

finetuned_distilbert/checkpoint-296/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fea74348d064456dc86781e4e54bd1d504da2f98a52fe2589964204c7b29477
+size 14645

finetuned_distilbert/checkpoint-296/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7959987c50bca324a6a5ff3fb0dd4e95ee53efcf0283c72dfddef23174d9820
+size 1465

finetuned_distilbert/checkpoint-296/trainer_state.json ADDED Viewed

	@@ -0,0 +1,244 @@

+{
+  "best_metric": 0.0008619087748229504,
+  "best_model_checkpoint": "/content/drive/My Drive/BonsAI/RAG/finetuned_distilbert/checkpoint-296",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 296,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.033783783783783786,
+      "grad_norm": 63.3484992980957,
+      "learning_rate": 2.696629213483146e-06,
+      "loss": 1.5891,
+      "step": 10
+    },
+    {
+      "epoch": 0.06756756756756757,
+      "grad_norm": 12.823508262634277,
+      "learning_rate": 6.067415730337078e-06,
+      "loss": 0.9926,
+      "step": 20
+    },
+    {
+      "epoch": 0.10135135135135136,
+      "grad_norm": 10.632203102111816,
+      "learning_rate": 9.438202247191012e-06,
+      "loss": 0.4432,
+      "step": 30
+    },
+    {
+      "epoch": 0.13513513513513514,
+      "grad_norm": 1.4367406368255615,
+      "learning_rate": 1.2808988764044944e-05,
+      "loss": 0.1425,
+      "step": 40
+    },
+    {
+      "epoch": 0.16891891891891891,
+      "grad_norm": 5.937399387359619,
+      "learning_rate": 1.6179775280898875e-05,
+      "loss": 0.0221,
+      "step": 50
+    },
+    {
+      "epoch": 0.20270270270270271,
+      "grad_norm": 0.006844044663012028,
+      "learning_rate": 1.955056179775281e-05,
+      "loss": 0.0338,
+      "step": 60
+    },
+    {
+      "epoch": 0.23648648648648649,
+      "grad_norm": 0.0062227933667600155,
+      "learning_rate": 2.292134831460674e-05,
+      "loss": 0.0188,
+      "step": 70
+    },
+    {
+      "epoch": 0.2702702702702703,
+      "grad_norm": 0.9754657745361328,
+      "learning_rate": 2.595505617977528e-05,
+      "loss": 0.0298,
+      "step": 80
+    },
+    {
+      "epoch": 0.30405405405405406,
+      "grad_norm": 0.004088506102561951,
+      "learning_rate": 2.932584269662921e-05,
+      "loss": 0.0006,
+      "step": 90
+    },
+    {
+      "epoch": 0.33783783783783783,
+      "grad_norm": 0.0017175301909446716,
+      "learning_rate": 2.9992579868275392e-05,
+      "loss": 0.0011,
+      "step": 100
+    },
+    {
+      "epoch": 0.3716216216216216,
+      "grad_norm": 2.027472734451294,
+      "learning_rate": 2.996244816439765e-05,
+      "loss": 0.001,
+      "step": 110
+    },
+    {
+      "epoch": 0.40540540540540543,
+      "grad_norm": 0.12049974501132965,
+      "learning_rate": 2.990918767154285e-05,
+      "loss": 0.0137,
+      "step": 120
+    },
+    {
+      "epoch": 0.4391891891891892,
+      "grad_norm": 0.0021126759238541126,
+      "learning_rate": 2.983288071919922e-05,
+      "loss": 0.0016,
+      "step": 130
+    },
+    {
+      "epoch": 0.47297297297297297,
+      "grad_norm": 0.008341561071574688,
+      "learning_rate": 2.9733645261820407e-05,
+      "loss": 0.0012,
+      "step": 140
+    },
+    {
+      "epoch": 0.5067567567567568,
+      "grad_norm": 0.001279486226849258,
+      "learning_rate": 2.9611634696492718e-05,
+      "loss": 0.0235,
+      "step": 150
+    },
+    {
+      "epoch": 0.5405405405405406,
+      "grad_norm": 0.0039567407220602036,
+      "learning_rate": 2.946703762581565e-05,
+      "loss": 0.0008,
+      "step": 160
+    },
+    {
+      "epoch": 0.5743243243243243,
+      "grad_norm": 0.0008406731067225337,
+      "learning_rate": 2.9300077566362e-05,
+      "loss": 0.0002,
+      "step": 170
+    },
+    {
+      "epoch": 0.6081081081081081,
+      "grad_norm": 0.0006949682137928903,
+      "learning_rate": 2.91110126031684e-05,
+      "loss": 0.0001,
+      "step": 180
+    },
+    {
+      "epoch": 0.6418918918918919,
+      "grad_norm": 0.0053464132361114025,
+      "learning_rate": 2.8900134990790303e-05,
+      "loss": 0.0001,
+      "step": 190
+    },
+    {
+      "epoch": 0.6756756756756757,
+      "grad_norm": 37.02953338623047,
+      "learning_rate": 2.8667770701538055e-05,
+      "loss": 0.0478,
+      "step": 200
+    },
+    {
+      "epoch": 0.7094594594594594,
+      "grad_norm": 1.9292826652526855,
+      "learning_rate": 2.8414278921592484e-05,
+      "loss": 0.0009,
+      "step": 210
+    },
+    {
+      "epoch": 0.7432432432432432,
+      "grad_norm": 0.0011820381041616201,
+      "learning_rate": 2.8140051495778807e-05,
+      "loss": 0.0009,
+      "step": 220
+    },
+    {
+      "epoch": 0.777027027027027,
+      "grad_norm": 0.0016253776848316193,
+      "learning_rate": 2.7845512321857174e-05,
+      "loss": 0.0471,
+      "step": 230
+    },
+    {
+      "epoch": 0.8108108108108109,
+      "grad_norm": 0.0026769598480314016,
+      "learning_rate": 2.7531116695266156e-05,
+      "loss": 0.0093,
+      "step": 240
+    },
+    {
+      "epoch": 0.8445945945945946,
+      "grad_norm": 0.01816718466579914,
+      "learning_rate": 2.7197350605332033e-05,
+      "loss": 0.0019,
+      "step": 250
+    },
+    {
+      "epoch": 0.8783783783783784,
+      "grad_norm": 0.02628684975206852,
+      "learning_rate": 2.6844729984031765e-05,
+      "loss": 0.0144,
+      "step": 260
+    },
+    {
+      "epoch": 0.9121621621621622,
+      "grad_norm": 0.005144517868757248,
+      "learning_rate": 2.6473799908471016e-05,
+      "loss": 0.0004,
+      "step": 270
+    },
+    {
+      "epoch": 0.9459459459459459,
+      "grad_norm": 0.0019285095622763038,
+      "learning_rate": 2.6085133758309887e-05,
+      "loss": 0.0003,
+      "step": 280
+    },
+    {
+      "epoch": 0.9797297297297297,
+      "grad_norm": 0.0016680621774867177,
+      "learning_rate": 2.5679332329438924e-05,
+      "loss": 0.0,
+      "step": 290
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.0008619087748229504,
+      "eval_runtime": 1.2802,
+      "eval_samples_per_second": 205.444,
+      "eval_steps_per_second": 51.556,
+      "step": 296
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 888,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 231745936181760.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

finetuned_distilbert/checkpoint-296/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9988663bd319a438514dff5ffaf0334f9bcb456c0dcfeff35874f6030035c433
+size 5649

finetuned_distilbert/checkpoint-592/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-cased-distilled-squad",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForQuestionAnswering"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": true,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "vocab_size": 28996
+}

finetuned_distilbert/checkpoint-592/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89a37041947570ce75dbdd0622e4feba2c2f716b3dd31c5acf8c77c6b42a561c
+size 260782152

finetuned_distilbert/checkpoint-592/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38f3b6037e0614df73850681b863603d587ca9ccc1689b28187c0e00e1656fc8
+size 14645

finetuned_distilbert/checkpoint-592/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c35ec423c0634d869dd8ec1b9387f3973c45562d7d308287e7e995066436e53
+size 1465

finetuned_distilbert/checkpoint-592/trainer_state.json ADDED Viewed

	@@ -0,0 +1,462 @@

+{
+  "best_metric": 2.672690470717498e-06,
+  "best_model_checkpoint": "/content/drive/My Drive/BonsAI/RAG/finetuned_distilbert/checkpoint-592",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 592,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.033783783783783786,
+      "grad_norm": 63.3484992980957,
+      "learning_rate": 2.696629213483146e-06,
+      "loss": 1.5891,
+      "step": 10
+    },
+    {
+      "epoch": 0.06756756756756757,
+      "grad_norm": 12.823508262634277,
+      "learning_rate": 6.067415730337078e-06,
+      "loss": 0.9926,
+      "step": 20
+    },
+    {
+      "epoch": 0.10135135135135136,
+      "grad_norm": 10.632203102111816,
+      "learning_rate": 9.438202247191012e-06,
+      "loss": 0.4432,
+      "step": 30
+    },
+    {
+      "epoch": 0.13513513513513514,
+      "grad_norm": 1.4367406368255615,
+      "learning_rate": 1.2808988764044944e-05,
+      "loss": 0.1425,
+      "step": 40
+    },
+    {
+      "epoch": 0.16891891891891891,
+      "grad_norm": 5.937399387359619,
+      "learning_rate": 1.6179775280898875e-05,
+      "loss": 0.0221,
+      "step": 50
+    },
+    {
+      "epoch": 0.20270270270270271,
+      "grad_norm": 0.006844044663012028,
+      "learning_rate": 1.955056179775281e-05,
+      "loss": 0.0338,
+      "step": 60
+    },
+    {
+      "epoch": 0.23648648648648649,
+      "grad_norm": 0.0062227933667600155,
+      "learning_rate": 2.292134831460674e-05,
+      "loss": 0.0188,
+      "step": 70
+    },
+    {
+      "epoch": 0.2702702702702703,
+      "grad_norm": 0.9754657745361328,
+      "learning_rate": 2.595505617977528e-05,
+      "loss": 0.0298,
+      "step": 80
+    },
+    {
+      "epoch": 0.30405405405405406,
+      "grad_norm": 0.004088506102561951,
+      "learning_rate": 2.932584269662921e-05,
+      "loss": 0.0006,
+      "step": 90
+    },
+    {
+      "epoch": 0.33783783783783783,
+      "grad_norm": 0.0017175301909446716,
+      "learning_rate": 2.9992579868275392e-05,
+      "loss": 0.0011,
+      "step": 100
+    },
+    {
+      "epoch": 0.3716216216216216,
+      "grad_norm": 2.027472734451294,
+      "learning_rate": 2.996244816439765e-05,
+      "loss": 0.001,
+      "step": 110
+    },
+    {
+      "epoch": 0.40540540540540543,
+      "grad_norm": 0.12049974501132965,
+      "learning_rate": 2.990918767154285e-05,
+      "loss": 0.0137,
+      "step": 120
+    },
+    {
+      "epoch": 0.4391891891891892,
+      "grad_norm": 0.0021126759238541126,
+      "learning_rate": 2.983288071919922e-05,
+      "loss": 0.0016,
+      "step": 130
+    },
+    {
+      "epoch": 0.47297297297297297,
+      "grad_norm": 0.008341561071574688,
+      "learning_rate": 2.9733645261820407e-05,
+      "loss": 0.0012,
+      "step": 140
+    },
+    {
+      "epoch": 0.5067567567567568,
+      "grad_norm": 0.001279486226849258,
+      "learning_rate": 2.9611634696492718e-05,
+      "loss": 0.0235,
+      "step": 150
+    },
+    {
+      "epoch": 0.5405405405405406,
+      "grad_norm": 0.0039567407220602036,
+      "learning_rate": 2.946703762581565e-05,
+      "loss": 0.0008,
+      "step": 160
+    },
+    {
+      "epoch": 0.5743243243243243,
+      "grad_norm": 0.0008406731067225337,
+      "learning_rate": 2.9300077566362e-05,
+      "loss": 0.0002,
+      "step": 170
+    },
+    {
+      "epoch": 0.6081081081081081,
+      "grad_norm": 0.0006949682137928903,
+      "learning_rate": 2.91110126031684e-05,
+      "loss": 0.0001,
+      "step": 180
+    },
+    {
+      "epoch": 0.6418918918918919,
+      "grad_norm": 0.0053464132361114025,
+      "learning_rate": 2.8900134990790303e-05,
+      "loss": 0.0001,
+      "step": 190
+    },
+    {
+      "epoch": 0.6756756756756757,
+      "grad_norm": 37.02953338623047,
+      "learning_rate": 2.8667770701538055e-05,
+      "loss": 0.0478,
+      "step": 200
+    },
+    {
+      "epoch": 0.7094594594594594,
+      "grad_norm": 1.9292826652526855,
+      "learning_rate": 2.8414278921592484e-05,
+      "loss": 0.0009,
+      "step": 210
+    },
+    {
+      "epoch": 0.7432432432432432,
+      "grad_norm": 0.0011820381041616201,
+      "learning_rate": 2.8140051495778807e-05,
+      "loss": 0.0009,
+      "step": 220
+    },
+    {
+      "epoch": 0.777027027027027,
+      "grad_norm": 0.0016253776848316193,
+      "learning_rate": 2.7845512321857174e-05,
+      "loss": 0.0471,
+      "step": 230
+    },
+    {
+      "epoch": 0.8108108108108109,
+      "grad_norm": 0.0026769598480314016,
+      "learning_rate": 2.7531116695266156e-05,
+      "loss": 0.0093,
+      "step": 240
+    },
+    {
+      "epoch": 0.8445945945945946,
+      "grad_norm": 0.01816718466579914,
+      "learning_rate": 2.7197350605332033e-05,
+      "loss": 0.0019,
+      "step": 250
+    },
+    {
+      "epoch": 0.8783783783783784,
+      "grad_norm": 0.02628684975206852,
+      "learning_rate": 2.6844729984031765e-05,
+      "loss": 0.0144,
+      "step": 260
+    },
+    {
+      "epoch": 0.9121621621621622,
+      "grad_norm": 0.005144517868757248,
+      "learning_rate": 2.6473799908471016e-05,
+      "loss": 0.0004,
+      "step": 270
+    },
+    {
+      "epoch": 0.9459459459459459,
+      "grad_norm": 0.0019285095622763038,
+      "learning_rate": 2.6085133758309887e-05,
+      "loss": 0.0003,
+      "step": 280
+    },
+    {
+      "epoch": 0.9797297297297297,
+      "grad_norm": 0.0016680621774867177,
+      "learning_rate": 2.5679332329438924e-05,
+      "loss": 0.0,
+      "step": 290
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.0008619087748229504,
+      "eval_runtime": 1.2802,
+      "eval_samples_per_second": 205.444,
+      "eval_steps_per_second": 51.556,
+      "step": 296
+    },
+    {
+      "epoch": 1.0135135135135136,
+      "grad_norm": 0.0004233259242027998,
+      "learning_rate": 2.525702290527538e-05,
+      "loss": 0.0001,
+      "step": 300
+    },
+    {
+      "epoch": 1.0472972972972974,
+      "grad_norm": 0.0012364143040031195,
+      "learning_rate": 2.4818858287115342e-05,
+      "loss": 0.0,
+      "step": 310
+    },
+    {
+      "epoch": 1.0810810810810811,
+      "grad_norm": 0.0005589796346612275,
+      "learning_rate": 2.4365515785040646e-05,
+      "loss": 0.0007,
+      "step": 320
+    },
+    {
+      "epoch": 1.114864864864865,
+      "grad_norm": 0.002292066579684615,
+      "learning_rate": 2.3897696170940328e-05,
+      "loss": 0.0001,
+      "step": 330
+    },
+    {
+      "epoch": 1.1486486486486487,
+      "grad_norm": 0.00015610487025696784,
+      "learning_rate": 2.3416122595265107e-05,
+      "loss": 0.0001,
+      "step": 340
+    },
+    {
+      "epoch": 1.1824324324324325,
+      "grad_norm": 0.0001659117260714993,
+      "learning_rate": 2.29215394691893e-05,
+      "loss": 0.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.2162162162162162,
+      "grad_norm": 0.0006985558429732919,
+      "learning_rate": 2.2414711313908155e-05,
+      "loss": 0.0001,
+      "step": 360
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.00019110628636553884,
+      "learning_rate": 2.1896421578849363e-05,
+      "loss": 0.0,
+      "step": 370
+    },
+    {
+      "epoch": 1.2837837837837838,
+      "grad_norm": 0.00018775821081362665,
+      "learning_rate": 2.1367471430625464e-05,
+      "loss": 0.0,
+      "step": 380
+    },
+    {
+      "epoch": 1.3175675675675675,
+      "grad_norm": 0.011069620959460735,
+      "learning_rate": 2.0828678514599244e-05,
+      "loss": 0.0029,
+      "step": 390
+    },
+    {
+      "epoch": 1.3513513513513513,
+      "grad_norm": 0.0014019826194271445,
+      "learning_rate": 2.0280875690976484e-05,
+      "loss": 0.0,
+      "step": 400
+    },
+    {
+      "epoch": 1.385135135135135,
+      "grad_norm": 0.0038798104505985975,
+      "learning_rate": 1.9724909747379727e-05,
+      "loss": 0.0,
+      "step": 410
+    },
+    {
+      "epoch": 1.4189189189189189,
+      "grad_norm": 0.0006560618057847023,
+      "learning_rate": 1.9161640089893237e-05,
+      "loss": 0.0092,
+      "step": 420
+    },
+    {
+      "epoch": 1.4527027027027026,
+      "grad_norm": 0.001959842164069414,
+      "learning_rate": 1.859193741460243e-05,
+      "loss": 0.0,
+      "step": 430
+    },
+    {
+      "epoch": 1.4864864864864864,
+      "grad_norm": 0.0003938580339308828,
+      "learning_rate": 1.8016682361681422e-05,
+      "loss": 0.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.5202702702702702,
+      "grad_norm": 0.014895599335432053,
+      "learning_rate": 1.7436764154109012e-05,
+      "loss": 0.0,
+      "step": 450
+    },
+    {
+      "epoch": 1.554054054054054,
+      "grad_norm": 0.020251600071787834,
+      "learning_rate": 1.6853079223117554e-05,
+      "loss": 0.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.5878378378378377,
+      "grad_norm": 0.00020745389338117093,
+      "learning_rate": 1.6266529822499306e-05,
+      "loss": 0.0001,
+      "step": 470
+    },
+    {
+      "epoch": 1.6216216216216215,
+      "grad_norm": 0.0014518573880195618,
+      "learning_rate": 1.5678022633912405e-05,
+      "loss": 0.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.6554054054054053,
+      "grad_norm": 0.0029633333906531334,
+      "learning_rate": 1.508846736534227e-05,
+      "loss": 0.0,
+      "step": 490
+    },
+    {
+      "epoch": 1.689189189189189,
+      "grad_norm": 0.005655954591929913,
+      "learning_rate": 1.4498775344884886e-05,
+      "loss": 0.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.722972972972973,
+      "grad_norm": 0.014498586766421795,
+      "learning_rate": 1.3909858112025813e-05,
+      "loss": 0.0,
+      "step": 510
+    },
+    {
+      "epoch": 1.7567567567567568,
+      "grad_norm": 0.00017455824126955122,
+      "learning_rate": 1.3322626008592419e-05,
+      "loss": 0.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.7905405405405406,
+      "grad_norm": 0.0002139418647857383,
+      "learning_rate": 1.2737986771557401e-05,
+      "loss": 0.0,
+      "step": 530
+    },
+    {
+      "epoch": 1.8243243243243243,
+      "grad_norm": 0.0010211545741185546,
+      "learning_rate": 1.2156844129868893e-05,
+      "loss": 0.007,
+      "step": 540
+    },
+    {
+      "epoch": 1.8581081081081081,
+      "grad_norm": 0.00027997707366012037,
+      "learning_rate": 1.1580096407476113e-05,
+      "loss": 0.0,
+      "step": 550
+    },
+    {
+      "epoch": 1.8918918918918919,
+      "grad_norm": 0.000816286716144532,
+      "learning_rate": 1.1008635134710045e-05,
+      "loss": 0.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.9256756756756757,
+      "grad_norm": 0.00012246037658769637,
+      "learning_rate": 1.0443343670165568e-05,
+      "loss": 0.0,
+      "step": 570
+    },
+    {
+      "epoch": 1.9594594594594594,
+      "grad_norm": 0.0003848487394861877,
+      "learning_rate": 9.885095835215368e-06,
+      "loss": 0.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.9932432432432432,
+      "grad_norm": 0.002730418462306261,
+      "learning_rate": 9.334754563266447e-06,
+      "loss": 0.0,
+      "step": 590
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.672690470717498e-06,
+      "eval_runtime": 1.3117,
+      "eval_samples_per_second": 200.508,
+      "eval_steps_per_second": 50.318,
+      "step": 592
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 888,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 463491872363520.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

finetuned_distilbert/checkpoint-592/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9988663bd319a438514dff5ffaf0334f9bcb456c0dcfeff35874f6030035c433
+size 5649

finetuned_distilbert/checkpoint-888/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-cased-distilled-squad",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForQuestionAnswering"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": true,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "vocab_size": 28996
+}

finetuned_distilbert/checkpoint-888/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1815c6277178bd2bd1b6672d90d44328ba0166680879ad203e18287018b641e
+size 260782152

finetuned_distilbert/checkpoint-888/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3eefcfd7bd840bf480153e604c33439bfa2e5e4f678f4a8c419a50f2c140840d
+size 521625611

finetuned_distilbert/checkpoint-888/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f56d9ac2106efbb5eacd3b2cd2688d6b1b7e3129fdb61e2e170417d303ce7ad
+size 14645

finetuned_distilbert/checkpoint-888/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48877d6f8b876923600fd7b4ed236faffe4ced54e13b5bc55a27052ebdd6a2e9
+size 1465

finetuned_distilbert/checkpoint-888/trainer_state.json ADDED Viewed

	@@ -0,0 +1,673 @@

+{
+  "best_metric": 2.4147814201569417e-06,
+  "best_model_checkpoint": "/content/drive/My Drive/BonsAI/RAG/finetuned_distilbert/checkpoint-888",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 888,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.033783783783783786,
+      "grad_norm": 63.3484992980957,
+      "learning_rate": 2.696629213483146e-06,
+      "loss": 1.5891,
+      "step": 10
+    },
+    {
+      "epoch": 0.06756756756756757,
+      "grad_norm": 12.823508262634277,
+      "learning_rate": 6.067415730337078e-06,
+      "loss": 0.9926,
+      "step": 20
+    },
+    {
+      "epoch": 0.10135135135135136,
+      "grad_norm": 10.632203102111816,
+      "learning_rate": 9.438202247191012e-06,
+      "loss": 0.4432,
+      "step": 30
+    },
+    {
+      "epoch": 0.13513513513513514,
+      "grad_norm": 1.4367406368255615,
+      "learning_rate": 1.2808988764044944e-05,
+      "loss": 0.1425,
+      "step": 40
+    },
+    {
+      "epoch": 0.16891891891891891,
+      "grad_norm": 5.937399387359619,
+      "learning_rate": 1.6179775280898875e-05,
+      "loss": 0.0221,
+      "step": 50
+    },
+    {
+      "epoch": 0.20270270270270271,
+      "grad_norm": 0.006844044663012028,
+      "learning_rate": 1.955056179775281e-05,
+      "loss": 0.0338,
+      "step": 60
+    },
+    {
+      "epoch": 0.23648648648648649,
+      "grad_norm": 0.0062227933667600155,
+      "learning_rate": 2.292134831460674e-05,
+      "loss": 0.0188,
+      "step": 70
+    },
+    {
+      "epoch": 0.2702702702702703,
+      "grad_norm": 0.9754657745361328,
+      "learning_rate": 2.595505617977528e-05,
+      "loss": 0.0298,
+      "step": 80
+    },
+    {
+      "epoch": 0.30405405405405406,
+      "grad_norm": 0.004088506102561951,
+      "learning_rate": 2.932584269662921e-05,
+      "loss": 0.0006,
+      "step": 90
+    },
+    {
+      "epoch": 0.33783783783783783,
+      "grad_norm": 0.0017175301909446716,
+      "learning_rate": 2.9992579868275392e-05,
+      "loss": 0.0011,
+      "step": 100
+    },
+    {
+      "epoch": 0.3716216216216216,
+      "grad_norm": 2.027472734451294,
+      "learning_rate": 2.996244816439765e-05,
+      "loss": 0.001,
+      "step": 110
+    },
+    {
+      "epoch": 0.40540540540540543,
+      "grad_norm": 0.12049974501132965,
+      "learning_rate": 2.990918767154285e-05,
+      "loss": 0.0137,
+      "step": 120
+    },
+    {
+      "epoch": 0.4391891891891892,
+      "grad_norm": 0.0021126759238541126,
+      "learning_rate": 2.983288071919922e-05,
+      "loss": 0.0016,
+      "step": 130
+    },
+    {
+      "epoch": 0.47297297297297297,
+      "grad_norm": 0.008341561071574688,
+      "learning_rate": 2.9733645261820407e-05,
+      "loss": 0.0012,
+      "step": 140
+    },
+    {
+      "epoch": 0.5067567567567568,
+      "grad_norm": 0.001279486226849258,
+      "learning_rate": 2.9611634696492718e-05,
+      "loss": 0.0235,
+      "step": 150
+    },
+    {
+      "epoch": 0.5405405405405406,
+      "grad_norm": 0.0039567407220602036,
+      "learning_rate": 2.946703762581565e-05,
+      "loss": 0.0008,
+      "step": 160
+    },
+    {
+      "epoch": 0.5743243243243243,
+      "grad_norm": 0.0008406731067225337,
+      "learning_rate": 2.9300077566362e-05,
+      "loss": 0.0002,
+      "step": 170
+    },
+    {
+      "epoch": 0.6081081081081081,
+      "grad_norm": 0.0006949682137928903,
+      "learning_rate": 2.91110126031684e-05,
+      "loss": 0.0001,
+      "step": 180
+    },
+    {
+      "epoch": 0.6418918918918919,
+      "grad_norm": 0.0053464132361114025,
+      "learning_rate": 2.8900134990790303e-05,
+      "loss": 0.0001,
+      "step": 190
+    },
+    {
+      "epoch": 0.6756756756756757,
+      "grad_norm": 37.02953338623047,
+      "learning_rate": 2.8667770701538055e-05,
+      "loss": 0.0478,
+      "step": 200
+    },
+    {
+      "epoch": 0.7094594594594594,
+      "grad_norm": 1.9292826652526855,
+      "learning_rate": 2.8414278921592484e-05,
+      "loss": 0.0009,
+      "step": 210
+    },
+    {
+      "epoch": 0.7432432432432432,
+      "grad_norm": 0.0011820381041616201,
+      "learning_rate": 2.8140051495778807e-05,
+      "loss": 0.0009,
+      "step": 220
+    },
+    {
+      "epoch": 0.777027027027027,
+      "grad_norm": 0.0016253776848316193,
+      "learning_rate": 2.7845512321857174e-05,
+      "loss": 0.0471,
+      "step": 230
+    },
+    {
+      "epoch": 0.8108108108108109,
+      "grad_norm": 0.0026769598480314016,
+      "learning_rate": 2.7531116695266156e-05,
+      "loss": 0.0093,
+      "step": 240
+    },
+    {
+      "epoch": 0.8445945945945946,
+      "grad_norm": 0.01816718466579914,
+      "learning_rate": 2.7197350605332033e-05,
+      "loss": 0.0019,
+      "step": 250
+    },
+    {
+      "epoch": 0.8783783783783784,
+      "grad_norm": 0.02628684975206852,
+      "learning_rate": 2.6844729984031765e-05,
+      "loss": 0.0144,
+      "step": 260
+    },
+    {
+      "epoch": 0.9121621621621622,
+      "grad_norm": 0.005144517868757248,
+      "learning_rate": 2.6473799908471016e-05,
+      "loss": 0.0004,
+      "step": 270
+    },
+    {
+      "epoch": 0.9459459459459459,
+      "grad_norm": 0.0019285095622763038,
+      "learning_rate": 2.6085133758309887e-05,
+      "loss": 0.0003,
+      "step": 280
+    },
+    {
+      "epoch": 0.9797297297297297,
+      "grad_norm": 0.0016680621774867177,
+      "learning_rate": 2.5679332329438924e-05,
+      "loss": 0.0,
+      "step": 290
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.0008619087748229504,
+      "eval_runtime": 1.2802,
+      "eval_samples_per_second": 205.444,
+      "eval_steps_per_second": 51.556,
+      "step": 296
+    },
+    {
+      "epoch": 1.0135135135135136,
+      "grad_norm": 0.0004233259242027998,
+      "learning_rate": 2.525702290527538e-05,
+      "loss": 0.0001,
+      "step": 300
+    },
+    {
+      "epoch": 1.0472972972972974,
+      "grad_norm": 0.0012364143040031195,
+      "learning_rate": 2.4818858287115342e-05,
+      "loss": 0.0,
+      "step": 310
+    },
+    {
+      "epoch": 1.0810810810810811,
+      "grad_norm": 0.0005589796346612275,
+      "learning_rate": 2.4365515785040646e-05,
+      "loss": 0.0007,
+      "step": 320
+    },
+    {
+      "epoch": 1.114864864864865,
+      "grad_norm": 0.002292066579684615,
+      "learning_rate": 2.3897696170940328e-05,
+      "loss": 0.0001,
+      "step": 330
+    },
+    {
+      "epoch": 1.1486486486486487,
+      "grad_norm": 0.00015610487025696784,
+      "learning_rate": 2.3416122595265107e-05,
+      "loss": 0.0001,
+      "step": 340
+    },
+    {
+      "epoch": 1.1824324324324325,
+      "grad_norm": 0.0001659117260714993,
+      "learning_rate": 2.29215394691893e-05,
+      "loss": 0.0,
+      "step": 350
+    },
+    {
+      "epoch": 1.2162162162162162,
+      "grad_norm": 0.0006985558429732919,
+      "learning_rate": 2.2414711313908155e-05,
+      "loss": 0.0001,
+      "step": 360
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.00019110628636553884,
+      "learning_rate": 2.1896421578849363e-05,
+      "loss": 0.0,
+      "step": 370
+    },
+    {
+      "epoch": 1.2837837837837838,
+      "grad_norm": 0.00018775821081362665,
+      "learning_rate": 2.1367471430625464e-05,
+      "loss": 0.0,
+      "step": 380
+    },
+    {
+      "epoch": 1.3175675675675675,
+      "grad_norm": 0.011069620959460735,
+      "learning_rate": 2.0828678514599244e-05,
+      "loss": 0.0029,
+      "step": 390
+    },
+    {
+      "epoch": 1.3513513513513513,
+      "grad_norm": 0.0014019826194271445,
+      "learning_rate": 2.0280875690976484e-05,
+      "loss": 0.0,
+      "step": 400
+    },
+    {
+      "epoch": 1.385135135135135,
+      "grad_norm": 0.0038798104505985975,
+      "learning_rate": 1.9724909747379727e-05,
+      "loss": 0.0,
+      "step": 410
+    },
+    {
+      "epoch": 1.4189189189189189,
+      "grad_norm": 0.0006560618057847023,
+      "learning_rate": 1.9161640089893237e-05,
+      "loss": 0.0092,
+      "step": 420
+    },
+    {
+      "epoch": 1.4527027027027026,
+      "grad_norm": 0.001959842164069414,
+      "learning_rate": 1.859193741460243e-05,
+      "loss": 0.0,
+      "step": 430
+    },
+    {
+      "epoch": 1.4864864864864864,
+      "grad_norm": 0.0003938580339308828,
+      "learning_rate": 1.8016682361681422e-05,
+      "loss": 0.0,
+      "step": 440
+    },
+    {
+      "epoch": 1.5202702702702702,
+      "grad_norm": 0.014895599335432053,
+      "learning_rate": 1.7436764154109012e-05,
+      "loss": 0.0,
+      "step": 450
+    },
+    {
+      "epoch": 1.554054054054054,
+      "grad_norm": 0.020251600071787834,
+      "learning_rate": 1.6853079223117554e-05,
+      "loss": 0.0,
+      "step": 460
+    },
+    {
+      "epoch": 1.5878378378378377,
+      "grad_norm": 0.00020745389338117093,
+      "learning_rate": 1.6266529822499306e-05,
+      "loss": 0.0001,
+      "step": 470
+    },
+    {
+      "epoch": 1.6216216216216215,
+      "grad_norm": 0.0014518573880195618,
+      "learning_rate": 1.5678022633912405e-05,
+      "loss": 0.0,
+      "step": 480
+    },
+    {
+      "epoch": 1.6554054054054053,
+      "grad_norm": 0.0029633333906531334,
+      "learning_rate": 1.508846736534227e-05,
+      "loss": 0.0,
+      "step": 490
+    },
+    {
+      "epoch": 1.689189189189189,
+      "grad_norm": 0.005655954591929913,
+      "learning_rate": 1.4498775344884886e-05,
+      "loss": 0.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.722972972972973,
+      "grad_norm": 0.014498586766421795,
+      "learning_rate": 1.3909858112025813e-05,
+      "loss": 0.0,
+      "step": 510
+    },
+    {
+      "epoch": 1.7567567567567568,
+      "grad_norm": 0.00017455824126955122,
+      "learning_rate": 1.3322626008592419e-05,
+      "loss": 0.0,
+      "step": 520
+    },
+    {
+      "epoch": 1.7905405405405406,
+      "grad_norm": 0.0002139418647857383,
+      "learning_rate": 1.2737986771557401e-05,
+      "loss": 0.0,
+      "step": 530
+    },
+    {
+      "epoch": 1.8243243243243243,
+      "grad_norm": 0.0010211545741185546,
+      "learning_rate": 1.2156844129868893e-05,
+      "loss": 0.007,
+      "step": 540
+    },
+    {
+      "epoch": 1.8581081081081081,
+      "grad_norm": 0.00027997707366012037,
+      "learning_rate": 1.1580096407476113e-05,
+      "loss": 0.0,
+      "step": 550
+    },
+    {
+      "epoch": 1.8918918918918919,
+      "grad_norm": 0.000816286716144532,
+      "learning_rate": 1.1008635134710045e-05,
+      "loss": 0.0,
+      "step": 560
+    },
+    {
+      "epoch": 1.9256756756756757,
+      "grad_norm": 0.00012246037658769637,
+      "learning_rate": 1.0443343670165568e-05,
+      "loss": 0.0,
+      "step": 570
+    },
+    {
+      "epoch": 1.9594594594594594,
+      "grad_norm": 0.0003848487394861877,
+      "learning_rate": 9.885095835215368e-06,
+      "loss": 0.0,
+      "step": 580
+    },
+    {
+      "epoch": 1.9932432432432432,
+      "grad_norm": 0.002730418462306261,
+      "learning_rate": 9.334754563266447e-06,
+      "loss": 0.0,
+      "step": 590
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.672690470717498e-06,
+      "eval_runtime": 1.3117,
+      "eval_samples_per_second": 200.508,
+      "eval_steps_per_second": 50.318,
+      "step": 592
+    },
+    {
+      "epoch": 2.027027027027027,
+      "grad_norm": 0.0005972671206109226,
+      "learning_rate": 8.793170565847057e-06,
+      "loss": 0.0,
+      "step": 600
+    },
+    {
+      "epoch": 2.060810810810811,
+      "grad_norm": 0.003524207742884755,
+      "learning_rate": 8.26118101758611e-06,
+      "loss": 0.0008,
+      "step": 610
+    },
+    {
+      "epoch": 2.0945945945945947,
+      "grad_norm": 0.0023349469993263483,
+      "learning_rate": 7.739608262117777e-06,
+      "loss": 0.0,
+      "step": 620
+    },
+    {
+      "epoch": 2.1283783783783785,
+      "grad_norm": 0.00018075505795422941,
+      "learning_rate": 7.229258540911695e-06,
+      "loss": 0.0,
+      "step": 630
+    },
+    {
+      "epoch": 2.1621621621621623,
+      "grad_norm": 0.0010670729679986835,
+      "learning_rate": 6.730920746993713e-06,
+      "loss": 0.0,
+      "step": 640
+    },
+    {
+      "epoch": 2.195945945945946,
+      "grad_norm": 0.0004548372235149145,
+      "learning_rate": 6.2453652054836095e-06,
+      "loss": 0.0,
+      "step": 650
+    },
+    {
+      "epoch": 2.22972972972973,
+      "grad_norm": 0.0005834044422954321,
+      "learning_rate": 5.773342482835027e-06,
+      "loss": 0.0,
+      "step": 660
+    },
+    {
+      "epoch": 2.2635135135135136,
+      "grad_norm": 0.0003634082095231861,
+      "learning_rate": 5.315582226617964e-06,
+      "loss": 0.0,
+      "step": 670
+    },
+    {
+      "epoch": 2.2972972972972974,
+      "grad_norm": 0.0003151243436150253,
+      "learning_rate": 4.872792037637571e-06,
+      "loss": 0.0,
+      "step": 680
+    },
+    {
+      "epoch": 2.331081081081081,
+      "grad_norm": 0.00016155642515514046,
+      "learning_rate": 4.445656376132542e-06,
+      "loss": 0.0,
+      "step": 690
+    },
+    {
+      "epoch": 2.364864864864865,
+      "grad_norm": 0.0002693350543268025,
+      "learning_rate": 4.034835503743947e-06,
+      "loss": 0.0,
+      "step": 700
+    },
+    {
+      "epoch": 2.3986486486486487,
+      "grad_norm": 0.00017396688053850085,
+      "learning_rate": 3.6409644628900156e-06,
+      "loss": 0.0,
+      "step": 710
+    },
+    {
+      "epoch": 2.4324324324324325,
+      "grad_norm": 0.00020753787248395383,
+      "learning_rate": 3.264652095124477e-06,
+      "loss": 0.0,
+      "step": 720
+    },
+    {
+      "epoch": 2.4662162162162162,
+      "grad_norm": 0.004645159002393484,
+      "learning_rate": 2.9064800999959553e-06,
+      "loss": 0.0,
+      "step": 730
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.00012094193516531959,
+      "learning_rate": 2.567002135863145e-06,
+      "loss": 0.0,
+      "step": 740
+    },
+    {
+      "epoch": 2.5337837837837838,
+      "grad_norm": 0.0003555043658707291,
+      "learning_rate": 2.2467429640557903e-06,
+      "loss": 0.0,
+      "step": 750
+    },
+    {
+      "epoch": 2.5675675675675675,
+      "grad_norm": 0.0024685899261385202,
+      "learning_rate": 1.9461976377043095e-06,
+      "loss": 0.0,
+      "step": 760
+    },
+    {
+      "epoch": 2.6013513513513513,
+      "grad_norm": 0.0003505544736981392,
+      "learning_rate": 1.6658307364921116e-06,
+      "loss": 0.0,
+      "step": 770
+    },
+    {
+      "epoch": 2.635135135135135,
+      "grad_norm": 0.0009317690273746848,
+      "learning_rate": 1.4060756485133851e-06,
+      "loss": 0.0,
+      "step": 780
+    },
+    {
+      "epoch": 2.668918918918919,
+      "grad_norm": 0.0001754916738718748,
+      "learning_rate": 1.1673339003465067e-06,
+      "loss": 0.0,
+      "step": 790
+    },
+    {
+      "epoch": 2.7027027027027026,
+      "grad_norm": 0.0002394665643805638,
+      "learning_rate": 9.499745363786405e-07,
+      "loss": 0.0,
+      "step": 800
+    },
+    {
+      "epoch": 2.7364864864864864,
+      "grad_norm": 0.00020074588246643543,
+      "learning_rate": 7.543335483409642e-07,
+      "loss": 0.0002,
+      "step": 810
+    },
+    {
+      "epoch": 2.77027027027027,
+      "grad_norm": 0.008309523575007915,
+      "learning_rate": 5.807133559363159e-07,
+      "loss": 0.0,
+      "step": 820
+    },
+    {
+      "epoch": 2.804054054054054,
+      "grad_norm": 0.0003275635826867074,
+      "learning_rate": 4.2938233936211824e-07,
+      "loss": 0.0,
+      "step": 830
+    },
+    {
+      "epoch": 2.8378378378378377,
+      "grad_norm": 0.0005050458130426705,
+      "learning_rate": 3.005744244511988e-07,
+      "loss": 0.0,
+      "step": 840
+    },
+    {
+      "epoch": 2.8716216216216215,
+      "grad_norm": 0.0005031975451856852,
+      "learning_rate": 1.9448872107179717e-07,
+      "loss": 0.0,
+      "step": 850
+    },
+    {
+      "epoch": 2.9054054054054053,
+      "grad_norm": 0.04222193360328674,
+      "learning_rate": 1.1128921534570347e-07,
+      "loss": 0.0,
+      "step": 860
+    },
+    {
+      "epoch": 2.939189189189189,
+      "grad_norm": 0.002297344384714961,
+      "learning_rate": 5.11045161602991e-08,
+      "loss": 0.0,
+      "step": 870
+    },
+    {
+      "epoch": 2.972972972972973,
+      "grad_norm": 0.00021804046991746873,
+      "learning_rate": 1.402765636634551e-08,
+      "loss": 0.0,
+      "step": 880
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 2.4147814201569417e-06,
+      "eval_runtime": 1.3958,
+      "eval_samples_per_second": 188.419,
+      "eval_steps_per_second": 47.284,
+      "step": 888
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 888,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 695237808545280.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

finetuned_distilbert/checkpoint-888/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9988663bd319a438514dff5ffaf0334f9bcb456c0dcfeff35874f6030035c433
+size 5649

finetuned_distilbert/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-cased-distilled-squad",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForQuestionAnswering"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": true,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "vocab_size": 28996
+}

finetuned_distilbert/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1815c6277178bd2bd1b6672d90d44328ba0166680879ad203e18287018b641e
+size 260782152

finetuned_distilbert/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

finetuned_distilbert/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

finetuned_distilbert/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

finetuned_distilbert/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9988663bd319a438514dff5ffaf0334f9bcb456c0dcfeff35874f6030035c433
+size 5649

finetuned_distilbert/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff