Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

config.json +26 -0
optimizer.pt +3 -0
pytorch_model.bin +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
tokenizer.json +0 -0
tokenizer_config.json +55 -0
trainer_state.json +775 -0
training_args.bin +3 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "_name_or_path": "intfloat/e5-base-v2",
+  "architectures": [
+    "BertModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.43.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:864d50e4126e45f39f26e7b21405955edb547e8d8d4b1a1d3a833b044e240078
+size 871297978

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e715e1af5cd5cc6baafc2f28a200a9f068b4fb59458acc6255a2518dc5b2b59b
+size 437996134

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:038235e47710842766eec88106fd85eccb3c65e3759cd8a2b2471421a7d95501
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91c3a4cacee02650c99cba1547310e127667d610f1d02da2c6f1f883440f9445
+size 1064

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,775 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 26.0,
+  "eval_steps": 500,
+  "global_step": 1066,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24390243902439024,
+      "grad_norm": 153.20391845703125,
+      "learning_rate": 1e-05,
+      "loss": 14.3989,
+      "step": 10
+    },
+    {
+      "epoch": 0.4878048780487805,
+      "grad_norm": 98.0101318359375,
+      "learning_rate": 1e-05,
+      "loss": 9.4888,
+      "step": 20
+    },
+    {
+      "epoch": 0.7317073170731707,
+      "grad_norm": 66.21862030029297,
+      "learning_rate": 1e-05,
+      "loss": 7.1045,
+      "step": 30
+    },
+    {
+      "epoch": 0.975609756097561,
+      "grad_norm": 51.25503921508789,
+      "learning_rate": 1e-05,
+      "loss": 6.0752,
+      "step": 40
+    },
+    {
+      "epoch": 1.2195121951219512,
+      "grad_norm": 42.94609451293945,
+      "learning_rate": 1e-05,
+      "loss": 5.4784,
+      "step": 50
+    },
+    {
+      "epoch": 1.4634146341463414,
+      "grad_norm": 23.749080657958984,
+      "learning_rate": 1e-05,
+      "loss": 5.0407,
+      "step": 60
+    },
+    {
+      "epoch": 1.7073170731707317,
+      "grad_norm": 21.82228660583496,
+      "learning_rate": 1e-05,
+      "loss": 4.7377,
+      "step": 70
+    },
+    {
+      "epoch": 1.951219512195122,
+      "grad_norm": 16.767141342163086,
+      "learning_rate": 1e-05,
+      "loss": 4.6813,
+      "step": 80
+    },
+    {
+      "epoch": 2.1951219512195124,
+      "grad_norm": 15.738419532775879,
+      "learning_rate": 1e-05,
+      "loss": 4.5301,
+      "step": 90
+    },
+    {
+      "epoch": 2.4390243902439024,
+      "grad_norm": 15.758868217468262,
+      "learning_rate": 1e-05,
+      "loss": 4.4416,
+      "step": 100
+    },
+    {
+      "epoch": 2.682926829268293,
+      "grad_norm": 14.666833877563477,
+      "learning_rate": 1e-05,
+      "loss": 4.324,
+      "step": 110
+    },
+    {
+      "epoch": 2.926829268292683,
+      "grad_norm": 14.121631622314453,
+      "learning_rate": 1e-05,
+      "loss": 4.2509,
+      "step": 120
+    },
+    {
+      "epoch": 3.1707317073170733,
+      "grad_norm": 13.137129783630371,
+      "learning_rate": 1e-05,
+      "loss": 4.2184,
+      "step": 130
+    },
+    {
+      "epoch": 3.4146341463414633,
+      "grad_norm": 13.16901683807373,
+      "learning_rate": 1e-05,
+      "loss": 4.2392,
+      "step": 140
+    },
+    {
+      "epoch": 3.658536585365854,
+      "grad_norm": 12.6273193359375,
+      "learning_rate": 1e-05,
+      "loss": 4.2731,
+      "step": 150
+    },
+    {
+      "epoch": 3.902439024390244,
+      "grad_norm": 12.8707275390625,
+      "learning_rate": 1e-05,
+      "loss": 4.1833,
+      "step": 160
+    },
+    {
+      "epoch": 4.146341463414634,
+      "grad_norm": 12.259010314941406,
+      "learning_rate": 1e-05,
+      "loss": 4.1131,
+      "step": 170
+    },
+    {
+      "epoch": 4.390243902439025,
+      "grad_norm": 13.05242919921875,
+      "learning_rate": 1e-05,
+      "loss": 4.0821,
+      "step": 180
+    },
+    {
+      "epoch": 4.634146341463414,
+      "grad_norm": 12.534492492675781,
+      "learning_rate": 1e-05,
+      "loss": 4.1202,
+      "step": 190
+    },
+    {
+      "epoch": 4.878048780487805,
+      "grad_norm": 11.910691261291504,
+      "learning_rate": 1e-05,
+      "loss": 4.0793,
+      "step": 200
+    },
+    {
+      "epoch": 5.121951219512195,
+      "grad_norm": 13.948406219482422,
+      "learning_rate": 1e-05,
+      "loss": 3.9298,
+      "step": 210
+    },
+    {
+      "epoch": 5.365853658536586,
+      "grad_norm": 15.066719055175781,
+      "learning_rate": 1e-05,
+      "loss": 4.1399,
+      "step": 220
+    },
+    {
+      "epoch": 5.609756097560975,
+      "grad_norm": 12.302136421203613,
+      "learning_rate": 1e-05,
+      "loss": 3.9993,
+      "step": 230
+    },
+    {
+      "epoch": 5.853658536585366,
+      "grad_norm": 12.577598571777344,
+      "learning_rate": 1e-05,
+      "loss": 3.9465,
+      "step": 240
+    },
+    {
+      "epoch": 6.097560975609756,
+      "grad_norm": 12.180274963378906,
+      "learning_rate": 1e-05,
+      "loss": 3.8803,
+      "step": 250
+    },
+    {
+      "epoch": 6.341463414634147,
+      "grad_norm": 14.182092666625977,
+      "learning_rate": 1e-05,
+      "loss": 3.9006,
+      "step": 260
+    },
+    {
+      "epoch": 6.585365853658536,
+      "grad_norm": 15.906909942626953,
+      "learning_rate": 1e-05,
+      "loss": 3.8925,
+      "step": 270
+    },
+    {
+      "epoch": 6.829268292682927,
+      "grad_norm": 11.003096580505371,
+      "learning_rate": 1e-05,
+      "loss": 3.9061,
+      "step": 280
+    },
+    {
+      "epoch": 7.073170731707317,
+      "grad_norm": 12.84813404083252,
+      "learning_rate": 1e-05,
+      "loss": 3.8672,
+      "step": 290
+    },
+    {
+      "epoch": 7.317073170731708,
+      "grad_norm": 13.7793607711792,
+      "learning_rate": 1e-05,
+      "loss": 3.8265,
+      "step": 300
+    },
+    {
+      "epoch": 7.560975609756097,
+      "grad_norm": 15.24905776977539,
+      "learning_rate": 1e-05,
+      "loss": 3.7881,
+      "step": 310
+    },
+    {
+      "epoch": 7.804878048780488,
+      "grad_norm": 13.246805191040039,
+      "learning_rate": 1e-05,
+      "loss": 3.7045,
+      "step": 320
+    },
+    {
+      "epoch": 8.048780487804878,
+      "grad_norm": 13.150066375732422,
+      "learning_rate": 1e-05,
+      "loss": 3.6827,
+      "step": 330
+    },
+    {
+      "epoch": 8.292682926829269,
+      "grad_norm": 16.51359748840332,
+      "learning_rate": 1e-05,
+      "loss": 3.7162,
+      "step": 340
+    },
+    {
+      "epoch": 8.536585365853659,
+      "grad_norm": 12.987756729125977,
+      "learning_rate": 1e-05,
+      "loss": 3.8721,
+      "step": 350
+    },
+    {
+      "epoch": 8.78048780487805,
+      "grad_norm": 12.487648010253906,
+      "learning_rate": 1e-05,
+      "loss": 3.7642,
+      "step": 360
+    },
+    {
+      "epoch": 9.024390243902438,
+      "grad_norm": 13.399900436401367,
+      "learning_rate": 1e-05,
+      "loss": 3.5614,
+      "step": 370
+    },
+    {
+      "epoch": 9.268292682926829,
+      "grad_norm": 12.230491638183594,
+      "learning_rate": 1e-05,
+      "loss": 3.6516,
+      "step": 380
+    },
+    {
+      "epoch": 9.512195121951219,
+      "grad_norm": 12.481654167175293,
+      "learning_rate": 1e-05,
+      "loss": 3.5442,
+      "step": 390
+    },
+    {
+      "epoch": 9.75609756097561,
+      "grad_norm": 12.716032028198242,
+      "learning_rate": 1e-05,
+      "loss": 3.7811,
+      "step": 400
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 16.65569305419922,
+      "learning_rate": 1e-05,
+      "loss": 3.6132,
+      "step": 410
+    },
+    {
+      "epoch": 10.24390243902439,
+      "grad_norm": 12.62430477142334,
+      "learning_rate": 1e-05,
+      "loss": 3.6959,
+      "step": 420
+    },
+    {
+      "epoch": 10.487804878048781,
+      "grad_norm": 12.553898811340332,
+      "learning_rate": 1e-05,
+      "loss": 3.6677,
+      "step": 430
+    },
+    {
+      "epoch": 10.731707317073171,
+      "grad_norm": 14.134100914001465,
+      "learning_rate": 1e-05,
+      "loss": 3.678,
+      "step": 440
+    },
+    {
+      "epoch": 10.975609756097562,
+      "grad_norm": 12.144598960876465,
+      "learning_rate": 1e-05,
+      "loss": 3.6878,
+      "step": 450
+    },
+    {
+      "epoch": 11.21951219512195,
+      "grad_norm": 13.102901458740234,
+      "learning_rate": 1e-05,
+      "loss": 3.4404,
+      "step": 460
+    },
+    {
+      "epoch": 11.463414634146341,
+      "grad_norm": 12.662181854248047,
+      "learning_rate": 1e-05,
+      "loss": 3.5925,
+      "step": 470
+    },
+    {
+      "epoch": 11.707317073170731,
+      "grad_norm": 12.018118858337402,
+      "learning_rate": 1e-05,
+      "loss": 3.5211,
+      "step": 480
+    },
+    {
+      "epoch": 11.951219512195122,
+      "grad_norm": 13.634488105773926,
+      "learning_rate": 1e-05,
+      "loss": 3.6878,
+      "step": 490
+    },
+    {
+      "epoch": 12.195121951219512,
+      "grad_norm": 13.962358474731445,
+      "learning_rate": 1e-05,
+      "loss": 3.4749,
+      "step": 500
+    },
+    {
+      "epoch": 12.439024390243903,
+      "grad_norm": 13.360703468322754,
+      "learning_rate": 1e-05,
+      "loss": 3.3638,
+      "step": 510
+    },
+    {
+      "epoch": 12.682926829268293,
+      "grad_norm": 12.813912391662598,
+      "learning_rate": 1e-05,
+      "loss": 3.6672,
+      "step": 520
+    },
+    {
+      "epoch": 12.926829268292684,
+      "grad_norm": 12.291671752929688,
+      "learning_rate": 1e-05,
+      "loss": 3.5141,
+      "step": 530
+    },
+    {
+      "epoch": 13.170731707317072,
+      "grad_norm": 13.497099876403809,
+      "learning_rate": 1e-05,
+      "loss": 3.4268,
+      "step": 540
+    },
+    {
+      "epoch": 13.414634146341463,
+      "grad_norm": 11.689033508300781,
+      "learning_rate": 1e-05,
+      "loss": 3.4772,
+      "step": 550
+    },
+    {
+      "epoch": 13.658536585365853,
+      "grad_norm": 13.340636253356934,
+      "learning_rate": 1e-05,
+      "loss": 3.3453,
+      "step": 560
+    },
+    {
+      "epoch": 13.902439024390244,
+      "grad_norm": 14.993213653564453,
+      "learning_rate": 1e-05,
+      "loss": 3.6322,
+      "step": 570
+    },
+    {
+      "epoch": 14.146341463414634,
+      "grad_norm": 15.0778169631958,
+      "learning_rate": 1e-05,
+      "loss": 3.3216,
+      "step": 580
+    },
+    {
+      "epoch": 14.390243902439025,
+      "grad_norm": 13.393571853637695,
+      "learning_rate": 1e-05,
+      "loss": 3.2652,
+      "step": 590
+    },
+    {
+      "epoch": 14.634146341463415,
+      "grad_norm": 13.353214263916016,
+      "learning_rate": 1e-05,
+      "loss": 3.3916,
+      "step": 600
+    },
+    {
+      "epoch": 14.878048780487806,
+      "grad_norm": 13.477783203125,
+      "learning_rate": 1e-05,
+      "loss": 3.5582,
+      "step": 610
+    },
+    {
+      "epoch": 15.121951219512194,
+      "grad_norm": 13.947030067443848,
+      "learning_rate": 1e-05,
+      "loss": 3.1888,
+      "step": 620
+    },
+    {
+      "epoch": 15.365853658536585,
+      "grad_norm": 13.27822208404541,
+      "learning_rate": 1e-05,
+      "loss": 3.3959,
+      "step": 630
+    },
+    {
+      "epoch": 15.609756097560975,
+      "grad_norm": 12.724740982055664,
+      "learning_rate": 1e-05,
+      "loss": 3.4102,
+      "step": 640
+    },
+    {
+      "epoch": 15.853658536585366,
+      "grad_norm": 12.53013801574707,
+      "learning_rate": 1e-05,
+      "loss": 3.3826,
+      "step": 650
+    },
+    {
+      "epoch": 16.097560975609756,
+      "grad_norm": 12.549883842468262,
+      "learning_rate": 1e-05,
+      "loss": 3.1557,
+      "step": 660
+    },
+    {
+      "epoch": 16.341463414634145,
+      "grad_norm": 14.903629302978516,
+      "learning_rate": 1e-05,
+      "loss": 3.215,
+      "step": 670
+    },
+    {
+      "epoch": 16.585365853658537,
+      "grad_norm": 13.426742553710938,
+      "learning_rate": 1e-05,
+      "loss": 3.2223,
+      "step": 680
+    },
+    {
+      "epoch": 16.829268292682926,
+      "grad_norm": 12.214171409606934,
+      "learning_rate": 1e-05,
+      "loss": 3.3669,
+      "step": 690
+    },
+    {
+      "epoch": 17.073170731707318,
+      "grad_norm": 13.474701881408691,
+      "learning_rate": 1e-05,
+      "loss": 3.1725,
+      "step": 700
+    },
+    {
+      "epoch": 17.317073170731707,
+      "grad_norm": 13.971524238586426,
+      "learning_rate": 1e-05,
+      "loss": 3.3139,
+      "step": 710
+    },
+    {
+      "epoch": 17.5609756097561,
+      "grad_norm": 13.29129695892334,
+      "learning_rate": 1e-05,
+      "loss": 3.2124,
+      "step": 720
+    },
+    {
+      "epoch": 17.804878048780488,
+      "grad_norm": 14.565364837646484,
+      "learning_rate": 1e-05,
+      "loss": 3.1354,
+      "step": 730
+    },
+    {
+      "epoch": 18.048780487804876,
+      "grad_norm": 14.526548385620117,
+      "learning_rate": 1e-05,
+      "loss": 3.0789,
+      "step": 740
+    },
+    {
+      "epoch": 18.29268292682927,
+      "grad_norm": 14.852441787719727,
+      "learning_rate": 1e-05,
+      "loss": 3.1379,
+      "step": 750
+    },
+    {
+      "epoch": 18.536585365853657,
+      "grad_norm": 14.984235763549805,
+      "learning_rate": 1e-05,
+      "loss": 3.0832,
+      "step": 760
+    },
+    {
+      "epoch": 18.78048780487805,
+      "grad_norm": 13.381596565246582,
+      "learning_rate": 1e-05,
+      "loss": 3.0988,
+      "step": 770
+    },
+    {
+      "epoch": 19.024390243902438,
+      "grad_norm": 13.721541404724121,
+      "learning_rate": 1e-05,
+      "loss": 3.0601,
+      "step": 780
+    },
+    {
+      "epoch": 19.26829268292683,
+      "grad_norm": 14.160118103027344,
+      "learning_rate": 1e-05,
+      "loss": 3.1213,
+      "step": 790
+    },
+    {
+      "epoch": 19.51219512195122,
+      "grad_norm": 14.012876510620117,
+      "learning_rate": 1e-05,
+      "loss": 3.2049,
+      "step": 800
+    },
+    {
+      "epoch": 19.75609756097561,
+      "grad_norm": 14.465662002563477,
+      "learning_rate": 1e-05,
+      "loss": 3.1248,
+      "step": 810
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 19.51372718811035,
+      "learning_rate": 1e-05,
+      "loss": 2.9954,
+      "step": 820
+    },
+    {
+      "epoch": 20.24390243902439,
+      "grad_norm": 13.775348663330078,
+      "learning_rate": 1e-05,
+      "loss": 3.1184,
+      "step": 830
+    },
+    {
+      "epoch": 20.48780487804878,
+      "grad_norm": 15.329011917114258,
+      "learning_rate": 1e-05,
+      "loss": 3.1376,
+      "step": 840
+    },
+    {
+      "epoch": 20.73170731707317,
+      "grad_norm": 14.254768371582031,
+      "learning_rate": 1e-05,
+      "loss": 2.9425,
+      "step": 850
+    },
+    {
+      "epoch": 20.975609756097562,
+      "grad_norm": 14.885010719299316,
+      "learning_rate": 1e-05,
+      "loss": 3.0427,
+      "step": 860
+    },
+    {
+      "epoch": 21.21951219512195,
+      "grad_norm": 13.358927726745605,
+      "learning_rate": 1e-05,
+      "loss": 3.0594,
+      "step": 870
+    },
+    {
+      "epoch": 21.463414634146343,
+      "grad_norm": 14.775203704833984,
+      "learning_rate": 1e-05,
+      "loss": 3.0176,
+      "step": 880
+    },
+    {
+      "epoch": 21.70731707317073,
+      "grad_norm": 14.861948013305664,
+      "learning_rate": 1e-05,
+      "loss": 2.9453,
+      "step": 890
+    },
+    {
+      "epoch": 21.951219512195124,
+      "grad_norm": 13.654282569885254,
+      "learning_rate": 1e-05,
+      "loss": 3.0511,
+      "step": 900
+    },
+    {
+      "epoch": 22.195121951219512,
+      "grad_norm": 14.70966911315918,
+      "learning_rate": 1e-05,
+      "loss": 3.023,
+      "step": 910
+    },
+    {
+      "epoch": 22.4390243902439,
+      "grad_norm": 14.872281074523926,
+      "learning_rate": 1e-05,
+      "loss": 2.9731,
+      "step": 920
+    },
+    {
+      "epoch": 22.682926829268293,
+      "grad_norm": 14.201629638671875,
+      "learning_rate": 1e-05,
+      "loss": 3.0614,
+      "step": 930
+    },
+    {
+      "epoch": 22.926829268292682,
+      "grad_norm": 16.941543579101562,
+      "learning_rate": 1e-05,
+      "loss": 3.0268,
+      "step": 940
+    },
+    {
+      "epoch": 23.170731707317074,
+      "grad_norm": 13.769314765930176,
+      "learning_rate": 1e-05,
+      "loss": 2.9794,
+      "step": 950
+    },
+    {
+      "epoch": 23.414634146341463,
+      "grad_norm": 15.23621654510498,
+      "learning_rate": 1e-05,
+      "loss": 2.9424,
+      "step": 960
+    },
+    {
+      "epoch": 23.658536585365855,
+      "grad_norm": 14.478521347045898,
+      "learning_rate": 1e-05,
+      "loss": 2.8599,
+      "step": 970
+    },
+    {
+      "epoch": 23.902439024390244,
+      "grad_norm": 15.78727912902832,
+      "learning_rate": 1e-05,
+      "loss": 2.9107,
+      "step": 980
+    },
+    {
+      "epoch": 24.146341463414632,
+      "grad_norm": 13.50251579284668,
+      "learning_rate": 1e-05,
+      "loss": 2.9054,
+      "step": 990
+    },
+    {
+      "epoch": 24.390243902439025,
+      "grad_norm": 14.051477432250977,
+      "learning_rate": 1e-05,
+      "loss": 2.9868,
+      "step": 1000
+    },
+    {
+      "epoch": 24.634146341463413,
+      "grad_norm": 14.178876876831055,
+      "learning_rate": 1e-05,
+      "loss": 3.0255,
+      "step": 1010
+    },
+    {
+      "epoch": 24.878048780487806,
+      "grad_norm": 15.032842636108398,
+      "learning_rate": 1e-05,
+      "loss": 2.9312,
+      "step": 1020
+    },
+    {
+      "epoch": 25.121951219512194,
+      "grad_norm": 16.624441146850586,
+      "learning_rate": 1e-05,
+      "loss": 2.8341,
+      "step": 1030
+    },
+    {
+      "epoch": 25.365853658536587,
+      "grad_norm": 14.353311538696289,
+      "learning_rate": 1e-05,
+      "loss": 2.9496,
+      "step": 1040
+    },
+    {
+      "epoch": 25.609756097560975,
+      "grad_norm": 16.341983795166016,
+      "learning_rate": 1e-05,
+      "loss": 2.7901,
+      "step": 1050
+    },
+    {
+      "epoch": 25.853658536585368,
+      "grad_norm": 15.729475021362305,
+      "learning_rate": 1e-05,
+      "loss": 2.8535,
+      "step": 1060
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1230,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 30,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 64,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c028c4876049bc6bb78a424268085afa67f4ccb752a59989169c4bb9542f3be8
+size 5368

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff