Delete last-checkpoint

Browse files

Files changed (9) hide show

last-checkpoint/model.safetensors +0 -3
last-checkpoint/optimizer.pt +0 -3
last-checkpoint/rng_state.pth +0 -3
last-checkpoint/scheduler.pt +0 -3
last-checkpoint/special_tokens_map.json +0 -16
last-checkpoint/tokenizer.json +0 -0
last-checkpoint/tokenizer_config.json +0 -20
last-checkpoint/trainer_state.json +0 -970
last-checkpoint/training_args.bin +0 -3

last-checkpoint/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4e65a61da3bd59857bc61ebd07e726758d8f85f0c3bbf9a6bdfafa5218f0a6c3
-size 91951912

last-checkpoint/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:665e7c098b20618c3e078ad86acab2e0ca25d1467a8d0e2112c68af547b92299
-size 183993547

last-checkpoint/rng_state.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:236304ae89e49aae8260113165ee63419b9b745f79120014328a7fa31ed79b42
-size 14645

last-checkpoint/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d352eeae2aa15ed4d011e01fe52a140d1d472b101b8457914fdee010588d6f43
-size 1465

last-checkpoint/special_tokens_map.json DELETED Viewed

@@ -1,16 +0,0 @@
-{
-  "eos_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<|endoftext|>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}

last-checkpoint/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

last-checkpoint/tokenizer_config.json DELETED Viewed

@@ -1,20 +0,0 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
-  "extra_special_tokens": {},
-  "model_max_length": 2048,
-  "pad_token": "<|endoftext|>",
-  "padding_side": "right",
-  "tokenizer_class": "PreTrainedTokenizerFast",
-  "truncation_side": "right"
-}

last-checkpoint/trainer_state.json DELETED Viewed

@@ -1,970 +0,0 @@
-{
-  "best_global_step": null,
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.012,
-  "eval_steps": 10,
-  "global_step": 120,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.0001,
-      "grad_norm": 13.644583702087402,
-      "learning_rate": 0.0,
-      "loss": 73.3569,
-      "step": 1
-    },
-    {
-      "epoch": 0.0002,
-      "grad_norm": 13.757925987243652,
-      "learning_rate": 1e-05,
-      "loss": 73.384,
-      "step": 2
-    },
-    {
-      "epoch": 0.0003,
-      "grad_norm": 13.819925308227539,
-      "learning_rate": 2e-05,
-      "loss": 73.4764,
-      "step": 3
-    },
-    {
-      "epoch": 0.0004,
-      "grad_norm": 13.885299682617188,
-      "learning_rate": 3e-05,
-      "loss": 73.2476,
-      "step": 4
-    },
-    {
-      "epoch": 0.0005,
-      "grad_norm": 13.726334571838379,
-      "learning_rate": 4e-05,
-      "loss": 72.8724,
-      "step": 5
-    },
-    {
-      "epoch": 0.0006,
-      "grad_norm": 13.66053581237793,
-      "learning_rate": 5e-05,
-      "loss": 72.2172,
-      "step": 6
-    },
-    {
-      "epoch": 0.0007,
-      "grad_norm": 13.866334915161133,
-      "learning_rate": 6e-05,
-      "loss": 71.1639,
-      "step": 7
-    },
-    {
-      "epoch": 0.0008,
-      "grad_norm": 13.102575302124023,
-      "learning_rate": 7.000000000000001e-05,
-      "loss": 70.3215,
-      "step": 8
-    },
-    {
-      "epoch": 0.0009,
-      "grad_norm": 13.716519355773926,
-      "learning_rate": 8e-05,
-      "loss": 67.5932,
-      "step": 9
-    },
-    {
-      "epoch": 0.001,
-      "grad_norm": 12.51129150390625,
-      "learning_rate": 8.999999999999999e-05,
-      "loss": 65.3889,
-      "step": 10
-    },
-    {
-      "epoch": 0.001,
-      "eval_loss": 7.900570869445801,
-      "eval_runtime": 680.8566,
-      "eval_samples_per_second": 14.687,
-      "eval_steps_per_second": 1.836,
-      "step": 10
-    },
-    {
-      "epoch": 0.0011,
-      "grad_norm": 11.444281578063965,
-      "learning_rate": 0.0001,
-      "loss": 63.3063,
-      "step": 11
-    },
-    {
-      "epoch": 0.0012,
-      "grad_norm": 9.952727317810059,
-      "learning_rate": 0.00011,
-      "loss": 60.919,
-      "step": 12
-    },
-    {
-      "epoch": 0.0013,
-      "grad_norm": 8.779980659484863,
-      "learning_rate": 0.00012,
-      "loss": 59.1227,
-      "step": 13
-    },
-    {
-      "epoch": 0.0014,
-      "grad_norm": 7.721734046936035,
-      "learning_rate": 0.00013000000000000002,
-      "loss": 57.2133,
-      "step": 14
-    },
-    {
-      "epoch": 0.0015,
-      "grad_norm": 7.298959732055664,
-      "learning_rate": 0.00014000000000000001,
-      "loss": 55.2262,
-      "step": 15
-    },
-    {
-      "epoch": 0.0016,
-      "grad_norm": 7.175290107727051,
-      "learning_rate": 0.00015000000000000001,
-      "loss": 52.2363,
-      "step": 16
-    },
-    {
-      "epoch": 0.0017,
-      "grad_norm": 6.67252779006958,
-      "learning_rate": 0.00016,
-      "loss": 50.2609,
-      "step": 17
-    },
-    {
-      "epoch": 0.0018,
-      "grad_norm": 6.430507183074951,
-      "learning_rate": 0.00016999999999999999,
-      "loss": 47.5378,
-      "step": 18
-    },
-    {
-      "epoch": 0.0019,
-      "grad_norm": 5.900411128997803,
-      "learning_rate": 0.00017999999999999998,
-      "loss": 45.5708,
-      "step": 19
-    },
-    {
-      "epoch": 0.002,
-      "grad_norm": 5.60172176361084,
-      "learning_rate": 0.00019,
-      "loss": 43.1171,
-      "step": 20
-    },
-    {
-      "epoch": 0.002,
-      "eval_loss": 5.050407409667969,
-      "eval_runtime": 679.6399,
-      "eval_samples_per_second": 14.714,
-      "eval_steps_per_second": 1.839,
-      "step": 20
-    },
-    {
-      "epoch": 0.0021,
-      "grad_norm": 5.3365912437438965,
-      "learning_rate": 0.0002,
-      "loss": 39.7964,
-      "step": 21
-    },
-    {
-      "epoch": 0.0022,
-      "grad_norm": 5.166492462158203,
-      "learning_rate": 0.00021000000000000004,
-      "loss": 37.9665,
-      "step": 22
-    },
-    {
-      "epoch": 0.0023,
-      "grad_norm": 4.789157867431641,
-      "learning_rate": 0.00022,
-      "loss": 35.5756,
-      "step": 23
-    },
-    {
-      "epoch": 0.0024,
-      "grad_norm": 4.50107479095459,
-      "learning_rate": 0.00022999999999999998,
-      "loss": 33.2697,
-      "step": 24
-    },
-    {
-      "epoch": 0.0025,
-      "grad_norm": 4.265627384185791,
-      "learning_rate": 0.00024,
-      "loss": 31.4949,
-      "step": 25
-    },
-    {
-      "epoch": 0.0026,
-      "grad_norm": 4.027210235595703,
-      "learning_rate": 0.00025,
-      "loss": 28.0505,
-      "step": 26
-    },
-    {
-      "epoch": 0.0027,
-      "grad_norm": 3.928957223892212,
-      "learning_rate": 0.00026000000000000003,
-      "loss": 26.2355,
-      "step": 27
-    },
-    {
-      "epoch": 0.0028,
-      "grad_norm": 3.5308196544647217,
-      "learning_rate": 0.00027,
-      "loss": 24.1822,
-      "step": 28
-    },
-    {
-      "epoch": 0.0029,
-      "grad_norm": 3.3050384521484375,
-      "learning_rate": 0.00028000000000000003,
-      "loss": 23.0633,
-      "step": 29
-    },
-    {
-      "epoch": 0.003,
-      "grad_norm": 3.0656802654266357,
-      "learning_rate": 0.00029,
-      "loss": 20.7796,
-      "step": 30
-    },
-    {
-      "epoch": 0.003,
-      "eval_loss": 2.453228235244751,
-      "eval_runtime": 680.3163,
-      "eval_samples_per_second": 14.699,
-      "eval_steps_per_second": 1.837,
-      "step": 30
-    },
-    {
-      "epoch": 0.0031,
-      "grad_norm": 2.902653694152832,
-      "learning_rate": 0.00030000000000000003,
-      "loss": 19.6354,
-      "step": 31
-    },
-    {
-      "epoch": 0.0032,
-      "grad_norm": 2.723034620285034,
-      "learning_rate": 0.00031,
-      "loss": 18.3838,
-      "step": 32
-    },
-    {
-      "epoch": 0.0033,
-      "grad_norm": 2.559993267059326,
-      "learning_rate": 0.00032,
-      "loss": 16.345,
-      "step": 33
-    },
-    {
-      "epoch": 0.0034,
-      "grad_norm": 2.283393144607544,
-      "learning_rate": 0.00033,
-      "loss": 14.2652,
-      "step": 34
-    },
-    {
-      "epoch": 0.0035,
-      "grad_norm": 2.1815197467803955,
-      "learning_rate": 0.00033999999999999997,
-      "loss": 14.0831,
-      "step": 35
-    },
-    {
-      "epoch": 0.0036,
-      "grad_norm": 2.018310308456421,
-      "learning_rate": 0.00035,
-      "loss": 12.7781,
-      "step": 36
-    },
-    {
-      "epoch": 0.0037,
-      "grad_norm": 1.862168550491333,
-      "learning_rate": 0.00035999999999999997,
-      "loss": 11.4392,
-      "step": 37
-    },
-    {
-      "epoch": 0.0038,
-      "grad_norm": 1.762819528579712,
-      "learning_rate": 0.00037,
-      "loss": 10.6711,
-      "step": 38
-    },
-    {
-      "epoch": 0.0039,
-      "grad_norm": 1.6565489768981934,
-      "learning_rate": 0.00038,
-      "loss": 9.3263,
-      "step": 39
-    },
-    {
-      "epoch": 0.004,
-      "grad_norm": 1.545681118965149,
-      "learning_rate": 0.00039000000000000005,
-      "loss": 9.0413,
-      "step": 40
-    },
-    {
-      "epoch": 0.004,
-      "eval_loss": 0.9703376889228821,
-      "eval_runtime": 681.7846,
-      "eval_samples_per_second": 14.667,
-      "eval_steps_per_second": 1.833,
-      "step": 40
-    },
-    {
-      "epoch": 0.0041,
-      "grad_norm": 1.7677608728408813,
-      "learning_rate": 0.0004,
-      "loss": 9.2559,
-      "step": 41
-    },
-    {
-      "epoch": 0.0042,
-      "grad_norm": 1.4745514392852783,
-      "learning_rate": 0.00041,
-      "loss": 7.3328,
-      "step": 42
-    },
-    {
-      "epoch": 0.0043,
-      "grad_norm": 1.3988405466079712,
-      "learning_rate": 0.00042000000000000007,
-      "loss": 7.0773,
-      "step": 43
-    },
-    {
-      "epoch": 0.0044,
-      "grad_norm": 1.1933400630950928,
-      "learning_rate": 0.00043000000000000004,
-      "loss": 5.9391,
-      "step": 44
-    },
-    {
-      "epoch": 0.0045,
-      "grad_norm": 1.4306049346923828,
-      "learning_rate": 0.00044,
-      "loss": 6.1685,
-      "step": 45
-    },
-    {
-      "epoch": 0.0046,
-      "grad_norm": 1.0445979833602905,
-      "learning_rate": 0.00045,
-      "loss": 4.2649,
-      "step": 46
-    },
-    {
-      "epoch": 0.0047,
-      "grad_norm": 0.9933378100395203,
-      "learning_rate": 0.00045999999999999996,
-      "loss": 4.1511,
-      "step": 47
-    },
-    {
-      "epoch": 0.0048,
-      "grad_norm": 1.1010223627090454,
-      "learning_rate": 0.00047000000000000004,
-      "loss": 4.194,
-      "step": 48
-    },
-    {
-      "epoch": 0.0049,
-      "grad_norm": 3.0977938175201416,
-      "learning_rate": 0.00048,
-      "loss": 4.409,
-      "step": 49
-    },
-    {
-      "epoch": 0.005,
-      "grad_norm": 0.9864963293075562,
-      "learning_rate": 0.00049,
-      "loss": 3.0262,
-      "step": 50
-    },
-    {
-      "epoch": 0.005,
-      "eval_loss": 0.34309181571006775,
-      "eval_runtime": 680.6754,
-      "eval_samples_per_second": 14.691,
-      "eval_steps_per_second": 1.836,
-      "step": 50
-    },
-    {
-      "epoch": 0.0051,
-      "grad_norm": 1.5408843755722046,
-      "learning_rate": 0.0005,
-      "loss": 3.7493,
-      "step": 51
-    },
-    {
-      "epoch": 0.0052,
-      "grad_norm": 0.7898032069206238,
-      "learning_rate": 0.00051,
-      "loss": 2.5679,
-      "step": 52
-    },
-    {
-      "epoch": 0.0053,
-      "grad_norm": 0.7600494027137756,
-      "learning_rate": 0.0005200000000000001,
-      "loss": 2.116,
-      "step": 53
-    },
-    {
-      "epoch": 0.0054,
-      "grad_norm": 0.6847724318504333,
-      "learning_rate": 0.00053,
-      "loss": 1.9753,
-      "step": 54
-    },
-    {
-      "epoch": 0.0055,
-      "grad_norm": 0.9391168355941772,
-      "learning_rate": 0.00054,
-      "loss": 2.6345,
-      "step": 55
-    },
-    {
-      "epoch": 0.0056,
-      "grad_norm": 0.614797830581665,
-      "learning_rate": 0.0005499999999999999,
-      "loss": 1.4403,
-      "step": 56
-    },
-    {
-      "epoch": 0.0057,
-      "grad_norm": 0.6510812044143677,
-      "learning_rate": 0.0005600000000000001,
-      "loss": 1.4256,
-      "step": 57
-    },
-    {
-      "epoch": 0.0058,
-      "grad_norm": 0.5823825001716614,
-      "learning_rate": 0.00057,
-      "loss": 1.2711,
-      "step": 58
-    },
-    {
-      "epoch": 0.0059,
-      "grad_norm": 1.0760960578918457,
-      "learning_rate": 0.00058,
-      "loss": 2.4494,
-      "step": 59
-    },
-    {
-      "epoch": 0.006,
-      "grad_norm": 0.6940433382987976,
-      "learning_rate": 0.00059,
-      "loss": 1.2022,
-      "step": 60
-    },
-    {
-      "epoch": 0.006,
-      "eval_loss": 0.11387959867715836,
-      "eval_runtime": 680.3956,
-      "eval_samples_per_second": 14.697,
-      "eval_steps_per_second": 1.837,
-      "step": 60
-    },
-    {
-      "epoch": 0.0061,
-      "grad_norm": 0.47780829668045044,
-      "learning_rate": 0.0006000000000000001,
-      "loss": 0.8812,
-      "step": 61
-    },
-    {
-      "epoch": 0.0062,
-      "grad_norm": 0.4161126911640167,
-      "learning_rate": 0.00061,
-      "loss": 0.6873,
-      "step": 62
-    },
-    {
-      "epoch": 0.0063,
-      "grad_norm": 0.4566328525543213,
-      "learning_rate": 0.00062,
-      "loss": 0.7777,
-      "step": 63
-    },
-    {
-      "epoch": 0.0064,
-      "grad_norm": 0.6500325202941895,
-      "learning_rate": 0.00063,
-      "loss": 0.8523,
-      "step": 64
-    },
-    {
-      "epoch": 0.0065,
-      "grad_norm": 0.31760597229003906,
-      "learning_rate": 0.00064,
-      "loss": 0.4715,
-      "step": 65
-    },
-    {
-      "epoch": 0.0066,
-      "grad_norm": 0.33907350897789,
-      "learning_rate": 0.0006500000000000001,
-      "loss": 0.4799,
-      "step": 66
-    },
-    {
-      "epoch": 0.0067,
-      "grad_norm": 0.429651141166687,
-      "learning_rate": 0.00066,
-      "loss": 0.5399,
-      "step": 67
-    },
-    {
-      "epoch": 0.0068,
-      "grad_norm": 0.26789650321006775,
-      "learning_rate": 0.00067,
-      "loss": 0.3205,
-      "step": 68
-    },
-    {
-      "epoch": 0.0069,
-      "grad_norm": 0.5304676294326782,
-      "learning_rate": 0.0006799999999999999,
-      "loss": 0.6197,
-      "step": 69
-    },
-    {
-      "epoch": 0.007,
-      "grad_norm": 0.22003565728664398,
-      "learning_rate": 0.0006900000000000001,
-      "loss": 0.2592,
-      "step": 70
-    },
-    {
-      "epoch": 0.007,
-      "eval_loss": 0.04052043333649635,
-      "eval_runtime": 682.0602,
-      "eval_samples_per_second": 14.661,
-      "eval_steps_per_second": 1.833,
-      "step": 70
-    },
-    {
-      "epoch": 0.0071,
-      "grad_norm": 0.550254762172699,
-      "learning_rate": 0.0007,
-      "loss": 0.5598,
-      "step": 71
-    },
-    {
-      "epoch": 0.0072,
-      "grad_norm": 0.21443326771259308,
-      "learning_rate": 0.00071,
-      "loss": 0.23,
-      "step": 72
-    },
-    {
-      "epoch": 0.0073,
-      "grad_norm": 0.25100332498550415,
-      "learning_rate": 0.0007199999999999999,
-      "loss": 0.2287,
-      "step": 73
-    },
-    {
-      "epoch": 0.0074,
-      "grad_norm": 0.8701838850975037,
-      "learning_rate": 0.0007300000000000001,
-      "loss": 0.8359,
-      "step": 74
-    },
-    {
-      "epoch": 0.0075,
-      "grad_norm": 0.19172491133213043,
-      "learning_rate": 0.00074,
-      "loss": 0.1903,
-      "step": 75
-    },
-    {
-      "epoch": 0.0076,
-      "grad_norm": 0.15641027688980103,
-      "learning_rate": 0.00075,
-      "loss": 0.13,
-      "step": 76
-    },
-    {
-      "epoch": 0.0077,
-      "grad_norm": 0.1556449830532074,
-      "learning_rate": 0.00076,
-      "loss": 0.1248,
-      "step": 77
-    },
-    {
-      "epoch": 0.0078,
-      "grad_norm": 0.2690158486366272,
-      "learning_rate": 0.00077,
-      "loss": 0.1378,
-      "step": 78
-    },
-    {
-      "epoch": 0.0079,
-      "grad_norm": 0.17920906841754913,
-      "learning_rate": 0.0007800000000000001,
-      "loss": 0.1314,
-      "step": 79
-    },
-    {
-      "epoch": 0.008,
-      "grad_norm": 0.13513772189617157,
-      "learning_rate": 0.0007899999999999999,
-      "loss": 0.0999,
-      "step": 80
-    },
-    {
-      "epoch": 0.008,
-      "eval_loss": 0.019889511168003082,
-      "eval_runtime": 681.178,
-      "eval_samples_per_second": 14.68,
-      "eval_steps_per_second": 1.835,
-      "step": 80
-    },
-    {
-      "epoch": 0.0081,
-      "grad_norm": 0.10746733844280243,
-      "learning_rate": 0.0008,
-      "loss": 0.1001,
-      "step": 81
-    },
-    {
-      "epoch": 0.0082,
-      "grad_norm": 0.0835120752453804,
-      "learning_rate": 0.0008100000000000001,
-      "loss": 0.0699,
-      "step": 82
-    },
-    {
-      "epoch": 0.0083,
-      "grad_norm": 0.13253141939640045,
-      "learning_rate": 0.00082,
-      "loss": 0.0795,
-      "step": 83
-    },
-    {
-      "epoch": 0.0084,
-      "grad_norm": 0.08485159277915955,
-      "learning_rate": 0.00083,
-      "loss": 0.0697,
-      "step": 84
-    },
-    {
-      "epoch": 0.0085,
-      "grad_norm": 0.14905264973640442,
-      "learning_rate": 0.0008400000000000001,
-      "loss": 0.0928,
-      "step": 85
-    },
-    {
-      "epoch": 0.0086,
-      "grad_norm": 0.9524427652359009,
-      "learning_rate": 0.00085,
-      "loss": 0.3045,
-      "step": 86
-    },
-    {
-      "epoch": 0.0087,
-      "grad_norm": 0.5842136740684509,
-      "learning_rate": 0.0008600000000000001,
-      "loss": 0.1334,
-      "step": 87
-    },
-    {
-      "epoch": 0.0088,
-      "grad_norm": 0.11962056159973145,
-      "learning_rate": 0.00087,
-      "loss": 0.0735,
-      "step": 88
-    },
-    {
-      "epoch": 0.0089,
-      "grad_norm": 0.057546887546777725,
-      "learning_rate": 0.00088,
-      "loss": 0.0477,
-      "step": 89
-    },
-    {
-      "epoch": 0.009,
-      "grad_norm": 0.14116229116916656,
-      "learning_rate": 0.0008900000000000001,
-      "loss": 0.0651,
-      "step": 90
-    },
-    {
-      "epoch": 0.009,
-      "eval_loss": 0.013806294649839401,
-      "eval_runtime": 680.6174,
-      "eval_samples_per_second": 14.693,
-      "eval_steps_per_second": 1.837,
-      "step": 90
-    },
-    {
-      "epoch": 0.0091,
-      "grad_norm": 0.0823589563369751,
-      "learning_rate": 0.0009,
-      "loss": 0.062,
-      "step": 91
-    },
-    {
-      "epoch": 0.0092,
-      "grad_norm": 0.06650274246931076,
-      "learning_rate": 0.00091,
-      "loss": 0.0455,
-      "step": 92
-    },
-    {
-      "epoch": 0.0093,
-      "grad_norm": 0.49958088994026184,
-      "learning_rate": 0.0009199999999999999,
-      "loss": 0.5843,
-      "step": 93
-    },
-    {
-      "epoch": 0.0094,
-      "grad_norm": 0.0794510543346405,
-      "learning_rate": 0.00093,
-      "loss": 0.0424,
-      "step": 94
-    },
-    {
-      "epoch": 0.0095,
-      "grad_norm": 0.09296651929616928,
-      "learning_rate": 0.0009400000000000001,
-      "loss": 0.0569,
-      "step": 95
-    },
-    {
-      "epoch": 0.0096,
-      "grad_norm": 0.06935586035251617,
-      "learning_rate": 0.00095,
-      "loss": 0.0461,
-      "step": 96
-    },
-    {
-      "epoch": 0.0097,
-      "grad_norm": 0.06106742471456528,
-      "learning_rate": 0.00096,
-      "loss": 0.0388,
-      "step": 97
-    },
-    {
-      "epoch": 0.0098,
-      "grad_norm": 0.07485494017601013,
-      "learning_rate": 0.0009699999999999999,
-      "loss": 0.0517,
-      "step": 98
-    },
-    {
-      "epoch": 0.0099,
-      "grad_norm": 0.15889950096607208,
-      "learning_rate": 0.00098,
-      "loss": 0.0938,
-      "step": 99
-    },
-    {
-      "epoch": 0.01,
-      "grad_norm": 0.11253108084201813,
-      "learning_rate": 0.00099,
-      "loss": 0.0644,
-      "step": 100
-    },
-    {
-      "epoch": 0.01,
-      "eval_loss": 0.011293401941657066,
-      "eval_runtime": 681.3771,
-      "eval_samples_per_second": 14.676,
-      "eval_steps_per_second": 1.835,
-      "step": 100
-    },
-    {
-      "epoch": 0.0101,
-      "grad_norm": 0.06460646539926529,
-      "learning_rate": 0.001,
-      "loss": 0.0401,
-      "step": 101
-    },
-    {
-      "epoch": 0.0102,
-      "grad_norm": 0.08616077899932861,
-      "learning_rate": 0.00101,
-      "loss": 0.0476,
-      "step": 102
-    },
-    {
-      "epoch": 0.0103,
-      "grad_norm": 0.14644859731197357,
-      "learning_rate": 0.00102,
-      "loss": 0.0833,
-      "step": 103
-    },
-    {
-      "epoch": 0.0104,
-      "grad_norm": 0.04703814536333084,
-      "learning_rate": 0.00103,
-      "loss": 0.0301,
-      "step": 104
-    },
-    {
-      "epoch": 0.0105,
-      "grad_norm": 0.8709274530410767,
-      "learning_rate": 0.0010400000000000001,
-      "loss": 0.7189,
-      "step": 105
-    },
-    {
-      "epoch": 0.0106,
-      "grad_norm": 0.08391022682189941,
-      "learning_rate": 0.00105,
-      "loss": 0.0517,
-      "step": 106
-    },
-    {
-      "epoch": 0.0107,
-      "grad_norm": 0.1281561255455017,
-      "learning_rate": 0.00106,
-      "loss": 0.0558,
-      "step": 107
-    },
-    {
-      "epoch": 0.0108,
-      "grad_norm": 0.06590331345796585,
-      "learning_rate": 0.00107,
-      "loss": 0.0425,
-      "step": 108
-    },
-    {
-      "epoch": 0.0109,
-      "grad_norm": 0.024916600435972214,
-      "learning_rate": 0.00108,
-      "loss": 0.0243,
-      "step": 109
-    },
-    {
-      "epoch": 0.011,
-      "grad_norm": 0.028951430693268776,
-      "learning_rate": 0.00109,
-      "loss": 0.0245,
-      "step": 110
-    },
-    {
-      "epoch": 0.011,
-      "eval_loss": 0.005778464023023844,
-      "eval_runtime": 680.0884,
-      "eval_samples_per_second": 14.704,
-      "eval_steps_per_second": 1.838,
-      "step": 110
-    },
-    {
-      "epoch": 0.0111,
-      "grad_norm": 0.035101134330034256,
-      "learning_rate": 0.0010999999999999998,
-      "loss": 0.0292,
-      "step": 111
-    },
-    {
-      "epoch": 0.0112,
-      "grad_norm": 0.03888938948512077,
-      "learning_rate": 0.00111,
-      "loss": 0.0291,
-      "step": 112
-    },
-    {
-      "epoch": 0.0113,
-      "grad_norm": 0.027459941804409027,
-      "learning_rate": 0.0011200000000000001,
-      "loss": 0.0232,
-      "step": 113
-    },
-    {
-      "epoch": 0.0114,
-      "grad_norm": 0.04769216105341911,
-      "learning_rate": 0.00113,
-      "loss": 0.0337,
-      "step": 114
-    },
-    {
-      "epoch": 0.0115,
-      "grad_norm": 0.024265864863991737,
-      "learning_rate": 0.00114,
-      "loss": 0.0233,
-      "step": 115
-    },
-    {
-      "epoch": 0.0116,
-      "grad_norm": 0.07471495121717453,
-      "learning_rate": 0.0011500000000000002,
-      "loss": 0.039,
-      "step": 116
-    },
-    {
-      "epoch": 0.0117,
-      "grad_norm": 0.03743023797869682,
-      "learning_rate": 0.00116,
-      "loss": 0.0291,
-      "step": 117
-    },
-    {
-      "epoch": 0.0118,
-      "grad_norm": 0.025691425427794456,
-      "learning_rate": 0.00117,
-      "loss": 0.024,
-      "step": 118
-    },
-    {
-      "epoch": 0.0119,
-      "grad_norm": 0.02821824699640274,
-      "learning_rate": 0.00118,
-      "loss": 0.0245,
-      "step": 119
-    },
-    {
-      "epoch": 0.012,
-      "grad_norm": 0.027008380740880966,
-      "learning_rate": 0.00119,
-      "loss": 0.0271,
-      "step": 120
-    },
-    {
-      "epoch": 0.012,
-      "eval_loss": 0.0045553590171039104,
-      "eval_runtime": 685.7922,
-      "eval_samples_per_second": 14.582,
-      "eval_steps_per_second": 1.823,
-      "step": 120
-    }
-  ],
-  "logging_steps": 1,
-  "max_steps": 10000,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 9223372036854775807,
-  "save_steps": 10,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 0.0,
-  "train_batch_size": 8,
-  "trial_name": null,
-  "trial_params": null
-}

last-checkpoint/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:af02e602b37732e5013cd321602a51e5ba92ee6a737c53590897eaa51eee7722
-size 5841