samtuckervegan committed on Apr 4, 2025

Commit

cd4e30a

verified ·

1 Parent(s): 76cd93d

Upload folder using huggingface_hub

Browse files

Files changed (22) hide show

README.md +28 -0
checkpoint-7374/config.json +51 -0
checkpoint-7374/model.safetensors +3 -0
checkpoint-7374/optimizer.pt +3 -0
checkpoint-7374/rng_state_0.pth +3 -0
checkpoint-7374/rng_state_1.pth +3 -0
checkpoint-7374/rng_state_2.pth +3 -0
checkpoint-7374/rng_state_3.pth +3 -0
checkpoint-7374/scheduler.pt +3 -0
checkpoint-7374/trainer_state.json +2139 -0
checkpoint-7374/training_args.bin +3 -0
config.json +51 -0
merges.txt +0 -0
model.safetensors +3 -0
runs/Apr04_07-57-07_r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo/events.out.tfevents.1743753431.r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo.216.0 +2 -2
runs/Apr04_07-57-07_r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo/events.out.tfevents.1743756139.r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo.216.1 +3 -0
special_tokens_map.json +15 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
training_args.bin +3 -0
training_params.json +30 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,28 @@

+---
+tags:
+- autotrain
+- text-regression
+base_model: allenai/longformer-base-4096
+widget:
+- text: "I love AutoTrain"
+datasets:
+- samtuckervegan/text_performance
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Regression
+## Validation Metrics
+loss: 0.03396870195865631
+mse: 0.03396843746304512
+mae: 0.14112502336502075
+r2: 0.2725181579589844
+rmse: 0.18430528332916862
+explained_variance: 0.273209810256958

checkpoint-7374/config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "_name_or_path": "allenai/longformer-base-4096",
+  "_num_labels": 1,
+  "architectures": [
+    "LongformerForSequenceClassification"
+  ],
+  "attention_mode": "longformer",
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "target"
+  },
+  "ignore_attention_mask": false,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "target": 0
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 4098,
+  "model_type": "longformer",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "onnx_export": false,
+  "pad_token_id": 1,
+  "problem_type": "regression",
+  "sep_token_id": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}

checkpoint-7374/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9242fc9cfb1524a85761f4f00c1b4ff8956fb34fb58246044b0f95b84eb92733
+size 594675108

checkpoint-7374/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7aad989632e7911c595b9e247281c744cbc55dc21b17d47ccb9938d793f4ccb9
+size 1189510202

checkpoint-7374/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b34b08ccbb3da22551dde002057c59106d0ab0be2e59f3c5fc70f8d7c2bc8ca
+size 15024

checkpoint-7374/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17d49d11bbb7b0dae6eaa111a9d082af395d3eebdb2f514f692df6e982aa7bf3
+size 15024

checkpoint-7374/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9722302a3709aaaafc5512910e8e745ab2112262187e1023f792897b6426dfe5
+size 15024

checkpoint-7374/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ef783c4497f7e80f3002a7e0e0113ff3604fff25889ca37045e7e49c7c867c1
+size 15024

checkpoint-7374/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:882b9e52894f599097d083730fb17de039c02dd7e0c5be59a4f3eebc3827e542
+size 1064

checkpoint-7374/trainer_state.json ADDED Viewed

	@@ -0,0 +1,2139 @@

+{
+  "best_metric": 0.03396870195865631,
+  "best_model_checkpoint": "text-performance-longformer/checkpoint-7374",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 7374,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01017087062652563,
+      "grad_norm": 13.638067245483398,
+      "learning_rate": 1.6260162601626018e-06,
+      "loss": 0.3817,
+      "step": 25
+    },
+    {
+      "epoch": 0.02034174125305126,
+      "grad_norm": 6.143240928649902,
+      "learning_rate": 3.3197831978319785e-06,
+      "loss": 0.2287,
+      "step": 50
+    },
+    {
+      "epoch": 0.030512611879576892,
+      "grad_norm": 1.5986428260803223,
+      "learning_rate": 5.013550135501355e-06,
+      "loss": 0.0691,
+      "step": 75
+    },
+    {
+      "epoch": 0.04068348250610252,
+      "grad_norm": 0.9884291887283325,
+      "learning_rate": 6.707317073170733e-06,
+      "loss": 0.0543,
+      "step": 100
+    },
+    {
+      "epoch": 0.050854353132628156,
+      "grad_norm": 3.724142074584961,
+      "learning_rate": 8.401084010840109e-06,
+      "loss": 0.0619,
+      "step": 125
+    },
+    {
+      "epoch": 0.061025223759153785,
+      "grad_norm": 2.291118860244751,
+      "learning_rate": 1.0094850948509485e-05,
+      "loss": 0.0523,
+      "step": 150
+    },
+    {
+      "epoch": 0.07119609438567942,
+      "grad_norm": 2.3093833923339844,
+      "learning_rate": 1.1788617886178862e-05,
+      "loss": 0.0541,
+      "step": 175
+    },
+    {
+      "epoch": 0.08136696501220504,
+      "grad_norm": 3.3519299030303955,
+      "learning_rate": 1.348238482384824e-05,
+      "loss": 0.0515,
+      "step": 200
+    },
+    {
+      "epoch": 0.09153783563873068,
+      "grad_norm": 2.0906357765197754,
+      "learning_rate": 1.5176151761517615e-05,
+      "loss": 0.0529,
+      "step": 225
+    },
+    {
+      "epoch": 0.10170870626525631,
+      "grad_norm": 1.1683790683746338,
+      "learning_rate": 1.6802168021680217e-05,
+      "loss": 0.0573,
+      "step": 250
+    },
+    {
+      "epoch": 0.11187957689178193,
+      "grad_norm": 1.7971270084381104,
+      "learning_rate": 1.8495934959349594e-05,
+      "loss": 0.0495,
+      "step": 275
+    },
+    {
+      "epoch": 0.12205044751830757,
+      "grad_norm": 3.8092596530914307,
+      "learning_rate": 2.018970189701897e-05,
+      "loss": 0.0462,
+      "step": 300
+    },
+    {
+      "epoch": 0.1322213181448332,
+      "grad_norm": 2.3589837551116943,
+      "learning_rate": 2.1883468834688347e-05,
+      "loss": 0.044,
+      "step": 325
+    },
+    {
+      "epoch": 0.14239218877135884,
+      "grad_norm": 1.4069643020629883,
+      "learning_rate": 2.3577235772357724e-05,
+      "loss": 0.0458,
+      "step": 350
+    },
+    {
+      "epoch": 0.15256305939788445,
+      "grad_norm": 0.8279024362564087,
+      "learning_rate": 2.5271002710027104e-05,
+      "loss": 0.0457,
+      "step": 375
+    },
+    {
+      "epoch": 0.16273393002441008,
+      "grad_norm": 0.7554256916046143,
+      "learning_rate": 2.696476964769648e-05,
+      "loss": 0.0431,
+      "step": 400
+    },
+    {
+      "epoch": 0.17290480065093572,
+      "grad_norm": 0.7221837043762207,
+      "learning_rate": 2.8658536585365854e-05,
+      "loss": 0.0455,
+      "step": 425
+    },
+    {
+      "epoch": 0.18307567127746135,
+      "grad_norm": 1.551680088043213,
+      "learning_rate": 3.035230352303523e-05,
+      "loss": 0.0443,
+      "step": 450
+    },
+    {
+      "epoch": 0.193246541903987,
+      "grad_norm": 1.2256766557693481,
+      "learning_rate": 3.204607046070461e-05,
+      "loss": 0.0423,
+      "step": 475
+    },
+    {
+      "epoch": 0.20341741253051263,
+      "grad_norm": 1.9404680728912354,
+      "learning_rate": 3.373983739837399e-05,
+      "loss": 0.0454,
+      "step": 500
+    },
+    {
+      "epoch": 0.21358828315703823,
+      "grad_norm": 1.481037974357605,
+      "learning_rate": 3.5433604336043364e-05,
+      "loss": 0.0549,
+      "step": 525
+    },
+    {
+      "epoch": 0.22375915378356387,
+      "grad_norm": 1.0439728498458862,
+      "learning_rate": 3.712737127371274e-05,
+      "loss": 0.0474,
+      "step": 550
+    },
+    {
+      "epoch": 0.2339300244100895,
+      "grad_norm": 2.524623394012451,
+      "learning_rate": 3.882113821138211e-05,
+      "loss": 0.0442,
+      "step": 575
+    },
+    {
+      "epoch": 0.24410089503661514,
+      "grad_norm": 0.46617817878723145,
+      "learning_rate": 4.051490514905149e-05,
+      "loss": 0.0416,
+      "step": 600
+    },
+    {
+      "epoch": 0.25427176566314075,
+      "grad_norm": 0.9644585251808167,
+      "learning_rate": 4.220867208672087e-05,
+      "loss": 0.0459,
+      "step": 625
+    },
+    {
+      "epoch": 0.2644426362896664,
+      "grad_norm": 0.7916778922080994,
+      "learning_rate": 4.390243902439025e-05,
+      "loss": 0.0453,
+      "step": 650
+    },
+    {
+      "epoch": 0.274613506916192,
+      "grad_norm": 0.41167861223220825,
+      "learning_rate": 4.5596205962059624e-05,
+      "loss": 0.0447,
+      "step": 675
+    },
+    {
+      "epoch": 0.2847843775427177,
+      "grad_norm": 2.670891046524048,
+      "learning_rate": 4.7289972899729e-05,
+      "loss": 0.0436,
+      "step": 700
+    },
+    {
+      "epoch": 0.2949552481692433,
+      "grad_norm": 0.5455201268196106,
+      "learning_rate": 4.898373983739837e-05,
+      "loss": 0.046,
+      "step": 725
+    },
+    {
+      "epoch": 0.3051261187957689,
+      "grad_norm": 0.477638304233551,
+      "learning_rate": 4.9924653405666065e-05,
+      "loss": 0.0461,
+      "step": 750
+    },
+    {
+      "epoch": 0.31529698942229456,
+      "grad_norm": 1.8583427667617798,
+      "learning_rate": 4.973628691983123e-05,
+      "loss": 0.046,
+      "step": 775
+    },
+    {
+      "epoch": 0.32546786004882017,
+      "grad_norm": 1.4675369262695312,
+      "learning_rate": 4.954792043399639e-05,
+      "loss": 0.043,
+      "step": 800
+    },
+    {
+      "epoch": 0.33563873067534583,
+      "grad_norm": 0.5187409520149231,
+      "learning_rate": 4.9359553948161545e-05,
+      "loss": 0.0465,
+      "step": 825
+    },
+    {
+      "epoch": 0.34580960130187144,
+      "grad_norm": 1.1287797689437866,
+      "learning_rate": 4.917118746232671e-05,
+      "loss": 0.0395,
+      "step": 850
+    },
+    {
+      "epoch": 0.35598047192839705,
+      "grad_norm": 1.6059693098068237,
+      "learning_rate": 4.8982820976491866e-05,
+      "loss": 0.0463,
+      "step": 875
+    },
+    {
+      "epoch": 0.3661513425549227,
+      "grad_norm": 0.5075823664665222,
+      "learning_rate": 4.8794454490657024e-05,
+      "loss": 0.0431,
+      "step": 900
+    },
+    {
+      "epoch": 0.3763222131814483,
+      "grad_norm": 0.4540039896965027,
+      "learning_rate": 4.860608800482219e-05,
+      "loss": 0.0444,
+      "step": 925
+    },
+    {
+      "epoch": 0.386493083807974,
+      "grad_norm": 0.3790999948978424,
+      "learning_rate": 4.8417721518987346e-05,
+      "loss": 0.0408,
+      "step": 950
+    },
+    {
+      "epoch": 0.3966639544344996,
+      "grad_norm": 0.35743555426597595,
+      "learning_rate": 4.82293550331525e-05,
+      "loss": 0.0412,
+      "step": 975
+    },
+    {
+      "epoch": 0.40683482506102525,
+      "grad_norm": 0.9319044351577759,
+      "learning_rate": 4.804098854731766e-05,
+      "loss": 0.0383,
+      "step": 1000
+    },
+    {
+      "epoch": 0.41700569568755086,
+      "grad_norm": 0.9398106336593628,
+      "learning_rate": 4.785262206148282e-05,
+      "loss": 0.0439,
+      "step": 1025
+    },
+    {
+      "epoch": 0.42717656631407647,
+      "grad_norm": 0.5996136665344238,
+      "learning_rate": 4.766425557564798e-05,
+      "loss": 0.0455,
+      "step": 1050
+    },
+    {
+      "epoch": 0.43734743694060213,
+      "grad_norm": 2.0928783416748047,
+      "learning_rate": 4.747588908981314e-05,
+      "loss": 0.0451,
+      "step": 1075
+    },
+    {
+      "epoch": 0.44751830756712774,
+      "grad_norm": 0.42079484462738037,
+      "learning_rate": 4.7287522603978304e-05,
+      "loss": 0.0476,
+      "step": 1100
+    },
+    {
+      "epoch": 0.4576891781936534,
+      "grad_norm": 1.7878057956695557,
+      "learning_rate": 4.709915611814346e-05,
+      "loss": 0.0434,
+      "step": 1125
+    },
+    {
+      "epoch": 0.467860048820179,
+      "grad_norm": 0.3113216459751129,
+      "learning_rate": 4.691078963230862e-05,
+      "loss": 0.042,
+      "step": 1150
+    },
+    {
+      "epoch": 0.4780309194467046,
+      "grad_norm": 0.319442480802536,
+      "learning_rate": 4.6722423146473784e-05,
+      "loss": 0.0409,
+      "step": 1175
+    },
+    {
+      "epoch": 0.4882017900732303,
+      "grad_norm": 0.9614000916481018,
+      "learning_rate": 4.653405666063894e-05,
+      "loss": 0.0391,
+      "step": 1200
+    },
+    {
+      "epoch": 0.4983726606997559,
+      "grad_norm": 0.6299770474433899,
+      "learning_rate": 4.63456901748041e-05,
+      "loss": 0.0414,
+      "step": 1225
+    },
+    {
+      "epoch": 0.5085435313262815,
+      "grad_norm": 0.8669236898422241,
+      "learning_rate": 4.615732368896926e-05,
+      "loss": 0.0388,
+      "step": 1250
+    },
+    {
+      "epoch": 0.5187144019528072,
+      "grad_norm": 0.6755848526954651,
+      "learning_rate": 4.596895720313442e-05,
+      "loss": 0.0421,
+      "step": 1275
+    },
+    {
+      "epoch": 0.5288852725793328,
+      "grad_norm": 0.5094274282455444,
+      "learning_rate": 4.5780590717299585e-05,
+      "loss": 0.0408,
+      "step": 1300
+    },
+    {
+      "epoch": 0.5390561432058584,
+      "grad_norm": 1.5034645795822144,
+      "learning_rate": 4.559222423146474e-05,
+      "loss": 0.0467,
+      "step": 1325
+    },
+    {
+      "epoch": 0.549227013832384,
+      "grad_norm": 1.6210927963256836,
+      "learning_rate": 4.54038577456299e-05,
+      "loss": 0.0443,
+      "step": 1350
+    },
+    {
+      "epoch": 0.5593978844589097,
+      "grad_norm": 0.4528130292892456,
+      "learning_rate": 4.5215491259795064e-05,
+      "loss": 0.041,
+      "step": 1375
+    },
+    {
+      "epoch": 0.5695687550854354,
+      "grad_norm": 0.6066830158233643,
+      "learning_rate": 4.5027124773960215e-05,
+      "loss": 0.0427,
+      "step": 1400
+    },
+    {
+      "epoch": 0.5797396257119609,
+      "grad_norm": 0.2950328588485718,
+      "learning_rate": 4.483875828812538e-05,
+      "loss": 0.0417,
+      "step": 1425
+    },
+    {
+      "epoch": 0.5899104963384866,
+      "grad_norm": 0.425102561712265,
+      "learning_rate": 4.4650391802290537e-05,
+      "loss": 0.0391,
+      "step": 1450
+    },
+    {
+      "epoch": 0.6000813669650122,
+      "grad_norm": 0.5589340329170227,
+      "learning_rate": 4.4462025316455694e-05,
+      "loss": 0.0406,
+      "step": 1475
+    },
+    {
+      "epoch": 0.6102522375915378,
+      "grad_norm": 3.0561866760253906,
+      "learning_rate": 4.427365883062086e-05,
+      "loss": 0.0403,
+      "step": 1500
+    },
+    {
+      "epoch": 0.6204231082180635,
+      "grad_norm": 0.6851157546043396,
+      "learning_rate": 4.4085292344786016e-05,
+      "loss": 0.0405,
+      "step": 1525
+    },
+    {
+      "epoch": 0.6305939788445891,
+      "grad_norm": 0.5816906690597534,
+      "learning_rate": 4.389692585895118e-05,
+      "loss": 0.0465,
+      "step": 1550
+    },
+    {
+      "epoch": 0.6407648494711147,
+      "grad_norm": 1.0224462747573853,
+      "learning_rate": 4.370855937311634e-05,
+      "loss": 0.0419,
+      "step": 1575
+    },
+    {
+      "epoch": 0.6509357200976403,
+      "grad_norm": 0.4752540588378906,
+      "learning_rate": 4.3520192887281495e-05,
+      "loss": 0.0405,
+      "step": 1600
+    },
+    {
+      "epoch": 0.661106590724166,
+      "grad_norm": 1.1999990940093994,
+      "learning_rate": 4.333182640144666e-05,
+      "loss": 0.041,
+      "step": 1625
+    },
+    {
+      "epoch": 0.6712774613506917,
+      "grad_norm": 0.40787383913993835,
+      "learning_rate": 4.314345991561182e-05,
+      "loss": 0.0411,
+      "step": 1650
+    },
+    {
+      "epoch": 0.6814483319772172,
+      "grad_norm": 0.26120448112487793,
+      "learning_rate": 4.2955093429776974e-05,
+      "loss": 0.0401,
+      "step": 1675
+    },
+    {
+      "epoch": 0.6916192026037429,
+      "grad_norm": 0.5405380725860596,
+      "learning_rate": 4.276672694394214e-05,
+      "loss": 0.0387,
+      "step": 1700
+    },
+    {
+      "epoch": 0.7017900732302685,
+      "grad_norm": 0.2800443768501282,
+      "learning_rate": 4.2578360458107296e-05,
+      "loss": 0.0411,
+      "step": 1725
+    },
+    {
+      "epoch": 0.7119609438567941,
+      "grad_norm": 0.5752384662628174,
+      "learning_rate": 4.2389993972272454e-05,
+      "loss": 0.0404,
+      "step": 1750
+    },
+    {
+      "epoch": 0.7221318144833198,
+      "grad_norm": 0.778127133846283,
+      "learning_rate": 4.220162748643762e-05,
+      "loss": 0.0428,
+      "step": 1775
+    },
+    {
+      "epoch": 0.7323026851098454,
+      "grad_norm": 1.7695764303207397,
+      "learning_rate": 4.2013261000602775e-05,
+      "loss": 0.0353,
+      "step": 1800
+    },
+    {
+      "epoch": 0.7424735557363711,
+      "grad_norm": 0.2690475285053253,
+      "learning_rate": 4.182489451476794e-05,
+      "loss": 0.0404,
+      "step": 1825
+    },
+    {
+      "epoch": 0.7526444263628966,
+      "grad_norm": 0.506496012210846,
+      "learning_rate": 4.163652802893309e-05,
+      "loss": 0.0402,
+      "step": 1850
+    },
+    {
+      "epoch": 0.7628152969894223,
+      "grad_norm": 0.7220098972320557,
+      "learning_rate": 4.1448161543098255e-05,
+      "loss": 0.0372,
+      "step": 1875
+    },
+    {
+      "epoch": 0.772986167615948,
+      "grad_norm": 1.1999934911727905,
+      "learning_rate": 4.125979505726341e-05,
+      "loss": 0.0429,
+      "step": 1900
+    },
+    {
+      "epoch": 0.7831570382424735,
+      "grad_norm": 0.5055158734321594,
+      "learning_rate": 4.107142857142857e-05,
+      "loss": 0.0367,
+      "step": 1925
+    },
+    {
+      "epoch": 0.7933279088689992,
+      "grad_norm": 0.5015272498130798,
+      "learning_rate": 4.0883062085593734e-05,
+      "loss": 0.0376,
+      "step": 1950
+    },
+    {
+      "epoch": 0.8034987794955248,
+      "grad_norm": 0.6382879018783569,
+      "learning_rate": 4.069469559975889e-05,
+      "loss": 0.0387,
+      "step": 1975
+    },
+    {
+      "epoch": 0.8136696501220505,
+      "grad_norm": 1.4380210638046265,
+      "learning_rate": 4.050632911392405e-05,
+      "loss": 0.0433,
+      "step": 2000
+    },
+    {
+      "epoch": 0.823840520748576,
+      "grad_norm": 0.46868982911109924,
+      "learning_rate": 4.031796262808921e-05,
+      "loss": 0.0409,
+      "step": 2025
+    },
+    {
+      "epoch": 0.8340113913751017,
+      "grad_norm": 0.47722935676574707,
+      "learning_rate": 4.012959614225437e-05,
+      "loss": 0.0367,
+      "step": 2050
+    },
+    {
+      "epoch": 0.8441822620016274,
+      "grad_norm": 0.66898113489151,
+      "learning_rate": 3.9941229656419535e-05,
+      "loss": 0.0426,
+      "step": 2075
+    },
+    {
+      "epoch": 0.8543531326281529,
+      "grad_norm": 0.640957772731781,
+      "learning_rate": 3.975286317058469e-05,
+      "loss": 0.0374,
+      "step": 2100
+    },
+    {
+      "epoch": 0.8645240032546786,
+      "grad_norm": 2.0676088333129883,
+      "learning_rate": 3.956449668474985e-05,
+      "loss": 0.0391,
+      "step": 2125
+    },
+    {
+      "epoch": 0.8746948738812043,
+      "grad_norm": 0.3028632402420044,
+      "learning_rate": 3.9376130198915014e-05,
+      "loss": 0.0379,
+      "step": 2150
+    },
+    {
+      "epoch": 0.8848657445077298,
+      "grad_norm": 3.03582763671875,
+      "learning_rate": 3.918776371308017e-05,
+      "loss": 0.0396,
+      "step": 2175
+    },
+    {
+      "epoch": 0.8950366151342555,
+      "grad_norm": 0.5792508721351624,
+      "learning_rate": 3.899939722724533e-05,
+      "loss": 0.0393,
+      "step": 2200
+    },
+    {
+      "epoch": 0.9052074857607811,
+      "grad_norm": 0.6225530505180359,
+      "learning_rate": 3.8811030741410494e-05,
+      "loss": 0.0413,
+      "step": 2225
+    },
+    {
+      "epoch": 0.9153783563873068,
+      "grad_norm": 1.2196191549301147,
+      "learning_rate": 3.862266425557565e-05,
+      "loss": 0.0411,
+      "step": 2250
+    },
+    {
+      "epoch": 0.9255492270138324,
+      "grad_norm": 1.1619917154312134,
+      "learning_rate": 3.843429776974081e-05,
+      "loss": 0.0372,
+      "step": 2275
+    },
+    {
+      "epoch": 0.935720097640358,
+      "grad_norm": 0.4266558289527893,
+      "learning_rate": 3.8245931283905966e-05,
+      "loss": 0.0382,
+      "step": 2300
+    },
+    {
+      "epoch": 0.9458909682668837,
+      "grad_norm": 0.2716640830039978,
+      "learning_rate": 3.8057564798071124e-05,
+      "loss": 0.0364,
+      "step": 2325
+    },
+    {
+      "epoch": 0.9560618388934092,
+      "grad_norm": 0.3579002618789673,
+      "learning_rate": 3.787673297166968e-05,
+      "loss": 0.0397,
+      "step": 2350
+    },
+    {
+      "epoch": 0.9662327095199349,
+      "grad_norm": 0.3124788999557495,
+      "learning_rate": 3.768836648583484e-05,
+      "loss": 0.0367,
+      "step": 2375
+    },
+    {
+      "epoch": 0.9764035801464606,
+      "grad_norm": 0.7987418174743652,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.0425,
+      "step": 2400
+    },
+    {
+      "epoch": 0.9865744507729862,
+      "grad_norm": 0.623573899269104,
+      "learning_rate": 3.731163351416516e-05,
+      "loss": 0.0407,
+      "step": 2425
+    },
+    {
+      "epoch": 0.9967453213995118,
+      "grad_norm": 0.6313973665237427,
+      "learning_rate": 3.712326702833032e-05,
+      "loss": 0.0359,
+      "step": 2450
+    },
+    {
+      "epoch": 1.0,
+      "eval_explained_variance": 0.22635483741760254,
+      "eval_loss": 0.03618196025490761,
+      "eval_mae": 0.1520702838897705,
+      "eval_mse": 0.03618059679865837,
+      "eval_r2": 0.22514164447784424,
+      "eval_rmse": 0.19021197858877967,
+      "eval_runtime": 39.05,
+      "eval_samples_per_second": 503.405,
+      "eval_steps_per_second": 7.887,
+      "step": 2458
+    },
+    {
+      "epoch": 1.0069161920260374,
+      "grad_norm": 0.5591532588005066,
+      "learning_rate": 3.693490054249548e-05,
+      "loss": 0.0361,
+      "step": 2475
+    },
+    {
+      "epoch": 1.017087062652563,
+      "grad_norm": 0.35737472772598267,
+      "learning_rate": 3.674653405666064e-05,
+      "loss": 0.035,
+      "step": 2500
+    },
+    {
+      "epoch": 1.0272579332790888,
+      "grad_norm": 1.0595606565475464,
+      "learning_rate": 3.6558167570825805e-05,
+      "loss": 0.0374,
+      "step": 2525
+    },
+    {
+      "epoch": 1.0374288039056143,
+      "grad_norm": 0.42890357971191406,
+      "learning_rate": 3.636980108499096e-05,
+      "loss": 0.0366,
+      "step": 2550
+    },
+    {
+      "epoch": 1.0475996745321399,
+      "grad_norm": 0.41043972969055176,
+      "learning_rate": 3.618143459915612e-05,
+      "loss": 0.0377,
+      "step": 2575
+    },
+    {
+      "epoch": 1.0577705451586656,
+      "grad_norm": 0.5887218117713928,
+      "learning_rate": 3.5993068113321284e-05,
+      "loss": 0.0391,
+      "step": 2600
+    },
+    {
+      "epoch": 1.0679414157851912,
+      "grad_norm": 0.245023712515831,
+      "learning_rate": 3.5804701627486435e-05,
+      "loss": 0.032,
+      "step": 2625
+    },
+    {
+      "epoch": 1.0781122864117167,
+      "grad_norm": 0.4018308222293854,
+      "learning_rate": 3.56163351416516e-05,
+      "loss": 0.0401,
+      "step": 2650
+    },
+    {
+      "epoch": 1.0882831570382425,
+      "grad_norm": 1.287443995475769,
+      "learning_rate": 3.5427968655816756e-05,
+      "loss": 0.0366,
+      "step": 2675
+    },
+    {
+      "epoch": 1.098454027664768,
+      "grad_norm": 0.9983925223350525,
+      "learning_rate": 3.5239602169981914e-05,
+      "loss": 0.0408,
+      "step": 2700
+    },
+    {
+      "epoch": 1.1086248982912936,
+      "grad_norm": 1.045338749885559,
+      "learning_rate": 3.505123568414708e-05,
+      "loss": 0.0411,
+      "step": 2725
+    },
+    {
+      "epoch": 1.1187957689178194,
+      "grad_norm": 0.2734503746032715,
+      "learning_rate": 3.4862869198312236e-05,
+      "loss": 0.0373,
+      "step": 2750
+    },
+    {
+      "epoch": 1.128966639544345,
+      "grad_norm": 0.4091496169567108,
+      "learning_rate": 3.46745027124774e-05,
+      "loss": 0.0362,
+      "step": 2775
+    },
+    {
+      "epoch": 1.1391375101708707,
+      "grad_norm": 1.026307225227356,
+      "learning_rate": 3.448613622664256e-05,
+      "loss": 0.036,
+      "step": 2800
+    },
+    {
+      "epoch": 1.1493083807973963,
+      "grad_norm": 0.5424162149429321,
+      "learning_rate": 3.4297769740807715e-05,
+      "loss": 0.0362,
+      "step": 2825
+    },
+    {
+      "epoch": 1.1594792514239218,
+      "grad_norm": 0.4070860743522644,
+      "learning_rate": 3.410940325497288e-05,
+      "loss": 0.0338,
+      "step": 2850
+    },
+    {
+      "epoch": 1.1696501220504476,
+      "grad_norm": 0.861303448677063,
+      "learning_rate": 3.392103676913804e-05,
+      "loss": 0.0371,
+      "step": 2875
+    },
+    {
+      "epoch": 1.1798209926769732,
+      "grad_norm": 0.6860642433166504,
+      "learning_rate": 3.3732670283303194e-05,
+      "loss": 0.0369,
+      "step": 2900
+    },
+    {
+      "epoch": 1.1899918633034987,
+      "grad_norm": 0.6697980761528015,
+      "learning_rate": 3.354430379746836e-05,
+      "loss": 0.0379,
+      "step": 2925
+    },
+    {
+      "epoch": 1.2001627339300245,
+      "grad_norm": 0.6728507876396179,
+      "learning_rate": 3.3355937311633516e-05,
+      "loss": 0.038,
+      "step": 2950
+    },
+    {
+      "epoch": 1.21033360455655,
+      "grad_norm": 0.4306727349758148,
+      "learning_rate": 3.316757082579868e-05,
+      "loss": 0.0381,
+      "step": 2975
+    },
+    {
+      "epoch": 1.2205044751830756,
+      "grad_norm": 0.8999012112617493,
+      "learning_rate": 3.297920433996384e-05,
+      "loss": 0.0393,
+      "step": 3000
+    },
+    {
+      "epoch": 1.2306753458096014,
+      "grad_norm": 0.358962744474411,
+      "learning_rate": 3.2790837854128995e-05,
+      "loss": 0.0374,
+      "step": 3025
+    },
+    {
+      "epoch": 1.240846216436127,
+      "grad_norm": 0.8197824954986572,
+      "learning_rate": 3.260247136829416e-05,
+      "loss": 0.0391,
+      "step": 3050
+    },
+    {
+      "epoch": 1.2510170870626527,
+      "grad_norm": 0.6671149730682373,
+      "learning_rate": 3.241410488245931e-05,
+      "loss": 0.0357,
+      "step": 3075
+    },
+    {
+      "epoch": 1.2611879576891782,
+      "grad_norm": 0.8932905197143555,
+      "learning_rate": 3.2225738396624475e-05,
+      "loss": 0.038,
+      "step": 3100
+    },
+    {
+      "epoch": 1.2713588283157038,
+      "grad_norm": 0.3033260405063629,
+      "learning_rate": 3.203737191078963e-05,
+      "loss": 0.0364,
+      "step": 3125
+    },
+    {
+      "epoch": 1.2815296989422293,
+      "grad_norm": 0.6071414351463318,
+      "learning_rate": 3.184900542495479e-05,
+      "loss": 0.0325,
+      "step": 3150
+    },
+    {
+      "epoch": 1.2917005695687551,
+      "grad_norm": 0.28337907791137695,
+      "learning_rate": 3.1660638939119954e-05,
+      "loss": 0.0373,
+      "step": 3175
+    },
+    {
+      "epoch": 1.3018714401952807,
+      "grad_norm": 0.5393190979957581,
+      "learning_rate": 3.147227245328511e-05,
+      "loss": 0.0362,
+      "step": 3200
+    },
+    {
+      "epoch": 1.3120423108218064,
+      "grad_norm": 1.3214200735092163,
+      "learning_rate": 3.128390596745027e-05,
+      "loss": 0.0368,
+      "step": 3225
+    },
+    {
+      "epoch": 1.322213181448332,
+      "grad_norm": 0.5053825974464417,
+      "learning_rate": 3.109553948161543e-05,
+      "loss": 0.0375,
+      "step": 3250
+    },
+    {
+      "epoch": 1.3323840520748575,
+      "grad_norm": 0.4198523759841919,
+      "learning_rate": 3.090717299578059e-05,
+      "loss": 0.0404,
+      "step": 3275
+    },
+    {
+      "epoch": 1.342554922701383,
+      "grad_norm": 0.9187168478965759,
+      "learning_rate": 3.0718806509945755e-05,
+      "loss": 0.0374,
+      "step": 3300
+    },
+    {
+      "epoch": 1.3527257933279089,
+      "grad_norm": 0.3942495882511139,
+      "learning_rate": 3.053044002411091e-05,
+      "loss": 0.0362,
+      "step": 3325
+    },
+    {
+      "epoch": 1.3628966639544344,
+      "grad_norm": 0.6447917819023132,
+      "learning_rate": 3.0342073538276073e-05,
+      "loss": 0.038,
+      "step": 3350
+    },
+    {
+      "epoch": 1.3730675345809602,
+      "grad_norm": 0.3163827061653137,
+      "learning_rate": 3.015370705244123e-05,
+      "loss": 0.0369,
+      "step": 3375
+    },
+    {
+      "epoch": 1.3832384052074858,
+      "grad_norm": 0.3072253465652466,
+      "learning_rate": 2.9965340566606392e-05,
+      "loss": 0.0385,
+      "step": 3400
+    },
+    {
+      "epoch": 1.3934092758340113,
+      "grad_norm": 0.4808538556098938,
+      "learning_rate": 2.9776974080771553e-05,
+      "loss": 0.0378,
+      "step": 3425
+    },
+    {
+      "epoch": 1.403580146460537,
+      "grad_norm": 0.2448228895664215,
+      "learning_rate": 2.9588607594936714e-05,
+      "loss": 0.0337,
+      "step": 3450
+    },
+    {
+      "epoch": 1.4137510170870626,
+      "grad_norm": 0.42077022790908813,
+      "learning_rate": 2.940024110910187e-05,
+      "loss": 0.0359,
+      "step": 3475
+    },
+    {
+      "epoch": 1.4239218877135884,
+      "grad_norm": 0.48862871527671814,
+      "learning_rate": 2.921187462326703e-05,
+      "loss": 0.0378,
+      "step": 3500
+    },
+    {
+      "epoch": 1.434092758340114,
+      "grad_norm": 0.5912586450576782,
+      "learning_rate": 2.9023508137432186e-05,
+      "loss": 0.0384,
+      "step": 3525
+    },
+    {
+      "epoch": 1.4442636289666395,
+      "grad_norm": 0.4059402644634247,
+      "learning_rate": 2.8835141651597347e-05,
+      "loss": 0.0381,
+      "step": 3550
+    },
+    {
+      "epoch": 1.454434499593165,
+      "grad_norm": 0.3919837176799774,
+      "learning_rate": 2.8646775165762508e-05,
+      "loss": 0.036,
+      "step": 3575
+    },
+    {
+      "epoch": 1.4646053702196908,
+      "grad_norm": 0.2935680150985718,
+      "learning_rate": 2.845840867992767e-05,
+      "loss": 0.0368,
+      "step": 3600
+    },
+    {
+      "epoch": 1.4747762408462164,
+      "grad_norm": 0.7148743867874146,
+      "learning_rate": 2.8270042194092826e-05,
+      "loss": 0.0386,
+      "step": 3625
+    },
+    {
+      "epoch": 1.4849471114727422,
+      "grad_norm": 1.314514398574829,
+      "learning_rate": 2.8081675708257987e-05,
+      "loss": 0.0375,
+      "step": 3650
+    },
+    {
+      "epoch": 1.4951179820992677,
+      "grad_norm": 1.6261988878250122,
+      "learning_rate": 2.7893309222423148e-05,
+      "loss": 0.0376,
+      "step": 3675
+    },
+    {
+      "epoch": 1.5052888527257933,
+      "grad_norm": 1.105427861213684,
+      "learning_rate": 2.770494273658831e-05,
+      "loss": 0.0354,
+      "step": 3700
+    },
+    {
+      "epoch": 1.5154597233523188,
+      "grad_norm": 0.5577530264854431,
+      "learning_rate": 2.7516576250753466e-05,
+      "loss": 0.0362,
+      "step": 3725
+    },
+    {
+      "epoch": 1.5256305939788446,
+      "grad_norm": 0.49557003378868103,
+      "learning_rate": 2.7328209764918627e-05,
+      "loss": 0.0389,
+      "step": 3750
+    },
+    {
+      "epoch": 1.5358014646053704,
+      "grad_norm": 0.8892014622688293,
+      "learning_rate": 2.7139843279083788e-05,
+      "loss": 0.0379,
+      "step": 3775
+    },
+    {
+      "epoch": 1.545972335231896,
+      "grad_norm": 0.5090736150741577,
+      "learning_rate": 2.695147679324895e-05,
+      "loss": 0.0359,
+      "step": 3800
+    },
+    {
+      "epoch": 1.5561432058584215,
+      "grad_norm": 0.8963241577148438,
+      "learning_rate": 2.6763110307414107e-05,
+      "loss": 0.0421,
+      "step": 3825
+    },
+    {
+      "epoch": 1.566314076484947,
+      "grad_norm": 0.3889683485031128,
+      "learning_rate": 2.6574743821579268e-05,
+      "loss": 0.0362,
+      "step": 3850
+    },
+    {
+      "epoch": 1.5764849471114726,
+      "grad_norm": 0.6879289150238037,
+      "learning_rate": 2.638637733574443e-05,
+      "loss": 0.0367,
+      "step": 3875
+    },
+    {
+      "epoch": 1.5866558177379984,
+      "grad_norm": 1.1574759483337402,
+      "learning_rate": 2.619801084990959e-05,
+      "loss": 0.0387,
+      "step": 3900
+    },
+    {
+      "epoch": 1.5968266883645241,
+      "grad_norm": 1.3032798767089844,
+      "learning_rate": 2.6009644364074747e-05,
+      "loss": 0.0366,
+      "step": 3925
+    },
+    {
+      "epoch": 1.6069975589910497,
+      "grad_norm": 1.0193997621536255,
+      "learning_rate": 2.58212778782399e-05,
+      "loss": 0.0325,
+      "step": 3950
+    },
+    {
+      "epoch": 1.6171684296175752,
+      "grad_norm": 1.767223834991455,
+      "learning_rate": 2.5632911392405062e-05,
+      "loss": 0.0393,
+      "step": 3975
+    },
+    {
+      "epoch": 1.6273393002441008,
+      "grad_norm": 1.016648530960083,
+      "learning_rate": 2.5444544906570223e-05,
+      "loss": 0.0333,
+      "step": 4000
+    },
+    {
+      "epoch": 1.6375101708706266,
+      "grad_norm": 2.0735578536987305,
+      "learning_rate": 2.5256178420735384e-05,
+      "loss": 0.0355,
+      "step": 4025
+    },
+    {
+      "epoch": 1.647681041497152,
+      "grad_norm": 0.8982949256896973,
+      "learning_rate": 2.506781193490054e-05,
+      "loss": 0.0369,
+      "step": 4050
+    },
+    {
+      "epoch": 1.6578519121236779,
+      "grad_norm": 0.324400395154953,
+      "learning_rate": 2.4879445449065702e-05,
+      "loss": 0.0358,
+      "step": 4075
+    },
+    {
+      "epoch": 1.6680227827502034,
+      "grad_norm": 0.32701972126960754,
+      "learning_rate": 2.4691078963230863e-05,
+      "loss": 0.0336,
+      "step": 4100
+    },
+    {
+      "epoch": 1.678193653376729,
+      "grad_norm": 1.151262640953064,
+      "learning_rate": 2.4502712477396024e-05,
+      "loss": 0.0392,
+      "step": 4125
+    },
+    {
+      "epoch": 1.6883645240032545,
+      "grad_norm": 0.5716719627380371,
+      "learning_rate": 2.431434599156118e-05,
+      "loss": 0.0383,
+      "step": 4150
+    },
+    {
+      "epoch": 1.6985353946297803,
+      "grad_norm": 0.8748169541358948,
+      "learning_rate": 2.4125979505726342e-05,
+      "loss": 0.0344,
+      "step": 4175
+    },
+    {
+      "epoch": 1.708706265256306,
+      "grad_norm": 0.25271666049957275,
+      "learning_rate": 2.3937613019891503e-05,
+      "loss": 0.0351,
+      "step": 4200
+    },
+    {
+      "epoch": 1.7188771358828316,
+      "grad_norm": 0.23297059535980225,
+      "learning_rate": 2.3749246534056664e-05,
+      "loss": 0.0337,
+      "step": 4225
+    },
+    {
+      "epoch": 1.7290480065093572,
+      "grad_norm": 0.3409133851528168,
+      "learning_rate": 2.356088004822182e-05,
+      "loss": 0.0346,
+      "step": 4250
+    },
+    {
+      "epoch": 1.7392188771358827,
+      "grad_norm": 0.822523832321167,
+      "learning_rate": 2.337251356238698e-05,
+      "loss": 0.0374,
+      "step": 4275
+    },
+    {
+      "epoch": 1.7493897477624083,
+      "grad_norm": 1.9754129648208618,
+      "learning_rate": 2.318414707655214e-05,
+      "loss": 0.0409,
+      "step": 4300
+    },
+    {
+      "epoch": 1.759560618388934,
+      "grad_norm": 0.49358049035072327,
+      "learning_rate": 2.29957805907173e-05,
+      "loss": 0.0379,
+      "step": 4325
+    },
+    {
+      "epoch": 1.7697314890154598,
+      "grad_norm": 0.6075097918510437,
+      "learning_rate": 2.280741410488246e-05,
+      "loss": 0.0358,
+      "step": 4350
+    },
+    {
+      "epoch": 1.7799023596419854,
+      "grad_norm": 0.5666526556015015,
+      "learning_rate": 2.261904761904762e-05,
+      "loss": 0.0337,
+      "step": 4375
+    },
+    {
+      "epoch": 1.790073230268511,
+      "grad_norm": 0.7485412955284119,
+      "learning_rate": 2.243068113321278e-05,
+      "loss": 0.037,
+      "step": 4400
+    },
+    {
+      "epoch": 1.8002441008950365,
+      "grad_norm": 0.585403323173523,
+      "learning_rate": 2.224231464737794e-05,
+      "loss": 0.0379,
+      "step": 4425
+    },
+    {
+      "epoch": 1.8104149715215623,
+      "grad_norm": 0.7822312712669373,
+      "learning_rate": 2.2053948161543102e-05,
+      "loss": 0.0368,
+      "step": 4450
+    },
+    {
+      "epoch": 1.8205858421480878,
+      "grad_norm": 0.3547162413597107,
+      "learning_rate": 2.186558167570826e-05,
+      "loss": 0.038,
+      "step": 4475
+    },
+    {
+      "epoch": 1.8307567127746136,
+      "grad_norm": 0.5509994626045227,
+      "learning_rate": 2.1677215189873417e-05,
+      "loss": 0.0371,
+      "step": 4500
+    },
+    {
+      "epoch": 1.8409275834011392,
+      "grad_norm": 1.1359673738479614,
+      "learning_rate": 2.1488848704038578e-05,
+      "loss": 0.0321,
+      "step": 4525
+    },
+    {
+      "epoch": 1.8510984540276647,
+      "grad_norm": 0.7981705069541931,
+      "learning_rate": 2.130048221820374e-05,
+      "loss": 0.0369,
+      "step": 4550
+    },
+    {
+      "epoch": 1.8612693246541903,
+      "grad_norm": 0.3582057058811188,
+      "learning_rate": 2.11121157323689e-05,
+      "loss": 0.0412,
+      "step": 4575
+    },
+    {
+      "epoch": 1.871440195280716,
+      "grad_norm": 0.9928992986679077,
+      "learning_rate": 2.0923749246534057e-05,
+      "loss": 0.0352,
+      "step": 4600
+    },
+    {
+      "epoch": 1.8816110659072418,
+      "grad_norm": 0.48575785756111145,
+      "learning_rate": 2.0735382760699218e-05,
+      "loss": 0.035,
+      "step": 4625
+    },
+    {
+      "epoch": 1.8917819365337674,
+      "grad_norm": 0.5365208387374878,
+      "learning_rate": 2.054701627486438e-05,
+      "loss": 0.0379,
+      "step": 4650
+    },
+    {
+      "epoch": 1.901952807160293,
+      "grad_norm": 1.141358494758606,
+      "learning_rate": 2.0358649789029536e-05,
+      "loss": 0.0355,
+      "step": 4675
+    },
+    {
+      "epoch": 1.9121236777868185,
+      "grad_norm": 0.43180742859840393,
+      "learning_rate": 2.0170283303194694e-05,
+      "loss": 0.0354,
+      "step": 4700
+    },
+    {
+      "epoch": 1.922294548413344,
+      "grad_norm": 0.7140740752220154,
+      "learning_rate": 1.9981916817359855e-05,
+      "loss": 0.0355,
+      "step": 4725
+    },
+    {
+      "epoch": 1.9324654190398698,
+      "grad_norm": 0.30647122859954834,
+      "learning_rate": 1.9793550331525016e-05,
+      "loss": 0.0371,
+      "step": 4750
+    },
+    {
+      "epoch": 1.9426362896663956,
+      "grad_norm": 0.42196792364120483,
+      "learning_rate": 1.9605183845690177e-05,
+      "loss": 0.0356,
+      "step": 4775
+    },
+    {
+      "epoch": 1.9528071602929211,
+      "grad_norm": 0.6331903338432312,
+      "learning_rate": 1.9416817359855334e-05,
+      "loss": 0.0352,
+      "step": 4800
+    },
+    {
+      "epoch": 1.9629780309194467,
+      "grad_norm": 0.7057808637619019,
+      "learning_rate": 1.9228450874020495e-05,
+      "loss": 0.0364,
+      "step": 4825
+    },
+    {
+      "epoch": 1.9731489015459722,
+      "grad_norm": 0.49434205889701843,
+      "learning_rate": 1.9040084388185656e-05,
+      "loss": 0.0347,
+      "step": 4850
+    },
+    {
+      "epoch": 1.983319772172498,
+      "grad_norm": 0.3139288127422333,
+      "learning_rate": 1.8851717902350817e-05,
+      "loss": 0.0364,
+      "step": 4875
+    },
+    {
+      "epoch": 1.9934906427990235,
+      "grad_norm": 0.3922992944717407,
+      "learning_rate": 1.8663351416515974e-05,
+      "loss": 0.036,
+      "step": 4900
+    },
+    {
+      "epoch": 2.0,
+      "eval_explained_variance": 0.25461888313293457,
+      "eval_loss": 0.03482421860098839,
+      "eval_mae": 0.14410310983657837,
+      "eval_mse": 0.0348237045109272,
+      "eval_r2": 0.25420135259628296,
+      "eval_rmse": 0.18661110500430353,
+      "eval_runtime": 39.031,
+      "eval_samples_per_second": 503.652,
+      "eval_steps_per_second": 7.891,
+      "step": 4916
+    },
+    {
+      "epoch": 2.0036615134255493,
+      "grad_norm": 0.5134842395782471,
+      "learning_rate": 1.8474984930681132e-05,
+      "loss": 0.0344,
+      "step": 4925
+    },
+    {
+      "epoch": 2.013832384052075,
+      "grad_norm": 0.3377295136451721,
+      "learning_rate": 1.8286618444846293e-05,
+      "loss": 0.0336,
+      "step": 4950
+    },
+    {
+      "epoch": 2.0240032546786004,
+      "grad_norm": 0.3855837285518646,
+      "learning_rate": 1.8098251959011453e-05,
+      "loss": 0.0316,
+      "step": 4975
+    },
+    {
+      "epoch": 2.034174125305126,
+      "grad_norm": 0.4808228313922882,
+      "learning_rate": 1.7909885473176614e-05,
+      "loss": 0.0347,
+      "step": 5000
+    },
+    {
+      "epoch": 2.0443449959316515,
+      "grad_norm": 0.6781342029571533,
+      "learning_rate": 1.7721518987341772e-05,
+      "loss": 0.0366,
+      "step": 5025
+    },
+    {
+      "epoch": 2.0545158665581775,
+      "grad_norm": 0.5457364320755005,
+      "learning_rate": 1.7533152501506933e-05,
+      "loss": 0.0326,
+      "step": 5050
+    },
+    {
+      "epoch": 2.064686737184703,
+      "grad_norm": 1.1539140939712524,
+      "learning_rate": 1.7344786015672094e-05,
+      "loss": 0.032,
+      "step": 5075
+    },
+    {
+      "epoch": 2.0748576078112286,
+      "grad_norm": 0.7635537981987,
+      "learning_rate": 1.7156419529837255e-05,
+      "loss": 0.0318,
+      "step": 5100
+    },
+    {
+      "epoch": 2.085028478437754,
+      "grad_norm": 0.8772742748260498,
+      "learning_rate": 1.6968053044002412e-05,
+      "loss": 0.0337,
+      "step": 5125
+    },
+    {
+      "epoch": 2.0951993490642797,
+      "grad_norm": 0.45236992835998535,
+      "learning_rate": 1.677968655816757e-05,
+      "loss": 0.031,
+      "step": 5150
+    },
+    {
+      "epoch": 2.1053702196908057,
+      "grad_norm": 0.5050310492515564,
+      "learning_rate": 1.659132007233273e-05,
+      "loss": 0.031,
+      "step": 5175
+    },
+    {
+      "epoch": 2.1155410903173313,
+      "grad_norm": 0.442862331867218,
+      "learning_rate": 1.640295358649789e-05,
+      "loss": 0.0342,
+      "step": 5200
+    },
+    {
+      "epoch": 2.125711960943857,
+      "grad_norm": 0.5236470103263855,
+      "learning_rate": 1.6214587100663052e-05,
+      "loss": 0.0372,
+      "step": 5225
+    },
+    {
+      "epoch": 2.1358828315703824,
+      "grad_norm": 0.9813937544822693,
+      "learning_rate": 1.602622061482821e-05,
+      "loss": 0.0326,
+      "step": 5250
+    },
+    {
+      "epoch": 2.146053702196908,
+      "grad_norm": 0.349025696516037,
+      "learning_rate": 1.583785412899337e-05,
+      "loss": 0.0346,
+      "step": 5275
+    },
+    {
+      "epoch": 2.1562245728234335,
+      "grad_norm": 0.35612091422080994,
+      "learning_rate": 1.564948764315853e-05,
+      "loss": 0.0342,
+      "step": 5300
+    },
+    {
+      "epoch": 2.1663954434499595,
+      "grad_norm": 0.5912727117538452,
+      "learning_rate": 1.5461121157323692e-05,
+      "loss": 0.0324,
+      "step": 5325
+    },
+    {
+      "epoch": 2.176566314076485,
+      "grad_norm": 0.2870270609855652,
+      "learning_rate": 1.5272754671488847e-05,
+      "loss": 0.0342,
+      "step": 5350
+    },
+    {
+      "epoch": 2.1867371847030106,
+      "grad_norm": 0.3680706322193146,
+      "learning_rate": 1.5084388185654007e-05,
+      "loss": 0.0329,
+      "step": 5375
+    },
+    {
+      "epoch": 2.196908055329536,
+      "grad_norm": 0.9814783930778503,
+      "learning_rate": 1.4896021699819168e-05,
+      "loss": 0.0293,
+      "step": 5400
+    },
+    {
+      "epoch": 2.2070789259560617,
+      "grad_norm": 0.7239277958869934,
+      "learning_rate": 1.4707655213984328e-05,
+      "loss": 0.0346,
+      "step": 5425
+    },
+    {
+      "epoch": 2.2172497965825873,
+      "grad_norm": 0.44417452812194824,
+      "learning_rate": 1.4519288728149488e-05,
+      "loss": 0.0339,
+      "step": 5450
+    },
+    {
+      "epoch": 2.2274206672091132,
+      "grad_norm": 0.3636336326599121,
+      "learning_rate": 1.4330922242314648e-05,
+      "loss": 0.0327,
+      "step": 5475
+    },
+    {
+      "epoch": 2.237591537835639,
+      "grad_norm": 0.2732349634170532,
+      "learning_rate": 1.4142555756479809e-05,
+      "loss": 0.032,
+      "step": 5500
+    },
+    {
+      "epoch": 2.2477624084621644,
+      "grad_norm": 0.820342481136322,
+      "learning_rate": 1.3954189270644968e-05,
+      "loss": 0.0318,
+      "step": 5525
+    },
+    {
+      "epoch": 2.25793327908869,
+      "grad_norm": 0.31075552105903625,
+      "learning_rate": 1.3765822784810129e-05,
+      "loss": 0.0311,
+      "step": 5550
+    },
+    {
+      "epoch": 2.2681041497152155,
+      "grad_norm": 0.8737571835517883,
+      "learning_rate": 1.3577456298975286e-05,
+      "loss": 0.0356,
+      "step": 5575
+    },
+    {
+      "epoch": 2.2782750203417415,
+      "grad_norm": 0.9981245994567871,
+      "learning_rate": 1.3389089813140445e-05,
+      "loss": 0.0333,
+      "step": 5600
+    },
+    {
+      "epoch": 2.288445890968267,
+      "grad_norm": 0.5384612679481506,
+      "learning_rate": 1.3200723327305606e-05,
+      "loss": 0.0316,
+      "step": 5625
+    },
+    {
+      "epoch": 2.2986167615947926,
+      "grad_norm": 0.6893337965011597,
+      "learning_rate": 1.3012356841470765e-05,
+      "loss": 0.0324,
+      "step": 5650
+    },
+    {
+      "epoch": 2.308787632221318,
+      "grad_norm": 0.449916273355484,
+      "learning_rate": 1.2823990355635926e-05,
+      "loss": 0.0325,
+      "step": 5675
+    },
+    {
+      "epoch": 2.3189585028478437,
+      "grad_norm": 0.38824161887168884,
+      "learning_rate": 1.2635623869801086e-05,
+      "loss": 0.0339,
+      "step": 5700
+    },
+    {
+      "epoch": 2.329129373474369,
+      "grad_norm": 0.7458836436271667,
+      "learning_rate": 1.2447257383966246e-05,
+      "loss": 0.0355,
+      "step": 5725
+    },
+    {
+      "epoch": 2.339300244100895,
+      "grad_norm": 0.47954612970352173,
+      "learning_rate": 1.2258890898131404e-05,
+      "loss": 0.0323,
+      "step": 5750
+    },
+    {
+      "epoch": 2.3494711147274208,
+      "grad_norm": 0.42400848865509033,
+      "learning_rate": 1.2070524412296565e-05,
+      "loss": 0.0302,
+      "step": 5775
+    },
+    {
+      "epoch": 2.3596419853539463,
+      "grad_norm": 1.189965009689331,
+      "learning_rate": 1.1882157926461724e-05,
+      "loss": 0.0338,
+      "step": 5800
+    },
+    {
+      "epoch": 2.369812855980472,
+      "grad_norm": 0.5762277841567993,
+      "learning_rate": 1.1693791440626885e-05,
+      "loss": 0.0332,
+      "step": 5825
+    },
+    {
+      "epoch": 2.3799837266069974,
+      "grad_norm": 0.5994691848754883,
+      "learning_rate": 1.1505424954792044e-05,
+      "loss": 0.0364,
+      "step": 5850
+    },
+    {
+      "epoch": 2.390154597233523,
+      "grad_norm": 0.9533575773239136,
+      "learning_rate": 1.1317058468957203e-05,
+      "loss": 0.0326,
+      "step": 5875
+    },
+    {
+      "epoch": 2.400325467860049,
+      "grad_norm": 0.4238649308681488,
+      "learning_rate": 1.1128691983122364e-05,
+      "loss": 0.034,
+      "step": 5900
+    },
+    {
+      "epoch": 2.4104963384865745,
+      "grad_norm": 0.8726415038108826,
+      "learning_rate": 1.0940325497287523e-05,
+      "loss": 0.0327,
+      "step": 5925
+    },
+    {
+      "epoch": 2.4206672091131,
+      "grad_norm": 0.5922726988792419,
+      "learning_rate": 1.0751959011452683e-05,
+      "loss": 0.0337,
+      "step": 5950
+    },
+    {
+      "epoch": 2.4308380797396256,
+      "grad_norm": 0.3707614839076996,
+      "learning_rate": 1.0563592525617842e-05,
+      "loss": 0.0338,
+      "step": 5975
+    },
+    {
+      "epoch": 2.441008950366151,
+      "grad_norm": 0.4853639602661133,
+      "learning_rate": 1.0375226039783003e-05,
+      "loss": 0.0317,
+      "step": 6000
+    },
+    {
+      "epoch": 2.451179820992677,
+      "grad_norm": 0.8022235631942749,
+      "learning_rate": 1.0186859553948162e-05,
+      "loss": 0.032,
+      "step": 6025
+    },
+    {
+      "epoch": 2.4613506916192027,
+      "grad_norm": 0.8553130030632019,
+      "learning_rate": 9.998493068113323e-06,
+      "loss": 0.0312,
+      "step": 6050
+    },
+    {
+      "epoch": 2.4715215622457283,
+      "grad_norm": 0.4112774431705475,
+      "learning_rate": 9.81012658227848e-06,
+      "loss": 0.0349,
+      "step": 6075
+    },
+    {
+      "epoch": 2.481692432872254,
+      "grad_norm": 0.8546609282493591,
+      "learning_rate": 9.621760096443641e-06,
+      "loss": 0.0332,
+      "step": 6100
+    },
+    {
+      "epoch": 2.4918633034987794,
+      "grad_norm": 0.8445001840591431,
+      "learning_rate": 9.4333936106088e-06,
+      "loss": 0.0324,
+      "step": 6125
+    },
+    {
+      "epoch": 2.5020341741253054,
+      "grad_norm": 0.4580422043800354,
+      "learning_rate": 9.245027124773961e-06,
+      "loss": 0.0346,
+      "step": 6150
+    },
+    {
+      "epoch": 2.5122050447518305,
+      "grad_norm": 0.6121585369110107,
+      "learning_rate": 9.05666063893912e-06,
+      "loss": 0.0345,
+      "step": 6175
+    },
+    {
+      "epoch": 2.5223759153783565,
+      "grad_norm": 0.5637044906616211,
+      "learning_rate": 8.86829415310428e-06,
+      "loss": 0.0315,
+      "step": 6200
+    },
+    {
+      "epoch": 2.532546786004882,
+      "grad_norm": 0.6579483151435852,
+      "learning_rate": 8.67992766726944e-06,
+      "loss": 0.0345,
+      "step": 6225
+    },
+    {
+      "epoch": 2.5427176566314076,
+      "grad_norm": 0.30682843923568726,
+      "learning_rate": 8.499095840867993e-06,
+      "loss": 0.0334,
+      "step": 6250
+    },
+    {
+      "epoch": 2.552888527257933,
+      "grad_norm": 1.7261478900909424,
+      "learning_rate": 8.310729355033153e-06,
+      "loss": 0.0337,
+      "step": 6275
+    },
+    {
+      "epoch": 2.5630593978844587,
+      "grad_norm": 0.7609931826591492,
+      "learning_rate": 8.122362869198312e-06,
+      "loss": 0.0329,
+      "step": 6300
+    },
+    {
+      "epoch": 2.5732302685109847,
+      "grad_norm": 1.1947487592697144,
+      "learning_rate": 7.933996383363473e-06,
+      "loss": 0.0338,
+      "step": 6325
+    },
+    {
+      "epoch": 2.5834011391375102,
+      "grad_norm": 0.5045105814933777,
+      "learning_rate": 7.745629897528632e-06,
+      "loss": 0.0336,
+      "step": 6350
+    },
+    {
+      "epoch": 2.593572009764036,
+      "grad_norm": 0.8998399972915649,
+      "learning_rate": 7.557263411693792e-06,
+      "loss": 0.0334,
+      "step": 6375
+    },
+    {
+      "epoch": 2.6037428803905613,
+      "grad_norm": 0.3800385594367981,
+      "learning_rate": 7.368896925858952e-06,
+      "loss": 0.0306,
+      "step": 6400
+    },
+    {
+      "epoch": 2.613913751017087,
+      "grad_norm": 0.35073891282081604,
+      "learning_rate": 7.180530440024111e-06,
+      "loss": 0.0342,
+      "step": 6425
+    },
+    {
+      "epoch": 2.624084621643613,
+      "grad_norm": 0.35614126920700073,
+      "learning_rate": 6.992163954189271e-06,
+      "loss": 0.0317,
+      "step": 6450
+    },
+    {
+      "epoch": 2.6342554922701384,
+      "grad_norm": 1.0959842205047607,
+      "learning_rate": 6.8037974683544305e-06,
+      "loss": 0.0328,
+      "step": 6475
+    },
+    {
+      "epoch": 2.644426362896664,
+      "grad_norm": 0.9010970592498779,
+      "learning_rate": 6.6154309825195905e-06,
+      "loss": 0.0364,
+      "step": 6500
+    },
+    {
+      "epoch": 2.6545972335231895,
+      "grad_norm": 0.8300909996032715,
+      "learning_rate": 6.42706449668475e-06,
+      "loss": 0.0312,
+      "step": 6525
+    },
+    {
+      "epoch": 2.664768104149715,
+      "grad_norm": 0.7244754433631897,
+      "learning_rate": 6.23869801084991e-06,
+      "loss": 0.0319,
+      "step": 6550
+    },
+    {
+      "epoch": 2.674938974776241,
+      "grad_norm": 1.3230552673339844,
+      "learning_rate": 6.05033152501507e-06,
+      "loss": 0.0328,
+      "step": 6575
+    },
+    {
+      "epoch": 2.685109845402766,
+      "grad_norm": 0.437537282705307,
+      "learning_rate": 5.861965039180229e-06,
+      "loss": 0.0325,
+      "step": 6600
+    },
+    {
+      "epoch": 2.695280716029292,
+      "grad_norm": 0.4210902154445648,
+      "learning_rate": 5.673598553345389e-06,
+      "loss": 0.0362,
+      "step": 6625
+    },
+    {
+      "epoch": 2.7054515866558178,
+      "grad_norm": 0.3914755880832672,
+      "learning_rate": 5.485232067510549e-06,
+      "loss": 0.0329,
+      "step": 6650
+    },
+    {
+      "epoch": 2.7156224572823433,
+      "grad_norm": 0.9759465456008911,
+      "learning_rate": 5.296865581675708e-06,
+      "loss": 0.0343,
+      "step": 6675
+    },
+    {
+      "epoch": 2.725793327908869,
+      "grad_norm": 0.34633737802505493,
+      "learning_rate": 5.108499095840868e-06,
+      "loss": 0.0308,
+      "step": 6700
+    },
+    {
+      "epoch": 2.7359641985353944,
+      "grad_norm": 0.5408746600151062,
+      "learning_rate": 4.9201326100060275e-06,
+      "loss": 0.0285,
+      "step": 6725
+    },
+    {
+      "epoch": 2.7461350691619204,
+      "grad_norm": 0.3921310007572174,
+      "learning_rate": 4.7317661241711876e-06,
+      "loss": 0.0307,
+      "step": 6750
+    },
+    {
+      "epoch": 2.756305939788446,
+      "grad_norm": 0.6094385981559753,
+      "learning_rate": 4.543399638336348e-06,
+      "loss": 0.0303,
+      "step": 6775
+    },
+    {
+      "epoch": 2.7664768104149715,
+      "grad_norm": 0.5900077819824219,
+      "learning_rate": 4.355033152501508e-06,
+      "loss": 0.0355,
+      "step": 6800
+    },
+    {
+      "epoch": 2.776647681041497,
+      "grad_norm": 0.4339945912361145,
+      "learning_rate": 4.166666666666667e-06,
+      "loss": 0.0351,
+      "step": 6825
+    },
+    {
+      "epoch": 2.7868185516680226,
+      "grad_norm": 0.9042001962661743,
+      "learning_rate": 3.978300180831827e-06,
+      "loss": 0.033,
+      "step": 6850
+    },
+    {
+      "epoch": 2.7969894222945486,
+      "grad_norm": 0.5715941190719604,
+      "learning_rate": 3.789933694996986e-06,
+      "loss": 0.0325,
+      "step": 6875
+    },
+    {
+      "epoch": 2.807160292921074,
+      "grad_norm": 0.40120917558670044,
+      "learning_rate": 3.601567209162146e-06,
+      "loss": 0.0324,
+      "step": 6900
+    },
+    {
+      "epoch": 2.8173311635475997,
+      "grad_norm": 0.636159360408783,
+      "learning_rate": 3.413200723327306e-06,
+      "loss": 0.0311,
+      "step": 6925
+    },
+    {
+      "epoch": 2.8275020341741253,
+      "grad_norm": 0.79677414894104,
+      "learning_rate": 3.2248342374924654e-06,
+      "loss": 0.0298,
+      "step": 6950
+    },
+    {
+      "epoch": 2.837672904800651,
+      "grad_norm": 0.6220082640647888,
+      "learning_rate": 3.036467751657625e-06,
+      "loss": 0.0315,
+      "step": 6975
+    },
+    {
+      "epoch": 2.847843775427177,
+      "grad_norm": 0.4538786709308624,
+      "learning_rate": 2.848101265822785e-06,
+      "loss": 0.032,
+      "step": 7000
+    },
+    {
+      "epoch": 2.858014646053702,
+      "grad_norm": 0.44975048303604126,
+      "learning_rate": 2.6597347799879447e-06,
+      "loss": 0.0314,
+      "step": 7025
+    },
+    {
+      "epoch": 2.868185516680228,
+      "grad_norm": 0.2438650280237198,
+      "learning_rate": 2.4713682941531043e-06,
+      "loss": 0.0322,
+      "step": 7050
+    },
+    {
+      "epoch": 2.8783563873067535,
+      "grad_norm": 0.9189873337745667,
+      "learning_rate": 2.2830018083182644e-06,
+      "loss": 0.0315,
+      "step": 7075
+    },
+    {
+      "epoch": 2.888527257933279,
+      "grad_norm": 0.31788191199302673,
+      "learning_rate": 2.094635322483424e-06,
+      "loss": 0.0287,
+      "step": 7100
+    },
+    {
+      "epoch": 2.8986981285598046,
+      "grad_norm": 0.7033805847167969,
+      "learning_rate": 1.9062688366485836e-06,
+      "loss": 0.0331,
+      "step": 7125
+    },
+    {
+      "epoch": 2.90886899918633,
+      "grad_norm": 0.3187176287174225,
+      "learning_rate": 1.7179023508137434e-06,
+      "loss": 0.0349,
+      "step": 7150
+    },
+    {
+      "epoch": 2.919039869812856,
+      "grad_norm": 0.3502849042415619,
+      "learning_rate": 1.529535864978903e-06,
+      "loss": 0.0314,
+      "step": 7175
+    },
+    {
+      "epoch": 2.9292107404393817,
+      "grad_norm": 0.38132113218307495,
+      "learning_rate": 1.3411693791440627e-06,
+      "loss": 0.0307,
+      "step": 7200
+    },
+    {
+      "epoch": 2.9393816110659072,
+      "grad_norm": 0.335792601108551,
+      "learning_rate": 1.1528028933092225e-06,
+      "loss": 0.0329,
+      "step": 7225
+    },
+    {
+      "epoch": 2.949552481692433,
+      "grad_norm": 0.43150436878204346,
+      "learning_rate": 9.644364074743821e-07,
+      "loss": 0.0279,
+      "step": 7250
+    },
+    {
+      "epoch": 2.9597233523189583,
+      "grad_norm": 0.43568554520606995,
+      "learning_rate": 7.76069921639542e-07,
+      "loss": 0.0297,
+      "step": 7275
+    },
+    {
+      "epoch": 2.9698942229454843,
+      "grad_norm": 0.2997362017631531,
+      "learning_rate": 5.877034358047017e-07,
+      "loss": 0.0315,
+      "step": 7300
+    },
+    {
+      "epoch": 2.98006509357201,
+      "grad_norm": 0.555476725101471,
+      "learning_rate": 3.993369499698613e-07,
+      "loss": 0.0296,
+      "step": 7325
+    },
+    {
+      "epoch": 2.9902359641985354,
+      "grad_norm": 0.31480032205581665,
+      "learning_rate": 2.1097046413502108e-07,
+      "loss": 0.0334,
+      "step": 7350
+    },
+    {
+      "epoch": 3.0,
+      "eval_explained_variance": 0.273209810256958,
+      "eval_loss": 0.03396870195865631,
+      "eval_mae": 0.14112502336502075,
+      "eval_mse": 0.03396843746304512,
+      "eval_r2": 0.2725181579589844,
+      "eval_rmse": 0.18430528332916862,
+      "eval_runtime": 39.0131,
+      "eval_samples_per_second": 503.882,
+      "eval_steps_per_second": 7.895,
+      "step": 7374
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 7374,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 2
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.937435559513293e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-7374/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48650044b1efcc5cbfbdc6ba568b695fdd8577f68a6f2cd3a98c3c5b1b5be2c4
+size 5368

config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "_name_or_path": "allenai/longformer-base-4096",
+  "_num_labels": 1,
+  "architectures": [
+    "LongformerForSequenceClassification"
+  ],
+  "attention_mode": "longformer",
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "target"
+  },
+  "ignore_attention_mask": false,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "target": 0
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 4098,
+  "model_type": "longformer",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "onnx_export": false,
+  "pad_token_id": 1,
+  "problem_type": "regression",
+  "sep_token_id": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9242fc9cfb1524a85761f4f00c1b4ff8956fb34fb58246044b0f95b84eb92733
+size 594675108

runs/Apr04_07-57-07_r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo/events.out.tfevents.1743753431.r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo.216.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fc5f0e4a2dc0975a4ef0900d090a8393a9d009389d6edf1d5f4d125cfde19a4
-size 63364

 version https://git-lfs.github.com/spec/v1
+oid sha256:250f9914259741f9fa3fcc5fdde59155eb9579c874d609546a17e8c87b325d55
+size 69303

runs/Apr04_07-57-07_r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo/events.out.tfevents.1743756139.r-samtuckervegan-autotrain-advanced-3lepu8o1-e749d-y1ybo.216.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2b5cf7b48ec8903d8ed75029ec22b5e1cc4c561e160df0503fdabf63c47543b
+size 609

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "LongformerTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48650044b1efcc5cbfbdc6ba568b695fdd8577f68a6f2cd3a98c3c5b1b5be2c4
+size 5368

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "samtuckervegan/text_performance",
+    "model": "allenai/longformer-base-4096",
+    "lr": 5e-05,
+    "epochs": 3,
+    "max_seq_length": 128,
+    "batch_size": 8,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "test",
+    "text_column": "text",
+    "target_column": "target",
+    "logging_steps": -1,
+    "project_name": "text-performance-longformer",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "samtuckervegan",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff