spacesedan committed on Mar 19, 2025

Commit

ca3d9c0

verified ·

1 Parent(s): 147eec7

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +39 -0
checkpoint-1250/config.json +59 -0
checkpoint-1250/model.safetensors +3 -0
checkpoint-1250/optimizer.pt +3 -0
checkpoint-1250/rng_state.pth +3 -0
checkpoint-1250/scheduler.pt +3 -0
checkpoint-1250/trainer_state.json +1306 -0
checkpoint-1250/training_args.bin +3 -0
config.json +59 -0
merges.txt +0 -0
model.safetensors +3 -0
runs/Mar19_20-59-07_r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h/events.out.tfevents.1742417949.r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h.71.0 +2 -2
runs/Mar19_20-59-07_r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h/events.out.tfevents.1742421321.r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h.71.1 +3 -0
special_tokens_map.json +15 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
training_args.bin +3 -0
training_params.json +30 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,39 @@

+---
+library_name: transformers
+tags:
+- autotrain
+- text-classification
+base_model: allenai/longformer-base-4096
+widget:
+- text: "I love AutoTrain"
+datasets:
+- spacesedan/Amazon_Reviews_Sentiment_10K
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Classification
+## Validation Metrics
+loss: 1.0463045835494995
+f1_macro: 0.509019730829119
+f1_micro: 0.5233333333333333
+f1_weighted: 0.516464791683911
+precision_macro: 0.5322711463107999
+precision_micro: 0.5233333333333333
+precision_weighted: 0.5447555045714482
+recall_macro: 0.5207324546507
+recall_micro: 0.5233333333333333
+recall_weighted: 0.5233333333333333
+accuracy: 0.5233333333333333

checkpoint-1250/config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "_name_or_path": "allenai/longformer-base-4096",
+  "_num_labels": 5,
+  "architectures": [
+    "LongformerForSequenceClassification"
+  ],
+  "attention_mode": "longformer",
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "very negative",
+    "1": "negative",
+    "2": "neutral",
+    "3": "positive",
+    "4": "very positive"
+  },
+  "ignore_attention_mask": false,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "negative": 1,
+    "neutral": 2,
+    "positive": 3,
+    "very negative": 0,
+    "very positive": 4
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 4098,
+  "model_type": "longformer",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "onnx_export": false,
+  "pad_token_id": 1,
+  "problem_type": "single_label_classification",
+  "sep_token_id": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}

checkpoint-1250/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ea5a2958a9736d967d763abc8f3d7f4a1bebfee2308ff3232a133fdae09f851
+size 594687412

checkpoint-1250/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad0c96183a09d85452ddb4c7a1f516923b5c210a73709dcf2ec969e93ede113c
+size 1189534778

checkpoint-1250/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99a38c0987fd8f1754e2f11f40625624946104dfafd27ef602c27a3834a4cd57
+size 14244

checkpoint-1250/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88e1002e9313aa09f72c809ac17507e9e7c74ef6c0260b03690746a5ae7ee4ca
+size 1064

checkpoint-1250/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1306 @@

+{
+  "best_metric": 1.0463045835494995,
+  "best_model_checkpoint": "sentiment-analysis-longformer/checkpoint-1250",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1250,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0056,
+      "grad_norm": 4.3044047355651855,
+      "learning_rate": 8.000000000000001e-07,
+      "loss": 1.6185,
+      "step": 7
+    },
+    {
+      "epoch": 0.0112,
+      "grad_norm": 4.809700965881348,
+      "learning_rate": 1.7333333333333334e-06,
+      "loss": 1.6057,
+      "step": 14
+    },
+    {
+      "epoch": 0.0168,
+      "grad_norm": 2.63266921043396,
+      "learning_rate": 2.666666666666667e-06,
+      "loss": 1.6137,
+      "step": 21
+    },
+    {
+      "epoch": 0.0224,
+      "grad_norm": 3.832202196121216,
+      "learning_rate": 3.6e-06,
+      "loss": 1.6188,
+      "step": 28
+    },
+    {
+      "epoch": 0.028,
+      "grad_norm": 2.835721731185913,
+      "learning_rate": 4.533333333333334e-06,
+      "loss": 1.6015,
+      "step": 35
+    },
+    {
+      "epoch": 0.0336,
+      "grad_norm": 3.497213125228882,
+      "learning_rate": 5.466666666666667e-06,
+      "loss": 1.5799,
+      "step": 42
+    },
+    {
+      "epoch": 0.0392,
+      "grad_norm": 6.252377986907959,
+      "learning_rate": 6.4000000000000006e-06,
+      "loss": 1.6329,
+      "step": 49
+    },
+    {
+      "epoch": 0.0448,
+      "grad_norm": 3.252988576889038,
+      "learning_rate": 7.333333333333334e-06,
+      "loss": 1.6139,
+      "step": 56
+    },
+    {
+      "epoch": 0.0504,
+      "grad_norm": 3.3185133934020996,
+      "learning_rate": 8.266666666666667e-06,
+      "loss": 1.6002,
+      "step": 63
+    },
+    {
+      "epoch": 0.056,
+      "grad_norm": 1.9004288911819458,
+      "learning_rate": 9.2e-06,
+      "loss": 1.6223,
+      "step": 70
+    },
+    {
+      "epoch": 0.0616,
+      "grad_norm": 3.372849702835083,
+      "learning_rate": 1.0133333333333333e-05,
+      "loss": 1.6073,
+      "step": 77
+    },
+    {
+      "epoch": 0.0672,
+      "grad_norm": 5.070404052734375,
+      "learning_rate": 1.1066666666666667e-05,
+      "loss": 1.6172,
+      "step": 84
+    },
+    {
+      "epoch": 0.0728,
+      "grad_norm": 4.0468831062316895,
+      "learning_rate": 1.2e-05,
+      "loss": 1.6146,
+      "step": 91
+    },
+    {
+      "epoch": 0.0784,
+      "grad_norm": 3.557427167892456,
+      "learning_rate": 1.2933333333333334e-05,
+      "loss": 1.6143,
+      "step": 98
+    },
+    {
+      "epoch": 0.084,
+      "grad_norm": 2.1088807582855225,
+      "learning_rate": 1.3866666666666667e-05,
+      "loss": 1.594,
+      "step": 105
+    },
+    {
+      "epoch": 0.0896,
+      "grad_norm": 7.924050807952881,
+      "learning_rate": 1.48e-05,
+      "loss": 1.5701,
+      "step": 112
+    },
+    {
+      "epoch": 0.0952,
+      "grad_norm": 11.886157989501953,
+      "learning_rate": 1.5733333333333334e-05,
+      "loss": 1.4665,
+      "step": 119
+    },
+    {
+      "epoch": 0.1008,
+      "grad_norm": null,
+      "learning_rate": 1.6533333333333333e-05,
+      "loss": 1.4903,
+      "step": 126
+    },
+    {
+      "epoch": 0.1064,
+      "grad_norm": 22.54196548461914,
+      "learning_rate": 1.7466666666666667e-05,
+      "loss": 1.3656,
+      "step": 133
+    },
+    {
+      "epoch": 0.112,
+      "grad_norm": 7.953355312347412,
+      "learning_rate": 1.826666666666667e-05,
+      "loss": 1.3593,
+      "step": 140
+    },
+    {
+      "epoch": 0.1176,
+      "grad_norm": 7.947963237762451,
+      "learning_rate": 1.9200000000000003e-05,
+      "loss": 1.3578,
+      "step": 147
+    },
+    {
+      "epoch": 0.1232,
+      "grad_norm": 37.732215881347656,
+      "learning_rate": 2.0133333333333336e-05,
+      "loss": 1.3341,
+      "step": 154
+    },
+    {
+      "epoch": 0.1288,
+      "grad_norm": 24.383142471313477,
+      "learning_rate": 2.0933333333333335e-05,
+      "loss": 1.3916,
+      "step": 161
+    },
+    {
+      "epoch": 0.1344,
+      "grad_norm": 10.203289031982422,
+      "learning_rate": 2.186666666666667e-05,
+      "loss": 1.1943,
+      "step": 168
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 38.70049285888672,
+      "learning_rate": 2.2800000000000002e-05,
+      "loss": 1.115,
+      "step": 175
+    },
+    {
+      "epoch": 0.1456,
+      "grad_norm": 44.0554313659668,
+      "learning_rate": 2.3733333333333335e-05,
+      "loss": 1.1703,
+      "step": 182
+    },
+    {
+      "epoch": 0.1512,
+      "grad_norm": 44.12297821044922,
+      "learning_rate": 2.466666666666667e-05,
+      "loss": 1.2689,
+      "step": 189
+    },
+    {
+      "epoch": 0.1568,
+      "grad_norm": 24.104822158813477,
+      "learning_rate": 2.5600000000000002e-05,
+      "loss": 1.5482,
+      "step": 196
+    },
+    {
+      "epoch": 0.1624,
+      "grad_norm": 28.003192901611328,
+      "learning_rate": 2.6533333333333332e-05,
+      "loss": 1.1575,
+      "step": 203
+    },
+    {
+      "epoch": 0.168,
+      "grad_norm": 17.407148361206055,
+      "learning_rate": 2.746666666666667e-05,
+      "loss": 1.1556,
+      "step": 210
+    },
+    {
+      "epoch": 0.1736,
+      "grad_norm": 30.059938430786133,
+      "learning_rate": 2.84e-05,
+      "loss": 1.3255,
+      "step": 217
+    },
+    {
+      "epoch": 0.1792,
+      "grad_norm": 17.500478744506836,
+      "learning_rate": 2.9333333333333336e-05,
+      "loss": 1.1785,
+      "step": 224
+    },
+    {
+      "epoch": 0.1848,
+      "grad_norm": 18.938140869140625,
+      "learning_rate": 3.0266666666666666e-05,
+      "loss": 1.2427,
+      "step": 231
+    },
+    {
+      "epoch": 0.1904,
+      "grad_norm": 46.56889343261719,
+      "learning_rate": 3.12e-05,
+      "loss": 1.3003,
+      "step": 238
+    },
+    {
+      "epoch": 0.196,
+      "grad_norm": 14.280040740966797,
+      "learning_rate": 3.213333333333334e-05,
+      "loss": 1.3049,
+      "step": 245
+    },
+    {
+      "epoch": 0.2016,
+      "grad_norm": 20.890186309814453,
+      "learning_rate": 3.3066666666666666e-05,
+      "loss": 1.1882,
+      "step": 252
+    },
+    {
+      "epoch": 0.2072,
+      "grad_norm": 15.023652076721191,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 1.3098,
+      "step": 259
+    },
+    {
+      "epoch": 0.2128,
+      "grad_norm": 15.437768936157227,
+      "learning_rate": 3.493333333333333e-05,
+      "loss": 1.1317,
+      "step": 266
+    },
+    {
+      "epoch": 0.2184,
+      "grad_norm": 17.70416831970215,
+      "learning_rate": 3.586666666666667e-05,
+      "loss": 1.2326,
+      "step": 273
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 25.55316734313965,
+      "learning_rate": 3.68e-05,
+      "loss": 1.0633,
+      "step": 280
+    },
+    {
+      "epoch": 0.2296,
+      "grad_norm": 44.82810592651367,
+      "learning_rate": 3.773333333333334e-05,
+      "loss": 1.0947,
+      "step": 287
+    },
+    {
+      "epoch": 0.2352,
+      "grad_norm": 26.375022888183594,
+      "learning_rate": 3.866666666666667e-05,
+      "loss": 1.3421,
+      "step": 294
+    },
+    {
+      "epoch": 0.2408,
+      "grad_norm": 27.109973907470703,
+      "learning_rate": 3.960000000000001e-05,
+      "loss": 1.1407,
+      "step": 301
+    },
+    {
+      "epoch": 0.2464,
+      "grad_norm": 33.65099334716797,
+      "learning_rate": 4.0533333333333334e-05,
+      "loss": 0.9152,
+      "step": 308
+    },
+    {
+      "epoch": 0.252,
+      "grad_norm": 20.36009979248047,
+      "learning_rate": 4.146666666666667e-05,
+      "loss": 1.1515,
+      "step": 315
+    },
+    {
+      "epoch": 0.2576,
+      "grad_norm": 25.868627548217773,
+      "learning_rate": 4.24e-05,
+      "loss": 1.2575,
+      "step": 322
+    },
+    {
+      "epoch": 0.2632,
+      "grad_norm": 17.713809967041016,
+      "learning_rate": 4.3333333333333334e-05,
+      "loss": 1.0586,
+      "step": 329
+    },
+    {
+      "epoch": 0.2688,
+      "grad_norm": 12.709273338317871,
+      "learning_rate": 4.426666666666667e-05,
+      "loss": 1.2379,
+      "step": 336
+    },
+    {
+      "epoch": 0.2744,
+      "grad_norm": 62.326900482177734,
+      "learning_rate": 4.52e-05,
+      "loss": 1.0445,
+      "step": 343
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 31.3813533782959,
+      "learning_rate": 4.6133333333333334e-05,
+      "loss": 1.2517,
+      "step": 350
+    },
+    {
+      "epoch": 0.2856,
+      "grad_norm": 31.736560821533203,
+      "learning_rate": 4.706666666666667e-05,
+      "loss": 1.2875,
+      "step": 357
+    },
+    {
+      "epoch": 0.2912,
+      "grad_norm": 9.466754913330078,
+      "learning_rate": 4.8e-05,
+      "loss": 1.323,
+      "step": 364
+    },
+    {
+      "epoch": 0.2968,
+      "grad_norm": 32.847103118896484,
+      "learning_rate": 4.8933333333333335e-05,
+      "loss": 1.3974,
+      "step": 371
+    },
+    {
+      "epoch": 0.3024,
+      "grad_norm": 10.510279655456543,
+      "learning_rate": 4.986666666666667e-05,
+      "loss": 1.17,
+      "step": 378
+    },
+    {
+      "epoch": 0.308,
+      "grad_norm": 37.245418548583984,
+      "learning_rate": 4.991111111111111e-05,
+      "loss": 1.1435,
+      "step": 385
+    },
+    {
+      "epoch": 0.3136,
+      "grad_norm": 29.977188110351562,
+      "learning_rate": 4.982222222222222e-05,
+      "loss": 1.2446,
+      "step": 392
+    },
+    {
+      "epoch": 0.3192,
+      "grad_norm": 21.97435188293457,
+      "learning_rate": 4.971851851851852e-05,
+      "loss": 1.2165,
+      "step": 399
+    },
+    {
+      "epoch": 0.3248,
+      "grad_norm": 15.939322471618652,
+      "learning_rate": 4.961481481481482e-05,
+      "loss": 1.4191,
+      "step": 406
+    },
+    {
+      "epoch": 0.3304,
+      "grad_norm": 14.586689949035645,
+      "learning_rate": 4.951111111111112e-05,
+      "loss": 1.2868,
+      "step": 413
+    },
+    {
+      "epoch": 0.336,
+      "grad_norm": 9.520950317382812,
+      "learning_rate": 4.940740740740741e-05,
+      "loss": 1.3862,
+      "step": 420
+    },
+    {
+      "epoch": 0.3416,
+      "grad_norm": 19.00330352783203,
+      "learning_rate": 4.9303703703703705e-05,
+      "loss": 1.3354,
+      "step": 427
+    },
+    {
+      "epoch": 0.3472,
+      "grad_norm": 6.289086818695068,
+      "learning_rate": 4.92e-05,
+      "loss": 1.3717,
+      "step": 434
+    },
+    {
+      "epoch": 0.3528,
+      "grad_norm": 66.89319610595703,
+      "learning_rate": 4.90962962962963e-05,
+      "loss": 1.2314,
+      "step": 441
+    },
+    {
+      "epoch": 0.3584,
+      "grad_norm": 14.694267272949219,
+      "learning_rate": 4.89925925925926e-05,
+      "loss": 1.2981,
+      "step": 448
+    },
+    {
+      "epoch": 0.364,
+      "grad_norm": 12.73987102508545,
+      "learning_rate": 4.888888888888889e-05,
+      "loss": 1.3561,
+      "step": 455
+    },
+    {
+      "epoch": 0.3696,
+      "grad_norm": 13.01091480255127,
+      "learning_rate": 4.878518518518519e-05,
+      "loss": 1.27,
+      "step": 462
+    },
+    {
+      "epoch": 0.3752,
+      "grad_norm": 13.623579978942871,
+      "learning_rate": 4.8681481481481485e-05,
+      "loss": 1.2522,
+      "step": 469
+    },
+    {
+      "epoch": 0.3808,
+      "grad_norm": 41.466163635253906,
+      "learning_rate": 4.8577777777777776e-05,
+      "loss": 1.4129,
+      "step": 476
+    },
+    {
+      "epoch": 0.3864,
+      "grad_norm": 28.096891403198242,
+      "learning_rate": 4.8474074074074074e-05,
+      "loss": 1.0454,
+      "step": 483
+    },
+    {
+      "epoch": 0.392,
+      "grad_norm": 26.944536209106445,
+      "learning_rate": 4.837037037037037e-05,
+      "loss": 1.2051,
+      "step": 490
+    },
+    {
+      "epoch": 0.3976,
+      "grad_norm": 18.42100715637207,
+      "learning_rate": 4.826666666666667e-05,
+      "loss": 1.127,
+      "step": 497
+    },
+    {
+      "epoch": 0.4032,
+      "grad_norm": 32.047943115234375,
+      "learning_rate": 4.816296296296297e-05,
+      "loss": 1.2287,
+      "step": 504
+    },
+    {
+      "epoch": 0.4088,
+      "grad_norm": 36.99417495727539,
+      "learning_rate": 4.805925925925926e-05,
+      "loss": 1.2363,
+      "step": 511
+    },
+    {
+      "epoch": 0.4144,
+      "grad_norm": 19.844234466552734,
+      "learning_rate": 4.7955555555555556e-05,
+      "loss": 1.194,
+      "step": 518
+    },
+    {
+      "epoch": 0.42,
+      "grad_norm": 11.864848136901855,
+      "learning_rate": 4.7851851851851854e-05,
+      "loss": 1.2978,
+      "step": 525
+    },
+    {
+      "epoch": 0.4256,
+      "grad_norm": 13.68875789642334,
+      "learning_rate": 4.774814814814815e-05,
+      "loss": 1.0287,
+      "step": 532
+    },
+    {
+      "epoch": 0.4312,
+      "grad_norm": 21.006519317626953,
+      "learning_rate": 4.764444444444445e-05,
+      "loss": 1.1714,
+      "step": 539
+    },
+    {
+      "epoch": 0.4368,
+      "grad_norm": 14.085043907165527,
+      "learning_rate": 4.754074074074074e-05,
+      "loss": 1.3415,
+      "step": 546
+    },
+    {
+      "epoch": 0.4424,
+      "grad_norm": 6.128912925720215,
+      "learning_rate": 4.743703703703704e-05,
+      "loss": 1.2443,
+      "step": 553
+    },
+    {
+      "epoch": 0.448,
+      "grad_norm": 13.3624267578125,
+      "learning_rate": 4.7333333333333336e-05,
+      "loss": 1.0881,
+      "step": 560
+    },
+    {
+      "epoch": 0.4536,
+      "grad_norm": 20.717557907104492,
+      "learning_rate": 4.722962962962963e-05,
+      "loss": 1.4919,
+      "step": 567
+    },
+    {
+      "epoch": 0.4592,
+      "grad_norm": 9.526569366455078,
+      "learning_rate": 4.712592592592593e-05,
+      "loss": 1.1529,
+      "step": 574
+    },
+    {
+      "epoch": 0.4648,
+      "grad_norm": 21.219013214111328,
+      "learning_rate": 4.702222222222222e-05,
+      "loss": 1.1257,
+      "step": 581
+    },
+    {
+      "epoch": 0.4704,
+      "grad_norm": 16.09003448486328,
+      "learning_rate": 4.691851851851852e-05,
+      "loss": 0.9876,
+      "step": 588
+    },
+    {
+      "epoch": 0.476,
+      "grad_norm": 12.591224670410156,
+      "learning_rate": 4.681481481481482e-05,
+      "loss": 1.2323,
+      "step": 595
+    },
+    {
+      "epoch": 0.4816,
+      "grad_norm": 23.968263626098633,
+      "learning_rate": 4.671111111111111e-05,
+      "loss": 1.2817,
+      "step": 602
+    },
+    {
+      "epoch": 0.4872,
+      "grad_norm": 47.702857971191406,
+      "learning_rate": 4.660740740740741e-05,
+      "loss": 1.2608,
+      "step": 609
+    },
+    {
+      "epoch": 0.4928,
+      "grad_norm": 20.887250900268555,
+      "learning_rate": 4.6503703703703705e-05,
+      "loss": 1.3105,
+      "step": 616
+    },
+    {
+      "epoch": 0.4984,
+      "grad_norm": 12.29234790802002,
+      "learning_rate": 4.64e-05,
+      "loss": 1.1678,
+      "step": 623
+    },
+    {
+      "epoch": 0.504,
+      "grad_norm": 6.830883026123047,
+      "learning_rate": 4.62962962962963e-05,
+      "loss": 1.1546,
+      "step": 630
+    },
+    {
+      "epoch": 0.5096,
+      "grad_norm": 11.166626930236816,
+      "learning_rate": 4.619259259259259e-05,
+      "loss": 1.3306,
+      "step": 637
+    },
+    {
+      "epoch": 0.5152,
+      "grad_norm": 14.119309425354004,
+      "learning_rate": 4.608888888888889e-05,
+      "loss": 1.252,
+      "step": 644
+    },
+    {
+      "epoch": 0.5208,
+      "grad_norm": 18.480911254882812,
+      "learning_rate": 4.598518518518519e-05,
+      "loss": 1.2212,
+      "step": 651
+    },
+    {
+      "epoch": 0.5264,
+      "grad_norm": 14.240557670593262,
+      "learning_rate": 4.5881481481481485e-05,
+      "loss": 1.1538,
+      "step": 658
+    },
+    {
+      "epoch": 0.532,
+      "grad_norm": 26.27284812927246,
+      "learning_rate": 4.577777777777778e-05,
+      "loss": 1.3187,
+      "step": 665
+    },
+    {
+      "epoch": 0.5376,
+      "grad_norm": 15.894841194152832,
+      "learning_rate": 4.5674074074074074e-05,
+      "loss": 1.3132,
+      "step": 672
+    },
+    {
+      "epoch": 0.5432,
+      "grad_norm": 9.206047058105469,
+      "learning_rate": 4.557037037037037e-05,
+      "loss": 1.1607,
+      "step": 679
+    },
+    {
+      "epoch": 0.5488,
+      "grad_norm": 27.2408390045166,
+      "learning_rate": 4.546666666666667e-05,
+      "loss": 1.3794,
+      "step": 686
+    },
+    {
+      "epoch": 0.5544,
+      "grad_norm": 32.67033767700195,
+      "learning_rate": 4.536296296296296e-05,
+      "loss": 1.3189,
+      "step": 693
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 14.853004455566406,
+      "learning_rate": 4.5259259259259265e-05,
+      "loss": 1.2994,
+      "step": 700
+    },
+    {
+      "epoch": 0.5656,
+      "grad_norm": 17.385454177856445,
+      "learning_rate": 4.5155555555555556e-05,
+      "loss": 1.3544,
+      "step": 707
+    },
+    {
+      "epoch": 0.5712,
+      "grad_norm": 8.822907447814941,
+      "learning_rate": 4.5051851851851854e-05,
+      "loss": 1.0642,
+      "step": 714
+    },
+    {
+      "epoch": 0.5768,
+      "grad_norm": 10.728267669677734,
+      "learning_rate": 4.494814814814815e-05,
+      "loss": 1.2032,
+      "step": 721
+    },
+    {
+      "epoch": 0.5824,
+      "grad_norm": 18.303375244140625,
+      "learning_rate": 4.484444444444444e-05,
+      "loss": 1.0763,
+      "step": 728
+    },
+    {
+      "epoch": 0.588,
+      "grad_norm": 14.441327095031738,
+      "learning_rate": 4.474074074074075e-05,
+      "loss": 0.9293,
+      "step": 735
+    },
+    {
+      "epoch": 0.5936,
+      "grad_norm": 12.140674591064453,
+      "learning_rate": 4.463703703703704e-05,
+      "loss": 1.0541,
+      "step": 742
+    },
+    {
+      "epoch": 0.5992,
+      "grad_norm": 27.210546493530273,
+      "learning_rate": 4.4533333333333336e-05,
+      "loss": 1.408,
+      "step": 749
+    },
+    {
+      "epoch": 0.6048,
+      "grad_norm": 11.405797958374023,
+      "learning_rate": 4.4429629629629634e-05,
+      "loss": 1.1593,
+      "step": 756
+    },
+    {
+      "epoch": 0.6104,
+      "grad_norm": 16.38625144958496,
+      "learning_rate": 4.4325925925925925e-05,
+      "loss": 1.1842,
+      "step": 763
+    },
+    {
+      "epoch": 0.616,
+      "grad_norm": 20.51258659362793,
+      "learning_rate": 4.422222222222222e-05,
+      "loss": 1.2711,
+      "step": 770
+    },
+    {
+      "epoch": 0.6216,
+      "grad_norm": 27.440540313720703,
+      "learning_rate": 4.411851851851852e-05,
+      "loss": 1.1911,
+      "step": 777
+    },
+    {
+      "epoch": 0.6272,
+      "grad_norm": 24.75773048400879,
+      "learning_rate": 4.401481481481481e-05,
+      "loss": 1.046,
+      "step": 784
+    },
+    {
+      "epoch": 0.6328,
+      "grad_norm": 18.571340560913086,
+      "learning_rate": 4.3911111111111116e-05,
+      "loss": 1.2397,
+      "step": 791
+    },
+    {
+      "epoch": 0.6384,
+      "grad_norm": 27.95873260498047,
+      "learning_rate": 4.380740740740741e-05,
+      "loss": 1.3633,
+      "step": 798
+    },
+    {
+      "epoch": 0.644,
+      "grad_norm": 9.077492713928223,
+      "learning_rate": 4.3703703703703705e-05,
+      "loss": 1.1718,
+      "step": 805
+    },
+    {
+      "epoch": 0.6496,
+      "grad_norm": 12.240832328796387,
+      "learning_rate": 4.36e-05,
+      "loss": 1.1483,
+      "step": 812
+    },
+    {
+      "epoch": 0.6552,
+      "grad_norm": 28.757829666137695,
+      "learning_rate": 4.3496296296296294e-05,
+      "loss": 1.1503,
+      "step": 819
+    },
+    {
+      "epoch": 0.6608,
+      "grad_norm": 17.067678451538086,
+      "learning_rate": 4.33925925925926e-05,
+      "loss": 1.3015,
+      "step": 826
+    },
+    {
+      "epoch": 0.6664,
+      "grad_norm": 15.772140502929688,
+      "learning_rate": 4.328888888888889e-05,
+      "loss": 1.0159,
+      "step": 833
+    },
+    {
+      "epoch": 0.672,
+      "grad_norm": 9.903558731079102,
+      "learning_rate": 4.318518518518519e-05,
+      "loss": 1.2331,
+      "step": 840
+    },
+    {
+      "epoch": 0.6776,
+      "grad_norm": 10.482284545898438,
+      "learning_rate": 4.3081481481481485e-05,
+      "loss": 0.8856,
+      "step": 847
+    },
+    {
+      "epoch": 0.6832,
+      "grad_norm": 16.578685760498047,
+      "learning_rate": 4.2977777777777776e-05,
+      "loss": 1.1766,
+      "step": 854
+    },
+    {
+      "epoch": 0.6888,
+      "grad_norm": 22.36375617980957,
+      "learning_rate": 4.287407407407408e-05,
+      "loss": 1.1828,
+      "step": 861
+    },
+    {
+      "epoch": 0.6944,
+      "grad_norm": 51.31915283203125,
+      "learning_rate": 4.277037037037037e-05,
+      "loss": 1.2534,
+      "step": 868
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 43.600711822509766,
+      "learning_rate": 4.266666666666667e-05,
+      "loss": 1.1778,
+      "step": 875
+    },
+    {
+      "epoch": 0.7056,
+      "grad_norm": 18.086084365844727,
+      "learning_rate": 4.256296296296297e-05,
+      "loss": 1.2067,
+      "step": 882
+    },
+    {
+      "epoch": 0.7112,
+      "grad_norm": 11.24244213104248,
+      "learning_rate": 4.245925925925926e-05,
+      "loss": 1.145,
+      "step": 889
+    },
+    {
+      "epoch": 0.7168,
+      "grad_norm": 12.832592964172363,
+      "learning_rate": 4.235555555555556e-05,
+      "loss": 1.0475,
+      "step": 896
+    },
+    {
+      "epoch": 0.7224,
+      "grad_norm": 33.09056854248047,
+      "learning_rate": 4.2251851851851854e-05,
+      "loss": 1.1055,
+      "step": 903
+    },
+    {
+      "epoch": 0.728,
+      "grad_norm": 16.331045150756836,
+      "learning_rate": 4.2148148148148145e-05,
+      "loss": 1.3999,
+      "step": 910
+    },
+    {
+      "epoch": 0.7336,
+      "grad_norm": 13.34793758392334,
+      "learning_rate": 4.204444444444445e-05,
+      "loss": 1.0609,
+      "step": 917
+    },
+    {
+      "epoch": 0.7392,
+      "grad_norm": 15.56451416015625,
+      "learning_rate": 4.194074074074074e-05,
+      "loss": 1.0921,
+      "step": 924
+    },
+    {
+      "epoch": 0.7448,
+      "grad_norm": 36.35686111450195,
+      "learning_rate": 4.183703703703704e-05,
+      "loss": 1.0468,
+      "step": 931
+    },
+    {
+      "epoch": 0.7504,
+      "grad_norm": 19.2562313079834,
+      "learning_rate": 4.1733333333333336e-05,
+      "loss": 1.3423,
+      "step": 938
+    },
+    {
+      "epoch": 0.756,
+      "grad_norm": 51.04462814331055,
+      "learning_rate": 4.162962962962963e-05,
+      "loss": 1.1937,
+      "step": 945
+    },
+    {
+      "epoch": 0.7616,
+      "grad_norm": 13.460155487060547,
+      "learning_rate": 4.152592592592593e-05,
+      "loss": 1.3615,
+      "step": 952
+    },
+    {
+      "epoch": 0.7672,
+      "grad_norm": 25.266849517822266,
+      "learning_rate": 4.142222222222222e-05,
+      "loss": 1.1688,
+      "step": 959
+    },
+    {
+      "epoch": 0.7728,
+      "grad_norm": 13.368788719177246,
+      "learning_rate": 4.131851851851852e-05,
+      "loss": 1.0765,
+      "step": 966
+    },
+    {
+      "epoch": 0.7784,
+      "grad_norm": 12.62553596496582,
+      "learning_rate": 4.121481481481482e-05,
+      "loss": 1.0952,
+      "step": 973
+    },
+    {
+      "epoch": 0.784,
+      "grad_norm": 17.647890090942383,
+      "learning_rate": 4.111111111111111e-05,
+      "loss": 1.08,
+      "step": 980
+    },
+    {
+      "epoch": 0.7896,
+      "grad_norm": 24.468290328979492,
+      "learning_rate": 4.1007407407407414e-05,
+      "loss": 1.3311,
+      "step": 987
+    },
+    {
+      "epoch": 0.7952,
+      "grad_norm": 13.195050239562988,
+      "learning_rate": 4.0903703703703705e-05,
+      "loss": 1.2307,
+      "step": 994
+    },
+    {
+      "epoch": 0.8008,
+      "grad_norm": 9.442852973937988,
+      "learning_rate": 4.08e-05,
+      "loss": 1.1655,
+      "step": 1001
+    },
+    {
+      "epoch": 0.8064,
+      "grad_norm": 11.254621505737305,
+      "learning_rate": 4.06962962962963e-05,
+      "loss": 1.2512,
+      "step": 1008
+    },
+    {
+      "epoch": 0.812,
+      "grad_norm": 11.979645729064941,
+      "learning_rate": 4.059259259259259e-05,
+      "loss": 1.2049,
+      "step": 1015
+    },
+    {
+      "epoch": 0.8176,
+      "grad_norm": 13.316203117370605,
+      "learning_rate": 4.0488888888888896e-05,
+      "loss": 1.1102,
+      "step": 1022
+    },
+    {
+      "epoch": 0.8232,
+      "grad_norm": 19.840560913085938,
+      "learning_rate": 4.038518518518519e-05,
+      "loss": 1.2102,
+      "step": 1029
+    },
+    {
+      "epoch": 0.8288,
+      "grad_norm": 17.739408493041992,
+      "learning_rate": 4.028148148148148e-05,
+      "loss": 1.0162,
+      "step": 1036
+    },
+    {
+      "epoch": 0.8344,
+      "grad_norm": 15.485058784484863,
+      "learning_rate": 4.017777777777778e-05,
+      "loss": 1.1379,
+      "step": 1043
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 29.864459991455078,
+      "learning_rate": 4.007407407407407e-05,
+      "loss": 1.322,
+      "step": 1050
+    },
+    {
+      "epoch": 0.8456,
+      "grad_norm": 27.43077278137207,
+      "learning_rate": 3.997037037037038e-05,
+      "loss": 1.1343,
+      "step": 1057
+    },
+    {
+      "epoch": 0.8512,
+      "grad_norm": 8.520703315734863,
+      "learning_rate": 3.986666666666667e-05,
+      "loss": 1.1657,
+      "step": 1064
+    },
+    {
+      "epoch": 0.8568,
+      "grad_norm": 13.474251747131348,
+      "learning_rate": 3.976296296296296e-05,
+      "loss": 0.9206,
+      "step": 1071
+    },
+    {
+      "epoch": 0.8624,
+      "grad_norm": 27.06108856201172,
+      "learning_rate": 3.9659259259259265e-05,
+      "loss": 0.9325,
+      "step": 1078
+    },
+    {
+      "epoch": 0.868,
+      "grad_norm": 19.521345138549805,
+      "learning_rate": 3.9555555555555556e-05,
+      "loss": 1.1772,
+      "step": 1085
+    },
+    {
+      "epoch": 0.8736,
+      "grad_norm": 21.125864028930664,
+      "learning_rate": 3.945185185185185e-05,
+      "loss": 1.2945,
+      "step": 1092
+    },
+    {
+      "epoch": 0.8792,
+      "grad_norm": 20.886058807373047,
+      "learning_rate": 3.934814814814815e-05,
+      "loss": 1.2349,
+      "step": 1099
+    },
+    {
+      "epoch": 0.8848,
+      "grad_norm": 12.874465942382812,
+      "learning_rate": 3.924444444444444e-05,
+      "loss": 1.2065,
+      "step": 1106
+    },
+    {
+      "epoch": 0.8904,
+      "grad_norm": 12.321556091308594,
+      "learning_rate": 3.914074074074075e-05,
+      "loss": 0.944,
+      "step": 1113
+    },
+    {
+      "epoch": 0.896,
+      "grad_norm": 30.77228355407715,
+      "learning_rate": 3.903703703703704e-05,
+      "loss": 1.055,
+      "step": 1120
+    },
+    {
+      "epoch": 0.9016,
+      "grad_norm": 18.482967376708984,
+      "learning_rate": 3.8933333333333336e-05,
+      "loss": 1.2785,
+      "step": 1127
+    },
+    {
+      "epoch": 0.9072,
+      "grad_norm": 11.217413902282715,
+      "learning_rate": 3.882962962962963e-05,
+      "loss": 1.0797,
+      "step": 1134
+    },
+    {
+      "epoch": 0.9128,
+      "grad_norm": 18.9666690826416,
+      "learning_rate": 3.8725925925925924e-05,
+      "loss": 1.0066,
+      "step": 1141
+    },
+    {
+      "epoch": 0.9184,
+      "grad_norm": 16.33734130859375,
+      "learning_rate": 3.862222222222223e-05,
+      "loss": 1.2794,
+      "step": 1148
+    },
+    {
+      "epoch": 0.924,
+      "grad_norm": 10.510845184326172,
+      "learning_rate": 3.851851851851852e-05,
+      "loss": 0.9998,
+      "step": 1155
+    },
+    {
+      "epoch": 0.9296,
+      "grad_norm": 16.016572952270508,
+      "learning_rate": 3.841481481481482e-05,
+      "loss": 0.967,
+      "step": 1162
+    },
+    {
+      "epoch": 0.9352,
+      "grad_norm": 15.4122953414917,
+      "learning_rate": 3.8311111111111115e-05,
+      "loss": 0.867,
+      "step": 1169
+    },
+    {
+      "epoch": 0.9408,
+      "grad_norm": 6.012162685394287,
+      "learning_rate": 3.8207407407407407e-05,
+      "loss": 1.125,
+      "step": 1176
+    },
+    {
+      "epoch": 0.9464,
+      "grad_norm": 22.730403900146484,
+      "learning_rate": 3.810370370370371e-05,
+      "loss": 1.3726,
+      "step": 1183
+    },
+    {
+      "epoch": 0.952,
+      "grad_norm": 9.818578720092773,
+      "learning_rate": 3.8e-05,
+      "loss": 1.1457,
+      "step": 1190
+    },
+    {
+      "epoch": 0.9576,
+      "grad_norm": 22.369892120361328,
+      "learning_rate": 3.789629629629629e-05,
+      "loss": 1.1371,
+      "step": 1197
+    },
+    {
+      "epoch": 0.9632,
+      "grad_norm": 17.22629737854004,
+      "learning_rate": 3.77925925925926e-05,
+      "loss": 1.039,
+      "step": 1204
+    },
+    {
+      "epoch": 0.9688,
+      "grad_norm": 10.770013809204102,
+      "learning_rate": 3.768888888888889e-05,
+      "loss": 0.8811,
+      "step": 1211
+    },
+    {
+      "epoch": 0.9744,
+      "grad_norm": 19.169052124023438,
+      "learning_rate": 3.7585185185185186e-05,
+      "loss": 1.282,
+      "step": 1218
+    },
+    {
+      "epoch": 0.98,
+      "grad_norm": 9.597490310668945,
+      "learning_rate": 3.7481481481481484e-05,
+      "loss": 0.9561,
+      "step": 1225
+    },
+    {
+      "epoch": 0.9856,
+      "grad_norm": 11.950583457946777,
+      "learning_rate": 3.7377777777777775e-05,
+      "loss": 1.1541,
+      "step": 1232
+    },
+    {
+      "epoch": 0.9912,
+      "grad_norm": 13.237419128417969,
+      "learning_rate": 3.727407407407408e-05,
+      "loss": 1.0529,
+      "step": 1239
+    },
+    {
+      "epoch": 0.9968,
+      "grad_norm": 23.540124893188477,
+      "learning_rate": 3.717037037037037e-05,
+      "loss": 1.2993,
+      "step": 1246
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.5233333333333333,
+      "eval_f1_macro": 0.509019730829119,
+      "eval_f1_micro": 0.5233333333333333,
+      "eval_f1_weighted": 0.516464791683911,
+      "eval_loss": 1.0463045835494995,
+      "eval_precision_macro": 0.5322711463107999,
+      "eval_precision_micro": 0.5233333333333333,
+      "eval_precision_weighted": 0.5447555045714482,
+      "eval_recall_macro": 0.5207324546507,
+      "eval_recall_micro": 0.5233333333333333,
+      "eval_recall_weighted": 0.5233333333333333,
+      "eval_runtime": 8.5444,
+      "eval_samples_per_second": 35.111,
+      "eval_steps_per_second": 2.224,
+      "step": 1250
+    }
+  ],
+  "logging_steps": 7,
+  "max_steps": 3750,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 821082232320000.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1250/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e5bbc0c904e355fa1698548e96cd8e3199960f68b14173c93d658bffc65f533
+size 5432

config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "_name_or_path": "allenai/longformer-base-4096",
+  "_num_labels": 5,
+  "architectures": [
+    "LongformerForSequenceClassification"
+  ],
+  "attention_mode": "longformer",
+  "attention_probs_dropout_prob": 0.1,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "very negative",
+    "1": "negative",
+    "2": "neutral",
+    "3": "positive",
+    "4": "very positive"
+  },
+  "ignore_attention_mask": false,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "negative": 1,
+    "neutral": 2,
+    "positive": 3,
+    "very negative": 0,
+    "very positive": 4
+  },
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 4098,
+  "model_type": "longformer",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "onnx_export": false,
+  "pad_token_id": 1,
+  "problem_type": "single_label_classification",
+  "sep_token_id": 2,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 1,
+  "vocab_size": 50265
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ea5a2958a9736d967d763abc8f3d7f4a1bebfee2308ff3232a133fdae09f851
+size 594687412

runs/Mar19_20-59-07_r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h/events.out.tfevents.1742417949.r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h.71.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa6d2b087c036f25451e4d92a84255e66156ddc132ad701843f345de7a0a7355
-size 109073

 version https://git-lfs.github.com/spec/v1
+oid sha256:25b457dee90156719b88255eb9dc54ca2c2294d6880f6b10561201aa6f130c3c
+size 121232

runs/Mar19_20-59-07_r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h/events.out.tfevents.1742421321.r-spacesedan-longformer-sentiment-analysis-training-l-c848c-35h.71.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:524cd65ce443699f2dcc729855d349e4c6b38a65565cb9462721666a8357fdfd
+size 921

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "LongformerTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8e5bbc0c904e355fa1698548e96cd8e3199960f68b14173c93d658bffc65f533
+size 5432

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "spacesedan/Amazon_Reviews_Sentiment_10K",
+    "model": "allenai/longformer-base-4096",
+    "lr": 5e-05,
+    "epochs": 3,
+    "max_seq_length": 128,
+    "batch_size": 8,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "test",
+    "text_column": "text",
+    "target_column": "label",
+    "logging_steps": -1,
+    "project_name": "sentiment-analysis-longformer",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "spacesedan",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff