Floressek commited on Nov 14, 2025

Commit

4c819e2

verified ·

1 Parent(s): 933e304

Upload folder using huggingface_hub

Browse files

Files changed (31) hide show

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/config.json +24 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/model.safetensors +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/optimizer.pt +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/rng_state.pth +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scaler.pt +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scheduler.pt +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/special_tokens_map.json +7 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer.json +0 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer_config.json +56 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/trainer_state.json +324 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/training_args.bin +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-4092/vocab.txt +0 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/config.json +24 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/model.safetensors +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/optimizer.pt +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/rng_state.pth +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scaler.pt +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scheduler.pt +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/special_tokens_map.json +7 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer.json +0 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer_config.json +56 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/trainer_state.json +621 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/training_args.bin +3 -0
Floressek/sentiment_classification_from_distillbert/checkpoint-8184/vocab.txt +0 -0
Floressek/sentiment_classification_from_distillbert/config.json +24 -0
Floressek/sentiment_classification_from_distillbert/model.safetensors +3 -0
Floressek/sentiment_classification_from_distillbert/special_tokens_map.json +7 -0
Floressek/sentiment_classification_from_distillbert/tokenizer.json +0 -0
Floressek/sentiment_classification_from_distillbert/tokenizer_config.json +56 -0
Floressek/sentiment_classification_from_distillbert/training_args.bin +3 -0
Floressek/sentiment_classification_from_distillbert/vocab.txt +0 -0

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.57.1",
+  "vocab_size": 30522
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48f53c9337700718dfe05b128baaf117bd0426c55a2997dcb5eba2a6ece8b78a
+size 267832560

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60dd26e70b6b195af77ec24609240dc615fa06373e1041dbb7d954caac63ab9
+size 535727755

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33b3fc4dc6e72c19b95ccc6bf2103e72921fd75896fc4bef7c6d974ba74630e9
+size 14645

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:484e81aa45a06ff82acc987feb7e25bd65f6d761d4b270f18fa3fdb66af71f57
+size 1383

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad6350ab52d6a97e61162b0ff6ee33357b33b4a128de1a32409ac383b24e0b81
+size 1465

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/trainer_state.json ADDED Viewed

	@@ -0,0 +1,324 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 4092,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.024437927663734114,
+      "grad_norm": 4.433139801025391,
+      "learning_rate": 1.9760508308895406e-05,
+      "loss": 0.2466,
+      "step": 100
+    },
+    {
+      "epoch": 0.04887585532746823,
+      "grad_norm": 2.9566471576690674,
+      "learning_rate": 1.9516129032258068e-05,
+      "loss": 0.0958,
+      "step": 200
+    },
+    {
+      "epoch": 0.07331378299120235,
+      "grad_norm": 3.056684732437134,
+      "learning_rate": 1.9271749755620726e-05,
+      "loss": 0.0854,
+      "step": 300
+    },
+    {
+      "epoch": 0.09775171065493646,
+      "grad_norm": 0.7034033536911011,
+      "learning_rate": 1.9027370478983384e-05,
+      "loss": 0.0854,
+      "step": 400
+    },
+    {
+      "epoch": 0.12218963831867058,
+      "grad_norm": 2.2177963256835938,
+      "learning_rate": 1.8782991202346042e-05,
+      "loss": 0.0851,
+      "step": 500
+    },
+    {
+      "epoch": 0.1466275659824047,
+      "grad_norm": 0.9197642803192139,
+      "learning_rate": 1.85386119257087e-05,
+      "loss": 0.0802,
+      "step": 600
+    },
+    {
+      "epoch": 0.1710654936461388,
+      "grad_norm": 2.132237672805786,
+      "learning_rate": 1.829423264907136e-05,
+      "loss": 0.068,
+      "step": 700
+    },
+    {
+      "epoch": 0.19550342130987292,
+      "grad_norm": 1.3075214624404907,
+      "learning_rate": 1.804985337243402e-05,
+      "loss": 0.0679,
+      "step": 800
+    },
+    {
+      "epoch": 0.21994134897360704,
+      "grad_norm": 2.7458581924438477,
+      "learning_rate": 1.7805474095796678e-05,
+      "loss": 0.0698,
+      "step": 900
+    },
+    {
+      "epoch": 0.24437927663734116,
+      "grad_norm": 1.7474775314331055,
+      "learning_rate": 1.7561094819159336e-05,
+      "loss": 0.0647,
+      "step": 1000
+    },
+    {
+      "epoch": 0.26881720430107525,
+      "grad_norm": 3.5915794372558594,
+      "learning_rate": 1.7316715542521995e-05,
+      "loss": 0.0598,
+      "step": 1100
+    },
+    {
+      "epoch": 0.2932551319648094,
+      "grad_norm": 0.5882957577705383,
+      "learning_rate": 1.7072336265884656e-05,
+      "loss": 0.0635,
+      "step": 1200
+    },
+    {
+      "epoch": 0.3176930596285435,
+      "grad_norm": 3.578653335571289,
+      "learning_rate": 1.6827956989247314e-05,
+      "loss": 0.0666,
+      "step": 1300
+    },
+    {
+      "epoch": 0.3421309872922776,
+      "grad_norm": 0.5485109090805054,
+      "learning_rate": 1.6583577712609973e-05,
+      "loss": 0.0583,
+      "step": 1400
+    },
+    {
+      "epoch": 0.36656891495601174,
+      "grad_norm": 0.6193661689758301,
+      "learning_rate": 1.633919843597263e-05,
+      "loss": 0.0637,
+      "step": 1500
+    },
+    {
+      "epoch": 0.39100684261974583,
+      "grad_norm": 0.11104666441679001,
+      "learning_rate": 1.609481915933529e-05,
+      "loss": 0.0505,
+      "step": 1600
+    },
+    {
+      "epoch": 0.41544477028348,
+      "grad_norm": 0.13834528625011444,
+      "learning_rate": 1.5850439882697947e-05,
+      "loss": 0.0535,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4398826979472141,
+      "grad_norm": 0.19737549126148224,
+      "learning_rate": 1.5606060606060605e-05,
+      "loss": 0.0503,
+      "step": 1800
+    },
+    {
+      "epoch": 0.46432062561094817,
+      "grad_norm": 5.043758392333984,
+      "learning_rate": 1.5361681329423267e-05,
+      "loss": 0.0512,
+      "step": 1900
+    },
+    {
+      "epoch": 0.4887585532746823,
+      "grad_norm": 0.07497064024209976,
+      "learning_rate": 1.5117302052785925e-05,
+      "loss": 0.0526,
+      "step": 2000
+    },
+    {
+      "epoch": 0.5131964809384164,
+      "grad_norm": 1.9213156700134277,
+      "learning_rate": 1.4872922776148583e-05,
+      "loss": 0.051,
+      "step": 2100
+    },
+    {
+      "epoch": 0.5376344086021505,
+      "grad_norm": 1.2963168621063232,
+      "learning_rate": 1.4628543499511243e-05,
+      "loss": 0.0429,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5620723362658846,
+      "grad_norm": 1.5551583766937256,
+      "learning_rate": 1.4384164222873903e-05,
+      "loss": 0.0602,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5865102639296188,
+      "grad_norm": 2.956341028213501,
+      "learning_rate": 1.4139784946236561e-05,
+      "loss": 0.0564,
+      "step": 2400
+    },
+    {
+      "epoch": 0.6109481915933529,
+      "grad_norm": 0.2880280911922455,
+      "learning_rate": 1.3895405669599221e-05,
+      "loss": 0.0425,
+      "step": 2500
+    },
+    {
+      "epoch": 0.635386119257087,
+      "grad_norm": 0.6562920808792114,
+      "learning_rate": 1.3651026392961877e-05,
+      "loss": 0.0456,
+      "step": 2600
+    },
+    {
+      "epoch": 0.6598240469208211,
+      "grad_norm": 0.09532313793897629,
+      "learning_rate": 1.3406647116324536e-05,
+      "loss": 0.051,
+      "step": 2700
+    },
+    {
+      "epoch": 0.6842619745845552,
+      "grad_norm": 2.781747817993164,
+      "learning_rate": 1.3162267839687195e-05,
+      "loss": 0.0512,
+      "step": 2800
+    },
+    {
+      "epoch": 0.7086999022482894,
+      "grad_norm": 2.1303281784057617,
+      "learning_rate": 1.2917888563049854e-05,
+      "loss": 0.0519,
+      "step": 2900
+    },
+    {
+      "epoch": 0.7331378299120235,
+      "grad_norm": 0.10773918032646179,
+      "learning_rate": 1.2673509286412513e-05,
+      "loss": 0.042,
+      "step": 3000
+    },
+    {
+      "epoch": 0.7575757575757576,
+      "grad_norm": 0.533173143863678,
+      "learning_rate": 1.2429130009775172e-05,
+      "loss": 0.0569,
+      "step": 3100
+    },
+    {
+      "epoch": 0.7820136852394917,
+      "grad_norm": 2.882176637649536,
+      "learning_rate": 1.2184750733137831e-05,
+      "loss": 0.0421,
+      "step": 3200
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 5.4323530197143555,
+      "learning_rate": 1.194037145650049e-05,
+      "loss": 0.0458,
+      "step": 3300
+    },
+    {
+      "epoch": 0.83088954056696,
+      "grad_norm": 0.08420676738023758,
+      "learning_rate": 1.169599217986315e-05,
+      "loss": 0.0432,
+      "step": 3400
+    },
+    {
+      "epoch": 0.855327468230694,
+      "grad_norm": 3.6947717666625977,
+      "learning_rate": 1.1451612903225808e-05,
+      "loss": 0.0387,
+      "step": 3500
+    },
+    {
+      "epoch": 0.8797653958944281,
+      "grad_norm": 4.411167621612549,
+      "learning_rate": 1.1209677419354839e-05,
+      "loss": 0.0515,
+      "step": 3600
+    },
+    {
+      "epoch": 0.9042033235581622,
+      "grad_norm": 3.9330861568450928,
+      "learning_rate": 1.0965298142717497e-05,
+      "loss": 0.0368,
+      "step": 3700
+    },
+    {
+      "epoch": 0.9286412512218963,
+      "grad_norm": 0.9437419772148132,
+      "learning_rate": 1.0720918866080157e-05,
+      "loss": 0.0479,
+      "step": 3800
+    },
+    {
+      "epoch": 0.9530791788856305,
+      "grad_norm": 1.0870046615600586,
+      "learning_rate": 1.0476539589442815e-05,
+      "loss": 0.0426,
+      "step": 3900
+    },
+    {
+      "epoch": 0.9775171065493646,
+      "grad_norm": 3.1951255798339844,
+      "learning_rate": 1.0232160312805475e-05,
+      "loss": 0.0432,
+      "step": 4000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9891518737672583,
+      "eval_f1": 0.9928387101834639,
+      "eval_loss": 0.04053397476673126,
+      "eval_runtime": 158.0778,
+      "eval_samples_per_second": 532.409,
+      "eval_steps_per_second": 11.096,
+      "step": 4092
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 8184,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.6013285411072e+16,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
+size 5841

Floressek/sentiment_classification_from_distillbert/checkpoint-4092/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.57.1",
+  "vocab_size": 30522
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ef48f63397444fab7fe77a7c3e8113768986ff1c8e5e9126a66727aa030ec64
+size 267832560

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:223c1ad8c221cdb8480bd0fbf758bce16ab32234cf8f118612402b10534a701a
+size 535727755

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1344599c19119661a23b30409dd058766623df54c92c90cfbddc059d4d9b8506
+size 14645

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c83094e856654a01b4be2edab3b20eb796b993d8a6bc80224e3383365e815ba
+size 1383

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd6f8ed4f35d1035d3220bdfe3aa23843396e8ea037faf54ec8c9dc5fe556d41
+size 1465

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/trainer_state.json ADDED Viewed

	@@ -0,0 +1,621 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 8184,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.024437927663734114,
+      "grad_norm": 4.433139801025391,
+      "learning_rate": 1.9760508308895406e-05,
+      "loss": 0.2466,
+      "step": 100
+    },
+    {
+      "epoch": 0.04887585532746823,
+      "grad_norm": 2.9566471576690674,
+      "learning_rate": 1.9516129032258068e-05,
+      "loss": 0.0958,
+      "step": 200
+    },
+    {
+      "epoch": 0.07331378299120235,
+      "grad_norm": 3.056684732437134,
+      "learning_rate": 1.9271749755620726e-05,
+      "loss": 0.0854,
+      "step": 300
+    },
+    {
+      "epoch": 0.09775171065493646,
+      "grad_norm": 0.7034033536911011,
+      "learning_rate": 1.9027370478983384e-05,
+      "loss": 0.0854,
+      "step": 400
+    },
+    {
+      "epoch": 0.12218963831867058,
+      "grad_norm": 2.2177963256835938,
+      "learning_rate": 1.8782991202346042e-05,
+      "loss": 0.0851,
+      "step": 500
+    },
+    {
+      "epoch": 0.1466275659824047,
+      "grad_norm": 0.9197642803192139,
+      "learning_rate": 1.85386119257087e-05,
+      "loss": 0.0802,
+      "step": 600
+    },
+    {
+      "epoch": 0.1710654936461388,
+      "grad_norm": 2.132237672805786,
+      "learning_rate": 1.829423264907136e-05,
+      "loss": 0.068,
+      "step": 700
+    },
+    {
+      "epoch": 0.19550342130987292,
+      "grad_norm": 1.3075214624404907,
+      "learning_rate": 1.804985337243402e-05,
+      "loss": 0.0679,
+      "step": 800
+    },
+    {
+      "epoch": 0.21994134897360704,
+      "grad_norm": 2.7458581924438477,
+      "learning_rate": 1.7805474095796678e-05,
+      "loss": 0.0698,
+      "step": 900
+    },
+    {
+      "epoch": 0.24437927663734116,
+      "grad_norm": 1.7474775314331055,
+      "learning_rate": 1.7561094819159336e-05,
+      "loss": 0.0647,
+      "step": 1000
+    },
+    {
+      "epoch": 0.26881720430107525,
+      "grad_norm": 3.5915794372558594,
+      "learning_rate": 1.7316715542521995e-05,
+      "loss": 0.0598,
+      "step": 1100
+    },
+    {
+      "epoch": 0.2932551319648094,
+      "grad_norm": 0.5882957577705383,
+      "learning_rate": 1.7072336265884656e-05,
+      "loss": 0.0635,
+      "step": 1200
+    },
+    {
+      "epoch": 0.3176930596285435,
+      "grad_norm": 3.578653335571289,
+      "learning_rate": 1.6827956989247314e-05,
+      "loss": 0.0666,
+      "step": 1300
+    },
+    {
+      "epoch": 0.3421309872922776,
+      "grad_norm": 0.5485109090805054,
+      "learning_rate": 1.6583577712609973e-05,
+      "loss": 0.0583,
+      "step": 1400
+    },
+    {
+      "epoch": 0.36656891495601174,
+      "grad_norm": 0.6193661689758301,
+      "learning_rate": 1.633919843597263e-05,
+      "loss": 0.0637,
+      "step": 1500
+    },
+    {
+      "epoch": 0.39100684261974583,
+      "grad_norm": 0.11104666441679001,
+      "learning_rate": 1.609481915933529e-05,
+      "loss": 0.0505,
+      "step": 1600
+    },
+    {
+      "epoch": 0.41544477028348,
+      "grad_norm": 0.13834528625011444,
+      "learning_rate": 1.5850439882697947e-05,
+      "loss": 0.0535,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4398826979472141,
+      "grad_norm": 0.19737549126148224,
+      "learning_rate": 1.5606060606060605e-05,
+      "loss": 0.0503,
+      "step": 1800
+    },
+    {
+      "epoch": 0.46432062561094817,
+      "grad_norm": 5.043758392333984,
+      "learning_rate": 1.5361681329423267e-05,
+      "loss": 0.0512,
+      "step": 1900
+    },
+    {
+      "epoch": 0.4887585532746823,
+      "grad_norm": 0.07497064024209976,
+      "learning_rate": 1.5117302052785925e-05,
+      "loss": 0.0526,
+      "step": 2000
+    },
+    {
+      "epoch": 0.5131964809384164,
+      "grad_norm": 1.9213156700134277,
+      "learning_rate": 1.4872922776148583e-05,
+      "loss": 0.051,
+      "step": 2100
+    },
+    {
+      "epoch": 0.5376344086021505,
+      "grad_norm": 1.2963168621063232,
+      "learning_rate": 1.4628543499511243e-05,
+      "loss": 0.0429,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5620723362658846,
+      "grad_norm": 1.5551583766937256,
+      "learning_rate": 1.4384164222873903e-05,
+      "loss": 0.0602,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5865102639296188,
+      "grad_norm": 2.956341028213501,
+      "learning_rate": 1.4139784946236561e-05,
+      "loss": 0.0564,
+      "step": 2400
+    },
+    {
+      "epoch": 0.6109481915933529,
+      "grad_norm": 0.2880280911922455,
+      "learning_rate": 1.3895405669599221e-05,
+      "loss": 0.0425,
+      "step": 2500
+    },
+    {
+      "epoch": 0.635386119257087,
+      "grad_norm": 0.6562920808792114,
+      "learning_rate": 1.3651026392961877e-05,
+      "loss": 0.0456,
+      "step": 2600
+    },
+    {
+      "epoch": 0.6598240469208211,
+      "grad_norm": 0.09532313793897629,
+      "learning_rate": 1.3406647116324536e-05,
+      "loss": 0.051,
+      "step": 2700
+    },
+    {
+      "epoch": 0.6842619745845552,
+      "grad_norm": 2.781747817993164,
+      "learning_rate": 1.3162267839687195e-05,
+      "loss": 0.0512,
+      "step": 2800
+    },
+    {
+      "epoch": 0.7086999022482894,
+      "grad_norm": 2.1303281784057617,
+      "learning_rate": 1.2917888563049854e-05,
+      "loss": 0.0519,
+      "step": 2900
+    },
+    {
+      "epoch": 0.7331378299120235,
+      "grad_norm": 0.10773918032646179,
+      "learning_rate": 1.2673509286412513e-05,
+      "loss": 0.042,
+      "step": 3000
+    },
+    {
+      "epoch": 0.7575757575757576,
+      "grad_norm": 0.533173143863678,
+      "learning_rate": 1.2429130009775172e-05,
+      "loss": 0.0569,
+      "step": 3100
+    },
+    {
+      "epoch": 0.7820136852394917,
+      "grad_norm": 2.882176637649536,
+      "learning_rate": 1.2184750733137831e-05,
+      "loss": 0.0421,
+      "step": 3200
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 5.4323530197143555,
+      "learning_rate": 1.194037145650049e-05,
+      "loss": 0.0458,
+      "step": 3300
+    },
+    {
+      "epoch": 0.83088954056696,
+      "grad_norm": 0.08420676738023758,
+      "learning_rate": 1.169599217986315e-05,
+      "loss": 0.0432,
+      "step": 3400
+    },
+    {
+      "epoch": 0.855327468230694,
+      "grad_norm": 3.6947717666625977,
+      "learning_rate": 1.1451612903225808e-05,
+      "loss": 0.0387,
+      "step": 3500
+    },
+    {
+      "epoch": 0.8797653958944281,
+      "grad_norm": 4.411167621612549,
+      "learning_rate": 1.1209677419354839e-05,
+      "loss": 0.0515,
+      "step": 3600
+    },
+    {
+      "epoch": 0.9042033235581622,
+      "grad_norm": 3.9330861568450928,
+      "learning_rate": 1.0965298142717497e-05,
+      "loss": 0.0368,
+      "step": 3700
+    },
+    {
+      "epoch": 0.9286412512218963,
+      "grad_norm": 0.9437419772148132,
+      "learning_rate": 1.0720918866080157e-05,
+      "loss": 0.0479,
+      "step": 3800
+    },
+    {
+      "epoch": 0.9530791788856305,
+      "grad_norm": 1.0870046615600586,
+      "learning_rate": 1.0476539589442815e-05,
+      "loss": 0.0426,
+      "step": 3900
+    },
+    {
+      "epoch": 0.9775171065493646,
+      "grad_norm": 3.1951255798339844,
+      "learning_rate": 1.0232160312805475e-05,
+      "loss": 0.0432,
+      "step": 4000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9891518737672583,
+      "eval_f1": 0.9928387101834639,
+      "eval_loss": 0.04053397476673126,
+      "eval_runtime": 158.0778,
+      "eval_samples_per_second": 532.409,
+      "eval_steps_per_second": 11.096,
+      "step": 4092
+    },
+    {
+      "epoch": 1.0019550342130987,
+      "grad_norm": 0.5268033146858215,
+      "learning_rate": 9.987781036168133e-06,
+      "loss": 0.0386,
+      "step": 4100
+    },
+    {
+      "epoch": 1.0263929618768328,
+      "grad_norm": 0.3988477289676666,
+      "learning_rate": 9.743401759530793e-06,
+      "loss": 0.0224,
+      "step": 4200
+    },
+    {
+      "epoch": 1.050830889540567,
+      "grad_norm": 0.031836945563554764,
+      "learning_rate": 9.499022482893451e-06,
+      "loss": 0.0344,
+      "step": 4300
+    },
+    {
+      "epoch": 1.075268817204301,
+      "grad_norm": 1.4545081853866577,
+      "learning_rate": 9.25464320625611e-06,
+      "loss": 0.0285,
+      "step": 4400
+    },
+    {
+      "epoch": 1.099706744868035,
+      "grad_norm": 0.054912444204092026,
+      "learning_rate": 9.01026392961877e-06,
+      "loss": 0.0176,
+      "step": 4500
+    },
+    {
+      "epoch": 1.1241446725317692,
+      "grad_norm": 1.042144775390625,
+      "learning_rate": 8.765884652981427e-06,
+      "loss": 0.0235,
+      "step": 4600
+    },
+    {
+      "epoch": 1.1485826001955035,
+      "grad_norm": 0.12319644540548325,
+      "learning_rate": 8.521505376344087e-06,
+      "loss": 0.0274,
+      "step": 4700
+    },
+    {
+      "epoch": 1.1730205278592376,
+      "grad_norm": 0.08448143303394318,
+      "learning_rate": 8.277126099706745e-06,
+      "loss": 0.0261,
+      "step": 4800
+    },
+    {
+      "epoch": 1.1974584555229717,
+      "grad_norm": 0.06239945441484451,
+      "learning_rate": 8.032746823069404e-06,
+      "loss": 0.022,
+      "step": 4900
+    },
+    {
+      "epoch": 1.2218963831867058,
+      "grad_norm": 0.0412888340651989,
+      "learning_rate": 7.788367546432064e-06,
+      "loss": 0.0239,
+      "step": 5000
+    },
+    {
+      "epoch": 1.2463343108504399,
+      "grad_norm": 0.10470504313707352,
+      "learning_rate": 7.543988269794722e-06,
+      "loss": 0.0256,
+      "step": 5100
+    },
+    {
+      "epoch": 1.270772238514174,
+      "grad_norm": 0.3927100896835327,
+      "learning_rate": 7.299608993157381e-06,
+      "loss": 0.0299,
+      "step": 5200
+    },
+    {
+      "epoch": 1.295210166177908,
+      "grad_norm": 0.2635032534599304,
+      "learning_rate": 7.05522971652004e-06,
+      "loss": 0.0223,
+      "step": 5300
+    },
+    {
+      "epoch": 1.3196480938416422,
+      "grad_norm": 0.05280297249555588,
+      "learning_rate": 6.810850439882698e-06,
+      "loss": 0.0319,
+      "step": 5400
+    },
+    {
+      "epoch": 1.3440860215053765,
+      "grad_norm": 0.02359873428940773,
+      "learning_rate": 6.566471163245357e-06,
+      "loss": 0.02,
+      "step": 5500
+    },
+    {
+      "epoch": 1.3685239491691106,
+      "grad_norm": 0.42642369866371155,
+      "learning_rate": 6.324535679374389e-06,
+      "loss": 0.0241,
+      "step": 5600
+    },
+    {
+      "epoch": 1.3929618768328447,
+      "grad_norm": 0.7084789872169495,
+      "learning_rate": 6.080156402737049e-06,
+      "loss": 0.0195,
+      "step": 5700
+    },
+    {
+      "epoch": 1.4173998044965788,
+      "grad_norm": 0.14894872903823853,
+      "learning_rate": 5.835777126099708e-06,
+      "loss": 0.0233,
+      "step": 5800
+    },
+    {
+      "epoch": 1.4418377321603129,
+      "grad_norm": 6.313684463500977,
+      "learning_rate": 5.591397849462365e-06,
+      "loss": 0.0181,
+      "step": 5900
+    },
+    {
+      "epoch": 1.466275659824047,
+      "grad_norm": 0.9010400772094727,
+      "learning_rate": 5.347018572825024e-06,
+      "loss": 0.0243,
+      "step": 6000
+    },
+    {
+      "epoch": 1.490713587487781,
+      "grad_norm": 0.15028172731399536,
+      "learning_rate": 5.102639296187683e-06,
+      "loss": 0.0227,
+      "step": 6100
+    },
+    {
+      "epoch": 1.5151515151515151,
+      "grad_norm": 1.0572513341903687,
+      "learning_rate": 4.858260019550342e-06,
+      "loss": 0.019,
+      "step": 6200
+    },
+    {
+      "epoch": 1.5395894428152492,
+      "grad_norm": 0.056791041046381,
+      "learning_rate": 4.613880742913001e-06,
+      "loss": 0.0204,
+      "step": 6300
+    },
+    {
+      "epoch": 1.5640273704789833,
+      "grad_norm": 0.3077963590621948,
+      "learning_rate": 4.36950146627566e-06,
+      "loss": 0.0253,
+      "step": 6400
+    },
+    {
+      "epoch": 1.5884652981427174,
+      "grad_norm": 0.03899073228240013,
+      "learning_rate": 4.125122189638319e-06,
+      "loss": 0.0216,
+      "step": 6500
+    },
+    {
+      "epoch": 1.6129032258064515,
+      "grad_norm": 1.412708044052124,
+      "learning_rate": 3.8807429130009776e-06,
+      "loss": 0.0224,
+      "step": 6600
+    },
+    {
+      "epoch": 1.6373411534701856,
+      "grad_norm": 0.08187337219715118,
+      "learning_rate": 3.6363636363636366e-06,
+      "loss": 0.0178,
+      "step": 6700
+    },
+    {
+      "epoch": 1.6617790811339197,
+      "grad_norm": 0.04208606481552124,
+      "learning_rate": 3.391984359726295e-06,
+      "loss": 0.0247,
+      "step": 6800
+    },
+    {
+      "epoch": 1.6862170087976538,
+      "grad_norm": 0.0923227071762085,
+      "learning_rate": 3.147605083088954e-06,
+      "loss": 0.0207,
+      "step": 6900
+    },
+    {
+      "epoch": 1.710654936461388,
+      "grad_norm": 1.2065573930740356,
+      "learning_rate": 2.903225806451613e-06,
+      "loss": 0.0362,
+      "step": 7000
+    },
+    {
+      "epoch": 1.7350928641251222,
+      "grad_norm": 5.345006942749023,
+      "learning_rate": 2.658846529814272e-06,
+      "loss": 0.0274,
+      "step": 7100
+    },
+    {
+      "epoch": 1.7595307917888563,
+      "grad_norm": 0.1414783000946045,
+      "learning_rate": 2.414467253176931e-06,
+      "loss": 0.0204,
+      "step": 7200
+    },
+    {
+      "epoch": 1.7839687194525904,
+      "grad_norm": 1.9135044813156128,
+      "learning_rate": 2.17008797653959e-06,
+      "loss": 0.0279,
+      "step": 7300
+    },
+    {
+      "epoch": 1.8084066471163245,
+      "grad_norm": 1.5857988595962524,
+      "learning_rate": 1.9257086999022484e-06,
+      "loss": 0.0251,
+      "step": 7400
+    },
+    {
+      "epoch": 1.8328445747800588,
+      "grad_norm": 0.38852691650390625,
+      "learning_rate": 1.6813294232649072e-06,
+      "loss": 0.0199,
+      "step": 7500
+    },
+    {
+      "epoch": 1.857282502443793,
+      "grad_norm": 0.09283141791820526,
+      "learning_rate": 1.4369501466275662e-06,
+      "loss": 0.0178,
+      "step": 7600
+    },
+    {
+      "epoch": 1.881720430107527,
+      "grad_norm": 0.04146512970328331,
+      "learning_rate": 1.192570869990225e-06,
+      "loss": 0.017,
+      "step": 7700
+    },
+    {
+      "epoch": 1.906158357771261,
+      "grad_norm": 0.029652154073119164,
+      "learning_rate": 9.481915933528838e-07,
+      "loss": 0.0193,
+      "step": 7800
+    },
+    {
+      "epoch": 1.9305962854349952,
+      "grad_norm": 0.04845303297042847,
+      "learning_rate": 7.038123167155427e-07,
+      "loss": 0.0287,
+      "step": 7900
+    },
+    {
+      "epoch": 1.9550342130987293,
+      "grad_norm": 0.05037612095475197,
+      "learning_rate": 4.618768328445748e-07,
+      "loss": 0.0257,
+      "step": 8000
+    },
+    {
+      "epoch": 1.9794721407624634,
+      "grad_norm": 0.9069479703903198,
+      "learning_rate": 2.1749755620723366e-07,
+      "loss": 0.0299,
+      "step": 8100
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9917183527007438,
+      "eval_f1": 0.994545440316787,
+      "eval_loss": 0.03528362512588501,
+      "eval_runtime": 158.882,
+      "eval_samples_per_second": 529.714,
+      "eval_steps_per_second": 11.04,
+      "step": 8184
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 8184,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.2026570822144e+16,
+  "train_batch_size": 48,
+  "trial_name": null,
+  "trial_params": null
+}

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
+size 5841

Floressek/sentiment_classification_from_distillbert/checkpoint-8184/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

Floressek/sentiment_classification_from_distillbert/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.57.1",
+  "vocab_size": 30522
+}

Floressek/sentiment_classification_from_distillbert/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ef48f63397444fab7fe77a7c3e8113768986ff1c8e5e9126a66727aa030ec64
+size 267832560

Floressek/sentiment_classification_from_distillbert/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

Floressek/sentiment_classification_from_distillbert/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

Floressek/sentiment_classification_from_distillbert/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

Floressek/sentiment_classification_from_distillbert/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c534883427a071f1d9f3ba8e1a112e5e016307e4c8c2c116719a636f815013c
+size 5841

Floressek/sentiment_classification_from_distillbert/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff