TetorisAce committed on Sep 19, 2025

Commit

813f6da

verified ·

1 Parent(s): 5f6890c

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-m5alrfwj/checkpoint-1000/config.json +36 -0
run-m5alrfwj/checkpoint-1000/model.safetensors +3 -0
run-m5alrfwj/checkpoint-1000/optimizer.pt +3 -0
run-m5alrfwj/checkpoint-1000/rng_state.pth +3 -0
run-m5alrfwj/checkpoint-1000/scheduler.pt +3 -0
run-m5alrfwj/checkpoint-1000/special_tokens_map.json +7 -0
run-m5alrfwj/checkpoint-1000/tokenizer.json +0 -0
run-m5alrfwj/checkpoint-1000/tokenizer_config.json +56 -0
run-m5alrfwj/checkpoint-1000/trainer_state.json +126 -0
run-m5alrfwj/checkpoint-1000/training_args.bin +3 -0
run-m5alrfwj/checkpoint-1000/vocab.txt +0 -0
run-m5alrfwj/checkpoint-1250/config.json +36 -0
run-m5alrfwj/checkpoint-1250/model.safetensors +3 -0
run-m5alrfwj/checkpoint-1250/optimizer.pt +3 -0
run-m5alrfwj/checkpoint-1250/rng_state.pth +3 -0
run-m5alrfwj/checkpoint-1250/scheduler.pt +3 -0
run-m5alrfwj/checkpoint-1250/special_tokens_map.json +7 -0
run-m5alrfwj/checkpoint-1250/tokenizer.json +0 -0
run-m5alrfwj/checkpoint-1250/tokenizer_config.json +56 -0
run-m5alrfwj/checkpoint-1250/trainer_state.json +135 -0
run-m5alrfwj/checkpoint-1250/training_args.bin +3 -0
run-m5alrfwj/checkpoint-1250/vocab.txt +0 -0
run-m5alrfwj/checkpoint-500/config.json +36 -0
run-m5alrfwj/checkpoint-500/model.safetensors +3 -0
run-m5alrfwj/checkpoint-500/optimizer.pt +3 -0
run-m5alrfwj/checkpoint-500/rng_state.pth +3 -0
run-m5alrfwj/checkpoint-500/scheduler.pt +3 -0
run-m5alrfwj/checkpoint-500/special_tokens_map.json +7 -0
run-m5alrfwj/checkpoint-500/tokenizer.json +0 -0
run-m5alrfwj/checkpoint-500/tokenizer_config.json +56 -0
run-m5alrfwj/checkpoint-500/trainer_state.json +101 -0
run-m5alrfwj/checkpoint-500/training_args.bin +3 -0
run-m5alrfwj/checkpoint-500/vocab.txt +0 -0
run-m5alrfwj/checkpoint-750/config.json +36 -0
run-m5alrfwj/checkpoint-750/model.safetensors +3 -0
run-m5alrfwj/checkpoint-750/optimizer.pt +3 -0
run-m5alrfwj/checkpoint-750/rng_state.pth +3 -0
run-m5alrfwj/checkpoint-750/scheduler.pt +3 -0
run-m5alrfwj/checkpoint-750/special_tokens_map.json +7 -0
run-m5alrfwj/checkpoint-750/tokenizer.json +0 -0
run-m5alrfwj/checkpoint-750/tokenizer_config.json +56 -0
run-m5alrfwj/checkpoint-750/trainer_state.json +110 -0
run-m5alrfwj/checkpoint-750/training_args.bin +3 -0
run-m5alrfwj/checkpoint-750/vocab.txt +0 -0
run-p24aopwx/checkpoint-125/config.json +36 -0
run-p24aopwx/checkpoint-125/model.safetensors +3 -0
run-p24aopwx/checkpoint-125/optimizer.pt +3 -0
run-p24aopwx/checkpoint-125/rng_state.pth +3 -0
run-p24aopwx/checkpoint-125/scheduler.pt +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01e37ec40f27baf59831267cd31bca2337e778e9b1758b8f0f95468372c1c4e4
 size 267838720

 version https://git-lfs.github.com/spec/v1
+oid sha256:744af60a6ddc8b6e529237e37bb1396ab85e8d20e864172ea8939e8f949c1356
 size 267838720

run-m5alrfwj/checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-m5alrfwj/checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c95f26796a0482e9a2d9e9e3fdbd05b7666c887a0f05d5144aa67ff39ebec490
+size 267838720

run-m5alrfwj/checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b36e19acfb85dbc1d1ec4ff6d8dfd75c4ea08d0a27444f4018e5eb2a18086afb
+size 535741515

run-m5alrfwj/checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f15abc620b9b6f90f19156925266492577bf058cc7e6d3f51bf0a480c39782fc
+size 14645

run-m5alrfwj/checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08b121b2b56701397a9ea141c8953fd93f7eb780125bf0974d96e2df51581d85
+size 1465

run-m5alrfwj/checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-1000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-m5alrfwj/checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,126 @@

+{
+  "best_global_step": 500,
+  "best_metric": 0.6061989665031433,
+  "best_model_checkpoint": "topic_classifier_model/run-m5alrfwj/checkpoint-500",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.852,
+      "eval_loss": 0.6556681394577026,
+      "eval_runtime": 3.5365,
+      "eval_samples_per_second": 282.762,
+      "eval_steps_per_second": 17.814,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2926628589630127,
+      "learning_rate": 3.6834458632458664e-05,
+      "loss": 0.4736,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.872,
+      "eval_loss": 0.6061989665031433,
+      "eval_runtime": 2.8624,
+      "eval_samples_per_second": 349.36,
+      "eval_steps_per_second": 22.01,
+      "step": 500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.877,
+      "eval_loss": 0.6755268573760986,
+      "eval_runtime": 2.776,
+      "eval_samples_per_second": 360.235,
+      "eval_steps_per_second": 22.695,
+      "step": 750
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.07131870090961456,
+      "learning_rate": 1.231085102096821e-05,
+      "loss": 0.2149,
+      "step": 1000
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.881,
+      "eval_loss": 0.6423977017402649,
+      "eval_runtime": 2.756,
+      "eval_samples_per_second": 362.842,
+      "eval_steps_per_second": 22.859,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1250,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 73536517621056.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 6.130901902872614e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 40
+  }
+}

run-m5alrfwj/checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:934cbbb308e8e2a43d3dfa9540bc12899ee79a766bc5434b122ad1473158b50e
+size 5777

run-m5alrfwj/checkpoint-1000/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-m5alrfwj/checkpoint-1250/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-m5alrfwj/checkpoint-1250/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0f1b2aeb6e9a5f8e7b762de8008fe9972bc77072c9f9f4c2d55050700eaccee
+size 267838720

run-m5alrfwj/checkpoint-1250/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5b8be56b21c6beadd2505d5005abadf9dca833689a196390953a204b1865b1a
+size 535741515

run-m5alrfwj/checkpoint-1250/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48da12aac3dab05fc351cb7e7d8592ffbda0533648e28f9544fdcacc033f5933
+size 14645

run-m5alrfwj/checkpoint-1250/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:916551395698ccbd000ff6a0bb9554b7062f9fc857f3acaee26868d4e049c367
+size 1465

run-m5alrfwj/checkpoint-1250/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-1250/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-m5alrfwj/checkpoint-1250/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-1250/trainer_state.json ADDED Viewed

	@@ -0,0 +1,135 @@

+{
+  "best_global_step": 500,
+  "best_metric": 0.6061989665031433,
+  "best_model_checkpoint": "topic_classifier_model/run-m5alrfwj/checkpoint-500",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1250,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.852,
+      "eval_loss": 0.6556681394577026,
+      "eval_runtime": 3.5365,
+      "eval_samples_per_second": 282.762,
+      "eval_steps_per_second": 17.814,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2926628589630127,
+      "learning_rate": 3.6834458632458664e-05,
+      "loss": 0.4736,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.872,
+      "eval_loss": 0.6061989665031433,
+      "eval_runtime": 2.8624,
+      "eval_samples_per_second": 349.36,
+      "eval_steps_per_second": 22.01,
+      "step": 500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.877,
+      "eval_loss": 0.6755268573760986,
+      "eval_runtime": 2.776,
+      "eval_samples_per_second": 360.235,
+      "eval_steps_per_second": 22.695,
+      "step": 750
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 0.07131870090961456,
+      "learning_rate": 1.231085102096821e-05,
+      "loss": 0.2149,
+      "step": 1000
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.881,
+      "eval_loss": 0.6423977017402649,
+      "eval_runtime": 2.756,
+      "eval_samples_per_second": 362.842,
+      "eval_steps_per_second": 22.859,
+      "step": 1000
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.879,
+      "eval_loss": 0.6394979953765869,
+      "eval_runtime": 7.4793,
+      "eval_samples_per_second": 133.701,
+      "eval_steps_per_second": 8.423,
+      "step": 1250
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1250,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 73536517621056.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 6.130901902872614e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 40
+  }
+}

run-m5alrfwj/checkpoint-1250/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:934cbbb308e8e2a43d3dfa9540bc12899ee79a766bc5434b122ad1473158b50e
+size 5777

run-m5alrfwj/checkpoint-1250/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-m5alrfwj/checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-m5alrfwj/checkpoint-500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f28e988ac645a6dab8bd4659d8985ddf5c13f05613270be2d6af9439c1b439e
+size 267838720

run-m5alrfwj/checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56b8874b57915d2de7c4ddc19d9699ae69fc24f474264b83541498ba43d322a4
+size 535741515

run-m5alrfwj/checkpoint-500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:103db0fef648fd202ff52ea1c22a0eec151fa1f2672aeacfbc9285e1a2415f6e
+size 14645

run-m5alrfwj/checkpoint-500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:acf363e08a3863fb9f6e8411fd5fa8d4075b94de2189684197810d61f1b4c0bc
+size 1465

run-m5alrfwj/checkpoint-500/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-500/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-m5alrfwj/checkpoint-500/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,101 @@

+{
+  "best_global_step": 500,
+  "best_metric": 0.6061989665031433,
+  "best_model_checkpoint": "topic_classifier_model/run-m5alrfwj/checkpoint-500",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.852,
+      "eval_loss": 0.6556681394577026,
+      "eval_runtime": 3.5365,
+      "eval_samples_per_second": 282.762,
+      "eval_steps_per_second": 17.814,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2926628589630127,
+      "learning_rate": 3.6834458632458664e-05,
+      "loss": 0.4736,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.872,
+      "eval_loss": 0.6061989665031433,
+      "eval_runtime": 2.8624,
+      "eval_samples_per_second": 349.36,
+      "eval_steps_per_second": 22.01,
+      "step": 500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1250,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 36660625210272.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 6.130901902872614e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 40
+  }
+}

run-m5alrfwj/checkpoint-500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:934cbbb308e8e2a43d3dfa9540bc12899ee79a766bc5434b122ad1473158b50e
+size 5777

run-m5alrfwj/checkpoint-500/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-m5alrfwj/checkpoint-750/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-m5alrfwj/checkpoint-750/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6cc822ab401951e3fb6f303ca4b41a0a42dc3396a2d1c60dd351a8e9eb8c69c
+size 267838720

run-m5alrfwj/checkpoint-750/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:318b3e1d6bb585f0de175ce560058a4498741f48a6e1f96ca2c44600a6d87a07
+size 535741515

run-m5alrfwj/checkpoint-750/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39ab02b420ea14e8398f4feeb9750f1ac77e942cc63743ba92de1d09214537f1
+size 14645

run-m5alrfwj/checkpoint-750/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8efa9aff56039be6656b8498ac6ec1beb41da2cc69f943fb3e30fbcbdb3bbe6
+size 1465

run-m5alrfwj/checkpoint-750/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-750/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-m5alrfwj/checkpoint-750/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-m5alrfwj/checkpoint-750/trainer_state.json ADDED Viewed

	@@ -0,0 +1,110 @@

+{
+  "best_global_step": 500,
+  "best_metric": 0.6061989665031433,
+  "best_model_checkpoint": "topic_classifier_model/run-m5alrfwj/checkpoint-500",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 750,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.852,
+      "eval_loss": 0.6556681394577026,
+      "eval_runtime": 3.5365,
+      "eval_samples_per_second": 282.762,
+      "eval_steps_per_second": 17.814,
+      "step": 250
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2926628589630127,
+      "learning_rate": 3.6834458632458664e-05,
+      "loss": 0.4736,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.872,
+      "eval_loss": 0.6061989665031433,
+      "eval_runtime": 2.8624,
+      "eval_samples_per_second": 349.36,
+      "eval_steps_per_second": 22.01,
+      "step": 500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.877,
+      "eval_loss": 0.6755268573760986,
+      "eval_runtime": 2.776,
+      "eval_samples_per_second": 360.235,
+      "eval_steps_per_second": 22.695,
+      "step": 750
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1250,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 36660625210272.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 6.130901902872614e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 40
+  }
+}

run-m5alrfwj/checkpoint-750/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:934cbbb308e8e2a43d3dfa9540bc12899ee79a766bc5434b122ad1473158b50e
+size 5777

run-m5alrfwj/checkpoint-750/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-p24aopwx/checkpoint-125/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-p24aopwx/checkpoint-125/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:744af60a6ddc8b6e529237e37bb1396ab85e8d20e864172ea8939e8f949c1356
+size 267838720

run-p24aopwx/checkpoint-125/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f670e413168bc73f3106e39f815042d3f4d389772597bd7e6e3266ba399336
+size 535741515

run-p24aopwx/checkpoint-125/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f956244cc01f93f2b094267fd3da2f00256cf8da5d8e24486beff947c2f4e255
+size 14645

run-p24aopwx/checkpoint-125/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:485461fcfdbfed1647e4ca0c7ce2d5a7eaf0fb2c12d7a631175d22160df6c527
+size 1465