TetorisAce committed on Sep 19, 2025

Commit

44ec689

verified ·

1 Parent(s): 6275295

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-8ogzfam8/checkpoint-125/config.json +36 -0
run-8ogzfam8/checkpoint-125/model.safetensors +3 -0
run-8ogzfam8/checkpoint-125/optimizer.pt +3 -0
run-8ogzfam8/checkpoint-125/rng_state.pth +3 -0
run-8ogzfam8/checkpoint-125/scheduler.pt +3 -0
run-8ogzfam8/checkpoint-125/special_tokens_map.json +7 -0
run-8ogzfam8/checkpoint-125/tokenizer.json +0 -0
run-8ogzfam8/checkpoint-125/tokenizer_config.json +56 -0
run-8ogzfam8/checkpoint-125/trainer_state.json +85 -0
run-8ogzfam8/checkpoint-125/training_args.bin +3 -0
run-8ogzfam8/checkpoint-125/vocab.txt +0 -0
run-b4ynzikp/checkpoint-128/config.json +36 -0
run-b4ynzikp/checkpoint-128/model.safetensors +3 -0
run-b4ynzikp/checkpoint-128/optimizer.pt +3 -0
run-b4ynzikp/checkpoint-128/rng_state.pth +3 -0
run-b4ynzikp/checkpoint-128/scheduler.pt +3 -0
run-b4ynzikp/checkpoint-128/special_tokens_map.json +7 -0
run-b4ynzikp/checkpoint-128/tokenizer.json +0 -0
run-b4ynzikp/checkpoint-128/tokenizer_config.json +56 -0
run-b4ynzikp/checkpoint-128/trainer_state.json +112 -0
run-b4ynzikp/checkpoint-128/training_args.bin +3 -0
run-b4ynzikp/checkpoint-128/vocab.txt +0 -0
run-b4ynzikp/checkpoint-160/config.json +36 -0
run-b4ynzikp/checkpoint-160/model.safetensors +3 -0
run-b4ynzikp/checkpoint-160/optimizer.pt +3 -0
run-b4ynzikp/checkpoint-160/rng_state.pth +3 -0
run-b4ynzikp/checkpoint-160/scheduler.pt +3 -0
run-b4ynzikp/checkpoint-160/special_tokens_map.json +7 -0
run-b4ynzikp/checkpoint-160/tokenizer.json +0 -0
run-b4ynzikp/checkpoint-160/tokenizer_config.json +56 -0
run-b4ynzikp/checkpoint-160/trainer_state.json +121 -0
run-b4ynzikp/checkpoint-160/training_args.bin +3 -0
run-b4ynzikp/checkpoint-160/vocab.txt +0 -0
run-b4ynzikp/checkpoint-64/config.json +36 -0
run-b4ynzikp/checkpoint-64/model.safetensors +3 -0
run-b4ynzikp/checkpoint-64/optimizer.pt +3 -0
run-b4ynzikp/checkpoint-64/rng_state.pth +3 -0
run-b4ynzikp/checkpoint-64/scheduler.pt +3 -0
run-b4ynzikp/checkpoint-64/special_tokens_map.json +7 -0
run-b4ynzikp/checkpoint-64/tokenizer.json +0 -0
run-b4ynzikp/checkpoint-64/tokenizer_config.json +56 -0
run-b4ynzikp/checkpoint-64/trainer_state.json +94 -0
run-b4ynzikp/checkpoint-64/training_args.bin +3 -0
run-b4ynzikp/checkpoint-64/vocab.txt +0 -0
run-b4ynzikp/checkpoint-96/config.json +36 -0
run-b4ynzikp/checkpoint-96/model.safetensors +3 -0
run-b4ynzikp/checkpoint-96/optimizer.pt +3 -0
run-b4ynzikp/checkpoint-96/rng_state.pth +3 -0
run-b4ynzikp/checkpoint-96/scheduler.pt +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37bbe8d27f5d1f173234e5bd80404b9fe1a13f43af7d4b32966b99328c7963f4
+oid sha256:536eba2c997f23d7f1f444aca3b050dc6e8bac16da9485f788d7042b6383507d
 size 267838720

run-8ogzfam8/checkpoint-125/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-8ogzfam8/checkpoint-125/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:536eba2c997f23d7f1f444aca3b050dc6e8bac16da9485f788d7042b6383507d
+size 267838720

run-8ogzfam8/checkpoint-125/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9311a48d3095a1a99e6eeffa2f26c3c0da4ba40b343ebf8ef52e550768ebe378
+size 535741515

run-8ogzfam8/checkpoint-125/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bc39dd5b496e913d1823bb9a0ef294a9314eae36add2c428d8faa8c6b530e22
+size 14645

run-8ogzfam8/checkpoint-125/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d1696940a1a16fde0224ca55095d520907d7bad426c39c0875de7a8f3260f08
+size 1465

run-8ogzfam8/checkpoint-125/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-8ogzfam8/checkpoint-125/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-8ogzfam8/checkpoint-125/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-8ogzfam8/checkpoint-125/trainer_state.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+  "best_global_step": 125,
+  "best_metric": 0.42684227228164673,
+  "best_model_checkpoint": "topic_classifier_model/run-8ogzfam8/checkpoint-125",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 125,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.871,
+      "eval_loss": 0.42684227228164673,
+      "eval_runtime": 2.6757,
+      "eval_samples_per_second": 373.729,
+      "eval_steps_per_second": 23.545,
+      "step": 125
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 250,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 3.346839943657667e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 2,
+    "per_device_train_batch_size": 8,
+    "seed": 11
+  }
+}

run-8ogzfam8/checkpoint-125/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5eaecd1888fa501407db3d399b7bb3a72fc50b8bd9383dd5e59f00c74c28896d
+size 5777

run-8ogzfam8/checkpoint-125/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-b4ynzikp/checkpoint-128/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-b4ynzikp/checkpoint-128/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6cf4334741dac6b052bb36cbd2174fb68d6978f8fe773cf52b4715dc1a55036e
+size 267838720

run-b4ynzikp/checkpoint-128/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b7b133a72d72b80b672bd42e722fff5349d9e464eb8ce5ebfbd99da5961d4ce
+size 535741515

run-b4ynzikp/checkpoint-128/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37f1d64b44cfc70047ea95bee0dce4f8c64c7ca98f8d35ae5fcc3bbd8cc4a153
+size 14645

run-b4ynzikp/checkpoint-128/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:deab46b0cd015dfe26bba19ad8de60669f7c0f69d8d4c0db4245c6a4e953344c
+size 1465

run-b4ynzikp/checkpoint-128/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-b4ynzikp/checkpoint-128/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-b4ynzikp/checkpoint-128/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-b4ynzikp/checkpoint-128/trainer_state.json ADDED Viewed

	@@ -0,0 +1,112 @@

+{
+  "best_global_step": 64,
+  "best_metric": 0.3711593747138977,
+  "best_model_checkpoint": "topic_classifier_model/run-b4ynzikp/checkpoint-64",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 128,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.883,
+      "eval_loss": 0.3806321918964386,
+      "eval_runtime": 2.6362,
+      "eval_samples_per_second": 379.329,
+      "eval_steps_per_second": 23.898,
+      "step": 32
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.887,
+      "eval_loss": 0.3711593747138977,
+      "eval_runtime": 2.949,
+      "eval_samples_per_second": 339.1,
+      "eval_steps_per_second": 21.363,
+      "step": 64
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.875,
+      "eval_loss": 0.413004606962204,
+      "eval_runtime": 2.8246,
+      "eval_samples_per_second": 354.031,
+      "eval_steps_per_second": 22.304,
+      "step": 96
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.892,
+      "eval_loss": 0.3948880732059479,
+      "eval_runtime": 2.8594,
+      "eval_samples_per_second": 349.719,
+      "eval_steps_per_second": 22.032,
+      "step": 128
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 160,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 9.595199664340211e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 34
+  }
+}

run-b4ynzikp/checkpoint-128/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a2f05515e518ab0e60cb876ad54b4834adb2a49e3eb133f72a85de5d2c9955a
+size 5777

run-b4ynzikp/checkpoint-128/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-b4ynzikp/checkpoint-160/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-b4ynzikp/checkpoint-160/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb7d397cacb1dde9ea7c411e0795e891a957e7396cbaee2087aa3cbff8cd6b7
+size 267838720

run-b4ynzikp/checkpoint-160/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3ea0945f0beff56a4139e74dbcb650370e3224802e6d22d88fc7110df92fb3d
+size 535741515

run-b4ynzikp/checkpoint-160/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d2b9e34053390286cb908ab12c7309be0c9b9c0460a0981cce82fcc76aa141f
+size 14645

run-b4ynzikp/checkpoint-160/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a3ef02af74457a0f98e3c5d44026b4bf78d52855093f9286c83bfe9fc9b2e83
+size 1465

run-b4ynzikp/checkpoint-160/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-b4ynzikp/checkpoint-160/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-b4ynzikp/checkpoint-160/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-b4ynzikp/checkpoint-160/trainer_state.json ADDED Viewed

	@@ -0,0 +1,121 @@

+{
+  "best_global_step": 64,
+  "best_metric": 0.3711593747138977,
+  "best_model_checkpoint": "topic_classifier_model/run-b4ynzikp/checkpoint-64",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 160,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.883,
+      "eval_loss": 0.3806321918964386,
+      "eval_runtime": 2.6362,
+      "eval_samples_per_second": 379.329,
+      "eval_steps_per_second": 23.898,
+      "step": 32
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.887,
+      "eval_loss": 0.3711593747138977,
+      "eval_runtime": 2.949,
+      "eval_samples_per_second": 339.1,
+      "eval_steps_per_second": 21.363,
+      "step": 64
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.875,
+      "eval_loss": 0.413004606962204,
+      "eval_runtime": 2.8246,
+      "eval_samples_per_second": 354.031,
+      "eval_steps_per_second": 22.304,
+      "step": 96
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.892,
+      "eval_loss": 0.3948880732059479,
+      "eval_runtime": 2.8594,
+      "eval_samples_per_second": 349.719,
+      "eval_steps_per_second": 22.032,
+      "step": 128
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.886,
+      "eval_loss": 0.40653714537620544,
+      "eval_runtime": 2.7257,
+      "eval_samples_per_second": 366.881,
+      "eval_steps_per_second": 23.113,
+      "step": 160
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 160,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 9.595199664340211e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 34
+  }
+}

run-b4ynzikp/checkpoint-160/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a2f05515e518ab0e60cb876ad54b4834adb2a49e3eb133f72a85de5d2c9955a
+size 5777

run-b4ynzikp/checkpoint-160/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-b4ynzikp/checkpoint-64/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-b4ynzikp/checkpoint-64/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:578309e76d4da93327570ac537e74c76dd84c72238f66bb32073e2a2d2132820
+size 267838720

run-b4ynzikp/checkpoint-64/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46802ba298cd0de682c05b3e648ba84ba467c75b7f17d9ffac0e8e47359fdc9f
+size 535741515

run-b4ynzikp/checkpoint-64/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d841f3eb96223466fb5b93855329eba35d6e0c01f1e26cb76582e5042b724e5
+size 14645

run-b4ynzikp/checkpoint-64/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5fab63d5d8fd6f6d958c14964451579e805a98b78785db739da80963e503724a
+size 1465

run-b4ynzikp/checkpoint-64/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-b4ynzikp/checkpoint-64/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-b4ynzikp/checkpoint-64/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-b4ynzikp/checkpoint-64/trainer_state.json ADDED Viewed

	@@ -0,0 +1,94 @@

+{
+  "best_global_step": 64,
+  "best_metric": 0.3711593747138977,
+  "best_model_checkpoint": "topic_classifier_model/run-b4ynzikp/checkpoint-64",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 64,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.883,
+      "eval_loss": 0.3806321918964386,
+      "eval_runtime": 2.6362,
+      "eval_samples_per_second": 379.329,
+      "eval_steps_per_second": 23.898,
+      "step": 32
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.887,
+      "eval_loss": 0.3711593747138977,
+      "eval_runtime": 2.949,
+      "eval_samples_per_second": 339.1,
+      "eval_steps_per_second": 21.363,
+      "step": 64
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 160,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "_wandb": {
+      "cli_version": "0.21.3",
+      "m": [],
+      "python_version": "3.12.11",
+      "t": {
+        "1": [
+          1,
+          2,
+          3,
+          5,
+          11,
+          12,
+          41,
+          49,
+          51,
+          53,
+          71,
+          98,
+          100,
+          105
+        ],
+        "12": "0.21.3",
+        "13": "linux-x86_64",
+        "3": [
+          14
+        ],
+        "4": "3.12.11",
+        "5": "0.21.3",
+        "6": "4.56.1",
+        "8": [
+          1,
+          12
+        ]
+      }
+    },
+    "assignments": {},
+    "learning_rate": 9.595199664340211e-05,
+    "metric": "eval/loss",
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 34
+  }
+}

run-b4ynzikp/checkpoint-64/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a2f05515e518ab0e60cb876ad54b4834adb2a49e3eb133f72a85de5d2c9955a
+size 5777

run-b4ynzikp/checkpoint-64/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-b4ynzikp/checkpoint-96/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "transformers_version": "4.56.1",
+  "vocab_size": 30522
+}

run-b4ynzikp/checkpoint-96/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33f2cf6ecef48c9bba48f107e21a93d9441389424089057c0dc47353fe891505
+size 267838720

run-b4ynzikp/checkpoint-96/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:768cea78b214cef66deff96972bd18d761e6b1a3dc7a501d3c27e84bcfbc1fee
+size 535741515

run-b4ynzikp/checkpoint-96/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd7528a0a367ccff46409e79eda2e03ed5275eb23037156cbc14ad6032c18ca2
+size 14645

run-b4ynzikp/checkpoint-96/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb00b453fe64822f0322b77b2ea47e915c9805d6d803456a196ec6d59355796b
+size 1465