SaitejaJate committed on Aug 4, 2025

Commit

ba48e40

verified ·

1 Parent(s): 25ce104

Upload folder using huggingface_hub

Browse files

Files changed (29) hide show

.DS_Store +0 -0
checkpoint-200/config.json +25 -0
checkpoint-200/model.safetensors +3 -0
checkpoint-200/optimizer.pt +3 -0
checkpoint-200/rng_state.pth +3 -0
checkpoint-200/scheduler.pt +3 -0
checkpoint-200/trainer_state.json +70 -0
checkpoint-200/training_args.bin +3 -0
checkpoint-400/config.json +25 -0
checkpoint-400/model.safetensors +3 -0
checkpoint-400/optimizer.pt +3 -0
checkpoint-400/rng_state.pth +3 -0
checkpoint-400/scheduler.pt +3 -0
checkpoint-400/trainer_state.json +107 -0
checkpoint-400/training_args.bin +3 -0
checkpoint-596/config.json +25 -0
checkpoint-596/model.safetensors +3 -0
checkpoint-596/optimizer.pt +3 -0
checkpoint-596/rng_state.pth +3 -0
checkpoint-596/scheduler.pt +3 -0
checkpoint-596/trainer_state.json +128 -0
checkpoint-596/training_args.bin +3 -0
config.json +25 -0
model.safetensors +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +56 -0
training_args.bin +3 -0
vocab.txt +0 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

checkpoint-200/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 30522
+}

checkpoint-200/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ba1ed4009db0f14fe96f723225588f2732128715085903a163691ad19d1183e
+size 267832560

checkpoint-200/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78129a9bf914880b817b2e7d52fc1084f4990cee96d77884966a0d07f850171d
+size 535724410

checkpoint-200/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b182573f61d8bcf5eaefcbf8f98d8734b6db51b44ad36aed3a305c431539fa1
+size 13990

checkpoint-200/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3604dd67acb2c38b76741c9250cef6941c411bc49a9218ce93c4bfef9ecf1d
+size 1064

checkpoint-200/trainer_state.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "best_metric": 0.9941434846266471,
+  "best_model_checkpoint": "./cbt_classifier/checkpoint-200",
+  "epoch": 0.6700167504187605,
+  "eval_steps": 200,
+  "global_step": 200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.16750418760469013,
+      "grad_norm": 0.858985424041748,
+      "learning_rate": 2.5e-05,
+      "loss": 0.5097,
+      "step": 50
+    },
+    {
+      "epoch": 0.33500837520938026,
+      "grad_norm": 2.54575777053833,
+      "learning_rate": 5e-05,
+      "loss": 0.0863,
+      "step": 100
+    },
+    {
+      "epoch": 0.5025125628140703,
+      "grad_norm": 38.31464767456055,
+      "learning_rate": 4.495967741935484e-05,
+      "loss": 0.0248,
+      "step": 150
+    },
+    {
+      "epoch": 0.6700167504187605,
+      "grad_norm": 0.0192014928907156,
+      "learning_rate": 3.991935483870968e-05,
+      "loss": 0.0322,
+      "step": 200
+    },
+    {
+      "epoch": 0.6700167504187605,
+      "eval_accuracy": 0.9941434846266471,
+      "eval_loss": 0.023075200617313385,
+      "eval_runtime": 7.8942,
+      "eval_samples_per_second": 86.519,
+      "eval_steps_per_second": 10.894,
+      "step": 200
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 596,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 105973918924800.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb638b9ec3e9109a3efd288a3ca3e8f97fe303983495cc33a011258151b00099
+size 5240

checkpoint-400/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 30522
+}

checkpoint-400/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a75078d7a59b6799de31673f39d27b34af0df229c8e3941beebcec1fe94aff22
+size 267832560

checkpoint-400/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e861d35446023aa01f002cd372ae3f20f187ece1fbeacc408fd97f317f9b367
+size 535724410

checkpoint-400/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06e55bfc8723f269a626afca0be6f7def5753f3bb265436b94c5580b703cfcc7
+size 13990

checkpoint-400/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6fe93f995d555235837eb9f033279025ea84b859d771a1c41c585f055a1dd1f4
+size 1064

checkpoint-400/trainer_state.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "best_metric": 0.9985358711566618,
+  "best_model_checkpoint": "./cbt_classifier/checkpoint-400",
+  "epoch": 1.338358458961474,
+  "eval_steps": 200,
+  "global_step": 400,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.16750418760469013,
+      "grad_norm": 0.858985424041748,
+      "learning_rate": 2.5e-05,
+      "loss": 0.5097,
+      "step": 50
+    },
+    {
+      "epoch": 0.33500837520938026,
+      "grad_norm": 2.54575777053833,
+      "learning_rate": 5e-05,
+      "loss": 0.0863,
+      "step": 100
+    },
+    {
+      "epoch": 0.5025125628140703,
+      "grad_norm": 38.31464767456055,
+      "learning_rate": 4.495967741935484e-05,
+      "loss": 0.0248,
+      "step": 150
+    },
+    {
+      "epoch": 0.6700167504187605,
+      "grad_norm": 0.0192014928907156,
+      "learning_rate": 3.991935483870968e-05,
+      "loss": 0.0322,
+      "step": 200
+    },
+    {
+      "epoch": 0.6700167504187605,
+      "eval_accuracy": 0.9941434846266471,
+      "eval_loss": 0.023075200617313385,
+      "eval_runtime": 7.8942,
+      "eval_samples_per_second": 86.519,
+      "eval_steps_per_second": 10.894,
+      "step": 200
+    },
+    {
+      "epoch": 0.8375209380234506,
+      "grad_norm": 0.010706444270908833,
+      "learning_rate": 3.487903225806452e-05,
+      "loss": 0.0155,
+      "step": 250
+    },
+    {
+      "epoch": 1.003350083752094,
+      "grad_norm": 0.030784547328948975,
+      "learning_rate": 2.9838709677419357e-05,
+      "loss": 0.0134,
+      "step": 300
+    },
+    {
+      "epoch": 1.170854271356784,
+      "grad_norm": 0.012720568105578423,
+      "learning_rate": 2.4798387096774196e-05,
+      "loss": 0.015,
+      "step": 350
+    },
+    {
+      "epoch": 1.338358458961474,
+      "grad_norm": 0.0057291556149721146,
+      "learning_rate": 1.975806451612903e-05,
+      "loss": 0.0028,
+      "step": 400
+    },
+    {
+      "epoch": 1.338358458961474,
+      "eval_accuracy": 0.9985358711566618,
+      "eval_loss": 0.003182810265570879,
+      "eval_runtime": 7.8968,
+      "eval_samples_per_second": 86.491,
+      "eval_steps_per_second": 10.891,
+      "step": 400
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 596,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 211682903052288.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-400/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb638b9ec3e9109a3efd288a3ca3e8f97fe303983495cc33a011258151b00099
+size 5240

checkpoint-596/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 30522
+}

checkpoint-596/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a08e793dd75ec73a58d43bba6e373a162ad5044da70b8cf545f8a11ddaedee7a
+size 267832560

checkpoint-596/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12648e8aff4a6628a4586970dea643ba86fbb79dba4233473847d0ef97890d14
+size 535724410

checkpoint-596/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06e55bfc8723f269a626afca0be6f7def5753f3bb265436b94c5580b703cfcc7
+size 13990

checkpoint-596/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67d6d0ca6fc85976a390b2f807110452cd6223e6beace8ef4178de54cf822add
+size 1064

checkpoint-596/trainer_state.json ADDED Viewed

	@@ -0,0 +1,128 @@

+{
+  "best_metric": 0.9985358711566618,
+  "best_model_checkpoint": "./cbt_classifier/checkpoint-400",
+  "epoch": 1.9949748743718594,
+  "eval_steps": 200,
+  "global_step": 596,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.16750418760469013,
+      "grad_norm": 0.858985424041748,
+      "learning_rate": 2.5e-05,
+      "loss": 0.5097,
+      "step": 50
+    },
+    {
+      "epoch": 0.33500837520938026,
+      "grad_norm": 2.54575777053833,
+      "learning_rate": 5e-05,
+      "loss": 0.0863,
+      "step": 100
+    },
+    {
+      "epoch": 0.5025125628140703,
+      "grad_norm": 38.31464767456055,
+      "learning_rate": 4.495967741935484e-05,
+      "loss": 0.0248,
+      "step": 150
+    },
+    {
+      "epoch": 0.6700167504187605,
+      "grad_norm": 0.0192014928907156,
+      "learning_rate": 3.991935483870968e-05,
+      "loss": 0.0322,
+      "step": 200
+    },
+    {
+      "epoch": 0.6700167504187605,
+      "eval_accuracy": 0.9941434846266471,
+      "eval_loss": 0.023075200617313385,
+      "eval_runtime": 7.8942,
+      "eval_samples_per_second": 86.519,
+      "eval_steps_per_second": 10.894,
+      "step": 200
+    },
+    {
+      "epoch": 0.8375209380234506,
+      "grad_norm": 0.010706444270908833,
+      "learning_rate": 3.487903225806452e-05,
+      "loss": 0.0155,
+      "step": 250
+    },
+    {
+      "epoch": 1.003350083752094,
+      "grad_norm": 0.030784547328948975,
+      "learning_rate": 2.9838709677419357e-05,
+      "loss": 0.0134,
+      "step": 300
+    },
+    {
+      "epoch": 1.170854271356784,
+      "grad_norm": 0.012720568105578423,
+      "learning_rate": 2.4798387096774196e-05,
+      "loss": 0.015,
+      "step": 350
+    },
+    {
+      "epoch": 1.338358458961474,
+      "grad_norm": 0.0057291556149721146,
+      "learning_rate": 1.975806451612903e-05,
+      "loss": 0.0028,
+      "step": 400
+    },
+    {
+      "epoch": 1.338358458961474,
+      "eval_accuracy": 0.9985358711566618,
+      "eval_loss": 0.003182810265570879,
+      "eval_runtime": 7.8968,
+      "eval_samples_per_second": 86.491,
+      "eval_steps_per_second": 10.891,
+      "step": 400
+    },
+    {
+      "epoch": 1.5058626465661642,
+      "grad_norm": 0.009387499652802944,
+      "learning_rate": 1.4717741935483872e-05,
+      "loss": 0.0003,
+      "step": 450
+    },
+    {
+      "epoch": 1.6733668341708543,
+      "grad_norm": 0.0058885738253593445,
+      "learning_rate": 9.67741935483871e-06,
+      "loss": 0.01,
+      "step": 500
+    },
+    {
+      "epoch": 1.8408710217755444,
+      "grad_norm": 0.0037801910657435656,
+      "learning_rate": 4.637096774193548e-06,
+      "loss": 0.0002,
+      "step": 550
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 596,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 315537343598592.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-596/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb638b9ec3e9109a3efd288a3ca3e8f97fe303983495cc33a011258151b00099
+size 5240

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.49.0",
+  "vocab_size": 30522
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a75078d7a59b6799de31673f39d27b34af0df229c8e3941beebcec1fe94aff22
+size 267832560

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb638b9ec3e9109a3efd288a3ca3e8f97fe303983495cc33a011258151b00099
+size 5240

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff