santoshmds21
/

bert-phishing-classifier_teacher

@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7047
-- Accuracy: 0.491
-- Auc: 0.75
 ## Model description
@@ -43,7 +43,7 @@ The following hyperparameters were used during training:
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
-- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - num_epochs: 10
@@ -51,21 +51,21 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | Accuracy | Auc   |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|:-----:|
-| 0.7135        | 1.0   | 263  | 0.6957          | 0.509    | 0.692 |
-| 0.7053        | 2.0   | 526  | 0.7073          | 0.491    | 0.274 |
-| 0.7033        | 3.0   | 789  | 0.7039          | 0.509    | 0.701 |
-| 0.7025        | 4.0   | 1052 | 0.6955          | 0.491    | 0.471 |
-| 0.6995        | 5.0   | 1315 | 0.7008          | 0.491    | 0.533 |
-| 0.6993        | 6.0   | 1578 | 0.6982          | 0.491    | 0.708 |
-| 0.696         | 7.0   | 1841 | 0.6993          | 0.491    | 0.654 |
-| 0.6939        | 8.0   | 2104 | 0.6954          | 0.491    | 0.705 |
-| 0.6907        | 9.0   | 2367 | 0.6994          | 0.491    | 0.673 |
-| 0.6946        | 10.0  | 2630 | 0.7047          | 0.491    | 0.75  |
 ### Framework versions
-- Transformers 4.52.4
-- Pytorch 2.6.0+cu124
-- Datasets 3.6.0
-- Tokenizers 0.21.1

 This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.2897
+- Accuracy: 0.864
+- Auc: 0.951
 ## Model description
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH_FUSED) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
 - num_epochs: 10
 | Training Loss | Epoch | Step | Validation Loss | Accuracy | Auc   |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|:-----:|
+| 0.5042        | 1.0   | 263  | 0.3865          | 0.813    | 0.912 |
+| 0.4105        | 2.0   | 526  | 0.3380          | 0.847    | 0.931 |
+| 0.3583        | 3.0   | 789  | 0.3148          | 0.856    | 0.939 |
+| 0.3553        | 4.0   | 1052 | 0.3454          | 0.851    | 0.945 |
+| 0.3511        | 5.0   | 1315 | 0.3450          | 0.860    | 0.948 |
+| 0.3477        | 6.0   | 1578 | 0.2906          | 0.871    | 0.950 |
+| 0.3346        | 7.0   | 1841 | 0.2879          | 0.876    | 0.950 |
+| 0.3096        | 8.0   | 2104 | 0.2892          | 0.869    | 0.950 |
+| 0.3153        | 9.0   | 2367 | 0.2841          | 0.880    | 0.951 |
+| 0.3140        | 10.0  | 2630 | 0.2897          | 0.864    | 0.951 |
 ### Framework versions
+- Transformers 5.0.0
+- Pytorch 2.9.0+cu128
+- Datasets 4.0.0
+- Tokenizers 0.22.2

config.json CHANGED Viewed

@@ -1,9 +1,13 @@
 {
   "architectures": [
     "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
@@ -14,6 +18,7 @@
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
     "Not Safe": 1,
     "Safe": 0
@@ -26,9 +31,9 @@
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
-  "torch_dtype": "float32",
-  "transformers_version": "4.52.4",
   "type_vocab_size": 2,
-  "use_cache": true,
   "vocab_size": 30522
 }

 {
+  "add_cross_attention": false,
   "architectures": [
     "BertForSequenceClassification"
   ],
   "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": null,
   "classifier_dropout": null,
+  "dtype": "float32",
+  "eos_token_id": null,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
+  "is_decoder": false,
   "label2id": {
     "Not Safe": 1,
     "Safe": 0
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
+  "tie_word_embeddings": true,
+  "transformers_version": "5.0.0",
   "type_vocab_size": 2,
+  "use_cache": false,
   "vocab_size": 30522
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6df64b9931b12c83211a9ba030e7d537ddd03b464a1d8f0d317147b5f34ea82
-size 437958648

 version https://git-lfs.github.com/spec/v1
+oid sha256:5420ca952cc43dc0d622129400eb0245c8e79708adefac6859ec5dcd5e4e4383
+size 437958624

tokenizer_config.json CHANGED Viewed

@@ -1,50 +1,8 @@
 {
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "100": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "clean_up_tokenization_spaces": false,
   "cls_token": "[CLS]",
   "do_lower_case": true,
-  "extra_special_tokens": {},
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",

 {
+  "backend": "tokenizers",
   "cls_token": "[CLS]",
   "do_lower_case": true,
+  "is_local": false,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09a6e9bc44b14e5c67ec1b7b660450f8bb920def366f7a2218c5ae1db00e4764
-size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f59c3ead0ade00341beb61ed9e00321ae520b2f6f46fcfb3ac32e5e0f011b17
+size 5201