ccaug/modernbert-pcap_2

Browse files

Files changed (5) hide show

README.md +21 -19
config.json +8 -8
model.safetensors +2 -2
tokenizer.json +2 -2
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0785
-- Accuracy: 0.9859
-- F1: 0.9821
-- Precision: 0.9784
-- Recall: 0.9859
 ## Model description
@@ -50,27 +50,29 @@ The following hyperparameters were used during training:
 - seed: 42
 - gradient_accumulation_steps: 2
 - total_train_batch_size: 12
-- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - num_epochs: 1
 - mixed_precision_training: Native AMP
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1     | Precision | Recall |
-|:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
-| No log        | 0.125 | 25   | 1.5465          | 0.4375   | 0.2954 | 0.2488    | 0.4375 |
-| No log        | 0.25  | 50   | 0.6815          | 0.7484   | 0.7144 | 0.7826    | 0.7484 |
-| No log        | 0.375 | 75   | 0.5321          | 0.8281   | 0.7816 | 0.7651    | 0.8281 |
-| No log        | 0.5   | 100  | 0.3030          | 0.9125   | 0.9002 | 0.9154    | 0.9125 |
-| No log        | 0.625 | 125  | 0.1586          | 0.9625   | 0.9587 | 0.9561    | 0.9625 |
-| No log        | 0.75  | 150  | 0.0844          | 0.9781   | 0.9743 | 0.9710    | 0.9781 |
-| No log        | 0.875 | 175  | 0.0785          | 0.9859   | 0.9821 | 0.9784    | 0.9859 |
 ### Framework versions
-- Transformers 4.48.3
-- Pytorch 2.6.0+cu124
-- Datasets 3.4.0
-- Tokenizers 0.21.0

 This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.1729
+- Accuracy: 0.9439
+- F1: 0.9439
+- Precision: 0.9461
+- Recall: 0.9439
 ## Model description
 - seed: 42
 - gradient_accumulation_steps: 2
 - total_train_batch_size: 12
+- optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - num_epochs: 1
 - mixed_precision_training: Native AMP
 ### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy | F1     | Precision | Recall |
+|:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
+| 3.3687        | 0.1111 | 25   | 1.3275          | 0.425    | 0.3703 | 0.6006    | 0.425  |
+| 1.9545        | 0.2222 | 50   | 0.8983          | 0.6839   | 0.6536 | 0.6803    | 0.6839 |
+| 1.2368        | 0.3333 | 75   | 0.4575          | 0.8367   | 0.8323 | 0.8547    | 0.8367 |
+| 0.9007        | 0.4444 | 100  | 0.4360          | 0.8578   | 0.8436 | 0.8784    | 0.8578 |
+| 0.8601        | 0.5556 | 125  | 0.2811          | 0.8856   | 0.8869 | 0.8931    | 0.8856 |
+| 0.5962        | 0.6667 | 150  | 0.3887          | 0.8817   | 0.8717 | 0.9074    | 0.8817 |
+| 0.5623        | 0.7778 | 175  | 0.2442          | 0.9128   | 0.9102 | 0.9233    | 0.9128 |
+| 0.295         | 0.8889 | 200  | 0.2014          | 0.9283   | 0.9264 | 0.9361    | 0.9283 |
+| 0.6196        | 1.0    | 225  | 0.1729          | 0.9439   | 0.9439 | 0.9461    | 0.9439 |
 ### Framework versions
+- Transformers 4.57.1
+- Pytorch 2.8.0+cu126
+- Datasets 4.0.0
+- Tokenizers 0.22.1

config.json CHANGED Viewed

@@ -1,11 +1,10 @@
 {
-  "_name_or_path": "answerdotai/ModernBERT-base",
   "architectures": [
     "ModernBertForSequenceClassification"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 50281,
   "classifier_activation": "gelu",
   "classifier_bias": false,
   "classifier_dropout": 0.0,
@@ -13,8 +12,9 @@
   "cls_token_id": 50281,
   "decoder_bias": true,
   "deterministic_flash_attn": false,
   "embedding_dropout": 0.0,
-  "eos_token_id": 50282,
   "global_attn_every_n_layers": 3,
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
@@ -28,7 +28,8 @@
     "4": "LABEL_4",
     "5": "LABEL_5",
     "6": "LABEL_6",
-    "7": "LABEL_7"
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
@@ -41,7 +42,8 @@
     "LABEL_4": 4,
     "LABEL_5": 5,
     "LABEL_6": 6,
-    "LABEL_7": 7
   },
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
@@ -57,12 +59,10 @@
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
-  "reference_compile": true,
   "repad_logits_with_grad": false,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.48.3",
   "vocab_size": 50368
 }

 {
   "architectures": [
     "ModernBertForSequenceClassification"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "bos_token_id": null,
   "classifier_activation": "gelu",
   "classifier_bias": false,
   "classifier_dropout": 0.0,
   "cls_token_id": 50281,
   "decoder_bias": true,
   "deterministic_flash_attn": false,
+  "dtype": "float32",
   "embedding_dropout": 0.0,
+  "eos_token_id": null,
   "global_attn_every_n_layers": 3,
   "global_rope_theta": 160000.0,
   "gradient_checkpointing": false,
     "4": "LABEL_4",
     "5": "LABEL_5",
     "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8"
   },
   "initializer_cutoff_factor": 2.0,
   "initializer_range": 0.02,
     "LABEL_4": 4,
     "LABEL_5": 5,
     "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8
   },
   "layer_norm_eps": 1e-05,
   "local_attention": 128,
   "pad_token_id": 50283,
   "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
   "repad_logits_with_grad": false,
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
+  "transformers_version": "4.57.1",
   "vocab_size": 50368
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1adce4623ec98ce1e8a4271d5744552e9382f14946e3d8fa566a0d5d3b5f0de
-size 598458240

 version https://git-lfs.github.com/spec/v1
+oid sha256:73a22c47896844149915e77622eabcec9a3cfd867243f68d5bd0687b11a00497
+size 598461316

tokenizer.json CHANGED Viewed

@@ -2,13 +2,13 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 8192,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
-      "Fixed": 8192
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
   "padding": {
     "strategy": {
+      "Fixed": 512
     },
     "direction": "Right",
     "pad_to_multiple_of": null,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d45de738ffef306b4138f74ff30b89973d6aae780063db7b619b5bbc00356014
-size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:f7ec83b42e86ca222902aeff78581a6aecbaa9bd51002bddc3f3eb0fae99720f
+size 5777