ccaug committed on
Commit
3fc0991
·
verified ·
1 Parent(s): 4d11a5d

ccaug/modernbert-pcap_2

Browse files
Files changed (5) hide show
  1. README.md +21 -19
  2. config.json +8 -8
  3. model.safetensors +2 -2
  4. tokenizer.json +2 -2
  5. training_args.bin +2 -2
README.md CHANGED
@@ -21,11 +21,11 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the None dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.0785
25
- - Accuracy: 0.9859
26
- - F1: 0.9821
27
- - Precision: 0.9784
28
- - Recall: 0.9859
29
 
30
  ## Model description
31
 
@@ -50,27 +50,29 @@ The following hyperparameters were used during training:
50
  - seed: 42
51
  - gradient_accumulation_steps: 2
52
  - total_train_batch_size: 12
53
- - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
54
  - lr_scheduler_type: linear
55
  - num_epochs: 1
56
  - mixed_precision_training: Native AMP
57
 
58
  ### Training results
59
 
60
- | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
61
- |:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
62
- | No log | 0.125 | 25 | 1.5465 | 0.4375 | 0.2954 | 0.2488 | 0.4375 |
63
- | No log | 0.25 | 50 | 0.6815 | 0.7484 | 0.7144 | 0.7826 | 0.7484 |
64
- | No log | 0.375 | 75 | 0.5321 | 0.8281 | 0.7816 | 0.7651 | 0.8281 |
65
- | No log | 0.5 | 100 | 0.3030 | 0.9125 | 0.9002 | 0.9154 | 0.9125 |
66
- | No log | 0.625 | 125 | 0.1586 | 0.9625 | 0.9587 | 0.9561 | 0.9625 |
67
- | No log | 0.75 | 150 | 0.0844 | 0.9781 | 0.9743 | 0.9710 | 0.9781 |
68
- | No log | 0.875 | 175 | 0.0785 | 0.9859 | 0.9821 | 0.9784 | 0.9859 |
 
 
69
 
70
 
71
  ### Framework versions
72
 
73
- - Transformers 4.48.3
74
- - Pytorch 2.6.0+cu124
75
- - Datasets 3.4.0
76
- - Tokenizers 0.21.0
 
21
 
22
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the None dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.1729
25
+ - Accuracy: 0.9439
26
+ - F1: 0.9439
27
+ - Precision: 0.9461
28
+ - Recall: 0.9439
29
 
30
  ## Model description
31
 
 
50
  - seed: 42
51
  - gradient_accumulation_steps: 2
52
  - total_train_batch_size: 12
53
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
54
  - lr_scheduler_type: linear
55
  - num_epochs: 1
56
  - mixed_precision_training: Native AMP
57
 
58
  ### Training results
59
 
60
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
61
+ |:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
62
+ | 3.3687 | 0.1111 | 25 | 1.3275 | 0.425 | 0.3703 | 0.6006 | 0.425 |
63
+ | 1.9545 | 0.2222 | 50 | 0.8983 | 0.6839 | 0.6536 | 0.6803 | 0.6839 |
64
+ | 1.2368 | 0.3333 | 75 | 0.4575 | 0.8367 | 0.8323 | 0.8547 | 0.8367 |
65
+ | 0.9007 | 0.4444 | 100 | 0.4360 | 0.8578 | 0.8436 | 0.8784 | 0.8578 |
66
+ | 0.8601 | 0.5556 | 125 | 0.2811 | 0.8856 | 0.8869 | 0.8931 | 0.8856 |
67
+ | 0.5962 | 0.6667 | 150 | 0.3887 | 0.8817 | 0.8717 | 0.9074 | 0.8817 |
68
+ | 0.5623 | 0.7778 | 175 | 0.2442 | 0.9128 | 0.9102 | 0.9233 | 0.9128 |
69
+ | 0.295 | 0.8889 | 200 | 0.2014 | 0.9283 | 0.9264 | 0.9361 | 0.9283 |
70
+ | 0.6196 | 1.0 | 225 | 0.1729 | 0.9439 | 0.9439 | 0.9461 | 0.9439 |
71
 
72
 
73
  ### Framework versions
74
 
75
+ - Transformers 4.57.1
76
+ - Pytorch 2.8.0+cu126
77
+ - Datasets 4.0.0
78
+ - Tokenizers 0.22.1
config.json CHANGED
@@ -1,11 +1,10 @@
1
  {
2
- "_name_or_path": "answerdotai/ModernBERT-base",
3
  "architectures": [
4
  "ModernBertForSequenceClassification"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
8
- "bos_token_id": 50281,
9
  "classifier_activation": "gelu",
10
  "classifier_bias": false,
11
  "classifier_dropout": 0.0,
@@ -13,8 +12,9 @@
13
  "cls_token_id": 50281,
14
  "decoder_bias": true,
15
  "deterministic_flash_attn": false,
 
16
  "embedding_dropout": 0.0,
17
- "eos_token_id": 50282,
18
  "global_attn_every_n_layers": 3,
19
  "global_rope_theta": 160000.0,
20
  "gradient_checkpointing": false,
@@ -28,7 +28,8 @@
28
  "4": "LABEL_4",
29
  "5": "LABEL_5",
30
  "6": "LABEL_6",
31
- "7": "LABEL_7"
 
32
  },
33
  "initializer_cutoff_factor": 2.0,
34
  "initializer_range": 0.02,
@@ -41,7 +42,8 @@
41
  "LABEL_4": 4,
42
  "LABEL_5": 5,
43
  "LABEL_6": 6,
44
- "LABEL_7": 7
 
45
  },
46
  "layer_norm_eps": 1e-05,
47
  "local_attention": 128,
@@ -57,12 +59,10 @@
57
  "pad_token_id": 50283,
58
  "position_embedding_type": "absolute",
59
  "problem_type": "single_label_classification",
60
- "reference_compile": true,
61
  "repad_logits_with_grad": false,
62
  "sep_token_id": 50282,
63
  "sparse_pred_ignore_index": -100,
64
  "sparse_prediction": false,
65
- "torch_dtype": "float32",
66
- "transformers_version": "4.48.3",
67
  "vocab_size": 50368
68
  }
 
1
  {
 
2
  "architectures": [
3
  "ModernBertForSequenceClassification"
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
  "classifier_activation": "gelu",
9
  "classifier_bias": false,
10
  "classifier_dropout": 0.0,
 
12
  "cls_token_id": 50281,
13
  "decoder_bias": true,
14
  "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
  "embedding_dropout": 0.0,
17
+ "eos_token_id": null,
18
  "global_attn_every_n_layers": 3,
19
  "global_rope_theta": 160000.0,
20
  "gradient_checkpointing": false,
 
28
  "4": "LABEL_4",
29
  "5": "LABEL_5",
30
  "6": "LABEL_6",
31
+ "7": "LABEL_7",
32
+ "8": "LABEL_8"
33
  },
34
  "initializer_cutoff_factor": 2.0,
35
  "initializer_range": 0.02,
 
42
  "LABEL_4": 4,
43
  "LABEL_5": 5,
44
  "LABEL_6": 6,
45
+ "LABEL_7": 7,
46
+ "LABEL_8": 8
47
  },
48
  "layer_norm_eps": 1e-05,
49
  "local_attention": 128,
 
59
  "pad_token_id": 50283,
60
  "position_embedding_type": "absolute",
61
  "problem_type": "single_label_classification",
 
62
  "repad_logits_with_grad": false,
63
  "sep_token_id": 50282,
64
  "sparse_pred_ignore_index": -100,
65
  "sparse_prediction": false,
66
+ "transformers_version": "4.57.1",
 
67
  "vocab_size": 50368
68
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1adce4623ec98ce1e8a4271d5744552e9382f14946e3d8fa566a0d5d3b5f0de
3
- size 598458240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73a22c47896844149915e77622eabcec9a3cfd867243f68d5bd0687b11a00497
3
+ size 598461316
tokenizer.json CHANGED
@@ -2,13 +2,13 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 8192,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
- "Fixed": 8192
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 512,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
  "padding": {
10
  "strategy": {
11
+ "Fixed": 512
12
  },
13
  "direction": "Right",
14
  "pad_to_multiple_of": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d45de738ffef306b4138f74ff30b89973d6aae780063db7b619b5bbc00356014
3
- size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ec83b42e86ca222902aeff78581a6aecbaa9bd51002bddc3f3eb0fae99720f
3
+ size 5777