kashparty committed on
Commit
8db397d
·
verified ·
1 Parent(s): ad82495

Training in progress, epoch 2

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c036511430a8237c8eea7775495c0c2954cfb32f7c69963d98a197cfc54b60b2
3
  size 578898352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3176e54cd5919e675f983531a89c3b226c2637143bf280f193683173f3ca4e95
3
  size 578898352
config.json CHANGED
@@ -1,55 +1,44 @@
1
  {
2
- "_name_or_path": "answerdotai/ModernBERT-base",
3
  "architectures": [
4
- "ModernBertForSequenceClassification"
5
  ],
6
- "attention_bias": false,
7
- "attention_dropout": 0.0,
8
- "bos_token_id": 50281,
9
- "classifier_activation": "gelu",
10
- "classifier_bias": false,
11
- "classifier_dropout": 0.0,
12
- "classifier_pooling": "mean",
13
- "cls_token_id": 50281,
14
- "decoder_bias": true,
15
- "deterministic_flash_attn": false,
16
- "embedding_dropout": 0.0,
17
- "eos_token_id": 50282,
18
- "global_attn_every_n_layers": 3,
19
- "global_rope_theta": 160000.0,
20
- "gradient_checkpointing": false,
21
- "hidden_activation": "gelu",
22
  "hidden_size": 768,
23
  "id2label": {
24
  "0": "NEGATIVE",
25
  "1": "POSITIVE"
26
  },
27
- "initializer_cutoff_factor": 2.0,
28
  "initializer_range": 0.02,
29
- "intermediate_size": 1152,
30
  "label2id": {
31
  "NEGATIVE": 0,
32
  "POSITIVE": 1
33
  },
34
- "layer_norm_eps": 1e-05,
35
- "local_attention": 128,
36
- "local_rope_theta": 10000.0,
37
- "max_position_embeddings": 8192,
38
- "mlp_bias": false,
39
- "mlp_dropout": 0.0,
40
- "model_type": "modernbert",
41
- "norm_bias": false,
42
- "norm_eps": 1e-05,
43
  "num_attention_heads": 12,
44
- "num_hidden_layers": 22,
45
- "pad_token_id": 50283,
46
- "position_embedding_type": "absolute",
47
- "reference_compile": true,
48
- "repad_logits_with_grad": false,
49
- "sep_token_id": 50282,
50
- "sparse_pred_ignore_index": -100,
51
- "sparse_prediction": false,
 
 
 
 
 
52
  "torch_dtype": "float32",
53
  "transformers_version": "4.48.3",
54
- "vocab_size": 50368
 
55
  }
 
1
  {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
  ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "NEGATIVE",
12
  "1": "POSITIVE"
13
  },
 
14
  "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
  "label2id": {
17
  "NEGATIVE": 0,
18
  "POSITIVE": 1
19
  },
20
+ "layer_norm_eps": 1e-07,
21
+ "legacy": true,
22
+ "max_position_embeddings": 512,
23
+ "max_relative_positions": -1,
24
+ "model_type": "deberta-v2",
25
+ "norm_rel_ebd": "layer_norm",
 
 
 
26
  "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "pooler_dropout": 0,
30
+ "pooler_hidden_act": "gelu",
31
+ "pooler_hidden_size": 768,
32
+ "pos_att_type": [
33
+ "p2c",
34
+ "c2p"
35
+ ],
36
+ "position_biased_input": false,
37
+ "position_buckets": 256,
38
+ "relative_attention": true,
39
+ "share_att_key": true,
40
  "torch_dtype": "float32",
41
  "transformers_version": "4.48.3",
42
+ "type_vocab_size": 0,
43
+ "vocab_size": 128100
44
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1baf2c59ca1866a584e3d2dabade35a20feefdf0728e62c665bea5365b2cb2b5
3
- size 598439784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e16ef993a03095c95e697ce1fb86368493e8d811fee7a52b954efda966bd848f
3
+ size 737719272
runs/Feb26_04-11-30_b40b033170fa/events.out.tfevents.1740543090.b40b033170fa.19395.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f83de1405563b619dae14d34a0921dcd98ad6d395c935b9fd43ba86f6a72f84d
3
- size 6847
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0477e386443367be75dfe7cf83fd22fc024f1e5eaa1de8f4360d2c7ea4c9c60
3
+ size 8219