guyhadad01 committed on
Commit
4664ca2
·
verified ·
1 Parent(s): df44d7d

Training in progress, step 200

Browse files
config.json CHANGED
@@ -1,61 +1,25 @@
1
  {
2
- "_sliding_window_pattern": 6,
3
  "architectures": [
4
- "Gemma3TextModel"
5
  ],
6
- "attention_bias": false,
7
- "attention_dropout": 0.0,
8
- "attn_logit_softcapping": null,
9
- "bos_token_id": 2,
10
- "dtype": "float32",
11
- "eos_token_id": 1,
12
- "final_logit_softcapping": null,
13
- "head_dim": 256,
14
- "hidden_activation": "gelu_pytorch_tanh",
15
- "hidden_size": 768,
16
  "initializer_range": 0.02,
17
- "intermediate_size": 1152,
18
- "layer_types": [
19
- "sliding_attention",
20
- "sliding_attention",
21
- "sliding_attention",
22
- "sliding_attention",
23
- "sliding_attention",
24
- "full_attention",
25
- "sliding_attention",
26
- "sliding_attention",
27
- "sliding_attention",
28
- "sliding_attention",
29
- "sliding_attention",
30
- "full_attention",
31
- "sliding_attention",
32
- "sliding_attention",
33
- "sliding_attention",
34
- "sliding_attention",
35
- "sliding_attention",
36
- "full_attention",
37
- "sliding_attention",
38
- "sliding_attention",
39
- "sliding_attention",
40
- "sliding_attention",
41
- "sliding_attention",
42
- "full_attention"
43
- ],
44
- "max_position_embeddings": 2048,
45
- "model_type": "gemma3_text",
46
- "num_attention_heads": 3,
47
- "num_hidden_layers": 24,
48
- "num_key_value_heads": 1,
49
  "pad_token_id": 0,
50
- "query_pre_attn_scalar": 256,
51
- "rms_norm_eps": 1e-06,
52
- "rope_local_base_freq": 10000.0,
53
- "rope_scaling": null,
54
- "rope_theta": 1000000.0,
55
- "sliding_window": 512,
56
  "torch_dtype": "float32",
57
  "transformers_version": "4.55.2",
58
- "use_bidirectional_attention": true,
59
  "use_cache": true,
60
- "vocab_size": 262144
61
  }
 
1
  {
 
2
  "architectures": [
3
+ "BertModel"
4
  ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 384,
 
 
 
 
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 1536,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 6,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
 
 
 
 
 
20
  "torch_dtype": "float32",
21
  "transformers_version": "4.55.2",
22
+ "type_vocab_size": 2,
23
  "use_cache": true,
24
+ "vocab_size": 30522
25
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e240cf271268992d16d1852e9c2f8faa0358f5909badd9cb77e979db5eeec01
3
- size 1211486072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee6d5fe7579a8728e8ce1b5a3d16cc6d66c5719392898bc44d8ff0f2fa3b0a9f
3
+ size 90864192
runs/Sep28_19-44-56_ip-172-31-44-130/events.out.tfevents.1759088698.ip-172-31-44-130 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bf06ba47f568bbde925f1b5c91a9b374f2f9e71a01aec85a863b6732efdba0c
3
+ size 5015
special_tokens_map.json CHANGED
@@ -1,30 +1,34 @@
1
  {
2
- "boi_token": "<start_of_image>",
3
- "bos_token": {
4
- "content": "<bos>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
- "eoi_token": "<end_of_image>",
11
- "eos_token": {
12
- "content": "<eos>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false
17
  },
18
- "image_token": "<image_soft_token>",
19
  "pad_token": {
20
- "content": "<pad>",
 
 
 
 
 
 
 
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false
25
  },
26
  "unk_token": {
27
- "content": "<unk>",
28
  "lstrip": false,
29
  "normalized": false,
30
  "rstrip": false,
 
1
  {
2
+ "cls_token": {
3
+ "content": "[CLS]",
 
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
+ "mask_token": {
10
+ "content": "[MASK]",
 
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
16
  "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
28
  "single_word": false
29
  },
30
  "unk_token": {
31
+ "content": "[UNK]",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c79a190be01275b078b3574d02188abc5784e5651a101b20d826371ba8e897dc
3
- size 33385261
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91f1def9b9391fdabe028cd3f3fcc4efd34e5d1f08c3bf2de513ebb5911a1854
3
+ size 711649
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41bbb7d97dbf5fc36f4da6eb62b2c1b3fde18fa18ee31c63764adb9fcb45d686
3
  size 6097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d38868219e5f23127c67539cb1aea90b6a40b71d9518f7f8f35a1f4dc71c50de
3
  size 6097
vocab.txt ADDED
The diff for this file is too large to render. See raw diff