Training in progress, step 200

Browse files

Files changed (8) hide show

config.json +16 -52
model.safetensors +2 -2
runs/Sep28_19-44-56_ip-172-31-44-130/events.out.tfevents.1759088698.ip-172-31-44-130 +3 -0
special_tokens_map.json +13 -9
tokenizer.json +2 -2
tokenizer_config.json +0 -0
training_args.bin +1 -1
vocab.txt +0 -0

config.json CHANGED Viewed

@@ -1,61 +1,25 @@
 {
-  "_sliding_window_pattern": 6,
   "architectures": [
-    "Gemma3TextModel"
   ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "attn_logit_softcapping": null,
-  "bos_token_id": 2,
-  "dtype": "float32",
-  "eos_token_id": 1,
-  "final_logit_softcapping": null,
-  "head_dim": 256,
-  "hidden_activation": "gelu_pytorch_tanh",
-  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size": 1152,
-  "layer_types": [
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "sliding_attention",
-    "full_attention"
-  ],
-  "max_position_embeddings": 2048,
-  "model_type": "gemma3_text",
-  "num_attention_heads": 3,
-  "num_hidden_layers": 24,
-  "num_key_value_heads": 1,
   "pad_token_id": 0,
-  "query_pre_attn_scalar": 256,
-  "rms_norm_eps": 1e-06,
-  "rope_local_base_freq": 10000.0,
-  "rope_scaling": null,
-  "rope_theta": 1000000.0,
-  "sliding_window": 512,
   "torch_dtype": "float32",
   "transformers_version": "4.55.2",
-  "use_bidirectional_attention": true,
   "use_cache": true,
-  "vocab_size": 262144
 }

 {
   "architectures": [
+    "BertModel"
   ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
   "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
   "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.55.2",
+  "type_vocab_size": 2,
   "use_cache": true,
+  "vocab_size": 30522
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e240cf271268992d16d1852e9c2f8faa0358f5909badd9cb77e979db5eeec01
-size 1211486072

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee6d5fe7579a8728e8ce1b5a3d16cc6d66c5719392898bc44d8ff0f2fa3b0a9f
+size 90864192

runs/Sep28_19-44-56_ip-172-31-44-130/events.out.tfevents.1759088698.ip-172-31-44-130 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bf06ba47f568bbde925f1b5c91a9b374f2f9e71a01aec85a863b6732efdba0c
+size 5015

special_tokens_map.json CHANGED Viewed

@@ -1,30 +1,34 @@
 {
-  "boi_token": "<start_of_image>",
-  "bos_token": {
-    "content": "<bos>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "eoi_token": "<end_of_image>",
-  "eos_token": {
-    "content": "<eos>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "image_token": "<image_soft_token>",
   "pad_token": {
-    "content": "<pad>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
-    "content": "<unk>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

 {
+  "cls_token": {
+    "content": "[CLS]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
+  "mask_token": {
+    "content": "[MASK]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
+    "content": "[UNK]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c79a190be01275b078b3574d02188abc5784e5651a101b20d826371ba8e897dc
-size 33385261

 version https://git-lfs.github.com/spec/v1
+oid sha256:91f1def9b9391fdabe028cd3f3fcc4efd34e5d1f08c3bf2de513ebb5911a1854
+size 711649

tokenizer_config.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41bbb7d97dbf5fc36f4da6eb62b2c1b3fde18fa18ee31c63764adb9fcb45d686
 size 6097

 version https://git-lfs.github.com/spec/v1
+oid sha256:d38868219e5f23127c67539cb1aea90b6a40b71d9518f7f8f35a1f4dc71c50de
 size 6097

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff