Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

chat_template.jinja +47 -0
config.json +43 -8
generation_config.json +1 -2
model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
model.safetensors.index.json +1 -0
preprocessor_config.json +1 -0
tokenizer_config.json +0 -1
trainer_state.json +2 -2
training_args.bin +2 -2

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,47 @@

+{{ bos_token }}
+{%- if messages[0]['role'] == 'system' -%}
+    {%- if messages[0]['content'] is string -%}
+        {%- set first_user_prefix = messages[0]['content'] + '
+
+' -%}
+    {%- else -%}
+        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
+
+' -%}
+    {%- endif -%}
+    {%- set loop_messages = messages[1:] -%}
+{%- else -%}
+    {%- set first_user_prefix = "" -%}
+    {%- set loop_messages = messages -%}
+{%- endif -%}
+{%- for message in loop_messages -%}
+    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
+        {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
+    {%- endif -%}
+    {%- if (message['role'] == 'assistant') -%}
+        {%- set role = "model" -%}
+    {%- else -%}
+        {%- set role = message['role'] -%}
+    {%- endif -%}
+    {{ '<start_of_turn>' + role + '
+' + (first_user_prefix if loop.first else "") }}
+    {%- if message['content'] is string -%}
+        {{ message['content'] | trim }}
+    {%- elif message['content'] is iterable -%}
+        {%- for item in message['content'] -%}
+            {%- if item['type'] == 'image' -%}
+                {{ '<start_of_image>' }}
+            {%- elif item['type'] == 'text' -%}
+                {{ item['text'] | trim }}
+            {%- endif -%}
+        {%- endfor -%}
+    {%- else -%}
+        {{ raise_exception("Invalid content type") }}
+    {%- endif -%}
+    {{ '<end_of_turn>
+' }}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{'<start_of_turn>model
+'}}
+{%- endif -%}

config.json CHANGED Viewed

@@ -3,6 +3,7 @@
     "Gemma3ForConditionalGeneration"
   ],
   "boi_token_index": 255999,
   "eoi_token_index": 256000,
   "eos_token_id": [
     1,
@@ -14,16 +15,53 @@
   "model_type": "gemma3",
   "projector_lr": 1e-05,
   "text_config": {
     "attention_bias": false,
     "attention_dropout": 0.0,
     "attn_logit_softcapping": null,
-    "cache_implementation": "hybrid",
     "final_logit_softcapping": null,
     "head_dim": 256,
     "hidden_activation": "gelu_pytorch_tanh",
     "hidden_size": 2560,
     "initializer_range": 0.02,
     "intermediate_size": 10240,
     "max_position_embeddings": 131072,
     "model_type": "gemma3_text",
     "num_attention_heads": 8,
@@ -38,16 +76,14 @@
     },
     "rope_theta": 1000000.0,
     "sliding_window": 1024,
-    "sliding_window_pattern": 6,
-    "torch_dtype": "bfloat16",
     "use_cache": true,
     "vocab_size": 262208
   },
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.51.3",
-  "use_cache": true,
   "vision_config": {
     "attention_dropout": 0.0,
     "hidden_act": "gelu_pytorch_tanh",
     "hidden_size": 1152,
     "image_size": 896,
@@ -58,8 +94,7 @@
     "num_channels": 3,
     "num_hidden_layers": 27,
     "patch_size": 14,
-    "torch_dtype": "bfloat16",
     "vision_use_head": false
   },
-  "vision_lr": 2e-06
 }

     "Gemma3ForConditionalGeneration"
   ],
   "boi_token_index": 255999,
+  "dtype": "bfloat16",
   "eoi_token_index": 256000,
   "eos_token_id": [
     1,
   "model_type": "gemma3",
   "projector_lr": 1e-05,
   "text_config": {
+    "_sliding_window_pattern": 6,
     "attention_bias": false,
     "attention_dropout": 0.0,
     "attn_logit_softcapping": null,
+    "dtype": "bfloat16",
     "final_logit_softcapping": null,
     "head_dim": 256,
     "hidden_activation": "gelu_pytorch_tanh",
     "hidden_size": 2560,
     "initializer_range": 0.02,
     "intermediate_size": 10240,
+    "layer_types": [
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "full_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention",
+      "sliding_attention"
+    ],
     "max_position_embeddings": 131072,
     "model_type": "gemma3_text",
     "num_attention_heads": 8,
     },
     "rope_theta": 1000000.0,
     "sliding_window": 1024,
     "use_cache": true,
     "vocab_size": 262208
   },
+  "transformers_version": "4.56.2",
+  "use_cache": false,
   "vision_config": {
     "attention_dropout": 0.0,
+    "dtype": "bfloat16",
     "hidden_act": "gelu_pytorch_tanh",
     "hidden_size": 1152,
     "image_size": 896,
     "num_channels": 3,
     "num_hidden_layers": 27,
     "patch_size": 14,
     "vision_use_head": false
   },
+  "vision_lr": 2e-07
 }

generation_config.json CHANGED Viewed

@@ -1,5 +1,4 @@
 {
-  "attn_implementation": "eager",
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
   "do_sample": true,
@@ -10,5 +9,5 @@
   "pad_token_id": 0,
   "top_k": 64,
   "top_p": 0.95,
-  "transformers_version": "4.51.3"
 }

 {
   "bos_token_id": 2,
   "cache_implementation": "hybrid",
   "do_sample": true,
   "pad_token_id": 0,
   "top_k": 64,
   "top_p": 0.95,
+  "transformers_version": "4.56.2"
 }

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f14754a6dfb75799d0f7f7ea96bbfbeab100c5a045e4b26ce9d70499e7974d0e
 size 4961251752

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5d9a77e86c1357e257f687f60a88bc0515b87b98453ae4e6f9822786fcedee0
 size 4961251752

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b6c0122c7e2a0a1b5efffc9a5b49df579290c6411d81e1cf8e7e78c8cd9c1bb
 size 4981531360

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9aff74d9968d7cc46bce45054180ef7329ca542532bf8e1cd8ec235601f1280
 size 4981531360

model.safetensors.index.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
   "metadata": {
     "total_size": 9942663904
   },
   "weight_map": {

 {
   "metadata": {
+    "total_parameters": 4300079472,
     "total_size": 9942663904
   },
   "weight_map": {

preprocessor_config.json CHANGED Viewed

@@ -3,6 +3,7 @@
   "data_format": "channels_first",
   "default_to_square": true,
   "device": null,
   "do_center_crop": null,
   "do_convert_rgb": null,
   "do_normalize": true,

   "data_format": "channels_first",
   "default_to_square": true,
   "device": null,
+  "disable_grouping": null,
   "do_center_crop": null,
   "do_convert_rgb": null,
   "do_normalize": true,

tokenizer_config.json CHANGED Viewed

@@ -51325,7 +51325,6 @@
   },
   "boi_token": "<start_of_image>",
   "bos_token": "<bos>",
-  "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n    {%- if messages[0]['content'] is string -%}\n        {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n    {%- else -%}\n        {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n    {%- endif -%}\n    {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n    {%- set first_user_prefix = \"\" -%}\n    {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n        {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n    {%- endif -%}\n    {%- if (message['role'] == 'assistant') -%}\n        {%- set role = \"model\" -%}\n    {%- else -%}\n        {%- set role = message['role'] -%}\n    {%- endif -%}\n    {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n    {%- if message['content'] is string -%}\n        {{ message['content'] | trim }}\n    {%- elif message['content'] is iterable -%}\n        {%- for item in message['content'] -%}\n            {%- if item['type'] == 'image' -%}\n                {{ '<start_of_image>' }}\n            {%- elif item['type'] == 'text' -%}\n                {{ item['text'] | trim }}\n            {%- endif -%}\n        {%- endfor -%}\n    {%- else -%}\n        {{ raise_exception(\"Invalid content type\") }}\n    {%- endif -%}\n    {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n    {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<end_of_image>",
   "eos_token": "<eos>",

   },
   "boi_token": "<start_of_image>",
   "bos_token": "<bos>",
   "clean_up_tokenization_spaces": false,
   "eoi_token": "<end_of_image>",
   "eos_token": "<eos>",

trainer_state.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83490120dd7e1cf6a7140a26734eb574276ca6c824fec30f83f638ae410b59cf
-size 33348269

 version https://git-lfs.github.com/spec/v1
+oid sha256:16b2511cceb96f7d1d9121e08b005a7ee57f2d82a1ffc78f975569438275dd43
+size 941672

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:943dbe161f39363cefe248ee99e67069490bd33278e55d04272c11b4fa4932bf
-size 7736

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa3a1f1ebe49efb547a861186ccb3fe2e5d6169d2545c77525f68141e4295b43
+size 8273