guyhadad01 committed on Oct 24, 2025

Commit

de28f07

verified ·

1 Parent(s): 2b712ea

Training in progress, step 200

Browse files

Files changed (24) hide show

.gitattributes +1 -0
added_tokens.json +3 -0
chat_template.jinja +85 -0
config.json +60 -0
merges.txt +0 -0
model.safetensors +3 -0
runs/Oct10_09-14-23_ip-172-31-44-130/events.out.tfevents.1760087665.ip-172-31-44-130.129292.0 +3 -0
runs/Oct13_11-31-30_ip-172-31-44-130/events.out.tfevents.1760355092.ip-172-31-44-130.64297.0 +3 -0
runs/Oct13_11-32-27_ip-172-31-44-130/events.out.tfevents.1760355148.ip-172-31-44-130.75867.0 +3 -0
runs/Oct13_11-35-30_ip-172-31-44-130/events.out.tfevents.1760355331.ip-172-31-44-130.76888.0 +3 -0
runs/Oct15_07-27-48_ip-172-31-44-130/events.out.tfevents.1760513270.ip-172-31-44-130.39696.0 +3 -0
runs/Oct15_07-28-13_ip-172-31-44-130/events.out.tfevents.1760513294.ip-172-31-44-130.39696.1 +3 -0
runs/Oct15_07-56-55_ip-172-31-44-130/events.out.tfevents.1760515017.ip-172-31-44-130.77663.0 +3 -0
runs/Oct15_07-59-24_ip-172-31-44-130/events.out.tfevents.1760515166.ip-172-31-44-130.77663.1 +3 -0
runs/Oct24_08-23-31_ip-172-31-44-130/events.out.tfevents.1761294212.ip-172-31-44-130.20083.0 +3 -0
runs/Oct24_09-31-35_ip-172-31-44-130/events.out.tfevents.1761298297.ip-172-31-44-130.94906.0 +3 -0
runs/Oct24_09-34-08_ip-172-31-44-130/events.out.tfevents.1761298449.ip-172-31-44-130.96236.0 +3 -0
special_tokens_map.json +33 -0
tokenizer.json +3 -0
tokenizer.model +3 -0
tokenizer_config.json +0 -0
training_args.bin +3 -0
vocab.json +0 -0
vocab.txt +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<image_soft_token>": 262144
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,85 @@

+{%- if tools %} {#- Tool mode: open a system turn that also lists the tool signatures. #}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {#- Defaults to the final message index when no plain user turn is found below. #}
+{%- for message in messages[::-1] %} {#- Scan backwards for the last user turn that is not a wrapped tool response. #}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set content = message.content if message.content is string else '' %} {#- Non-string content (e.g. null on a tool-call-only turn) renders as an empty string instead of raising. #}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %} {#- Inline reasoning: split it from the visible answer at the closing think tag. #}
+                {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
+                {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %} {#- A think block is only emitted for assistant turns after the last plain user query. #}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %} {#- Accept both the nested form (tool_call.function) and the flat form. #}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %} {#- String arguments are emitted verbatim; anything else is serialised with tojson. #}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} {#- Consecutive tool messages share a single user turn. #}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %} {#- Thinking explicitly disabled: pre-fill an empty think block. #}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "Gemma3TextModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "dtype": "bfloat16",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 1152,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 2048,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 3,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 257,
+  "transformers_version": "4.57.0",
+  "use_bidirectional_attention": true,
+  "use_cache": true,
+  "vocab_size": 262144
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00293b8b4761f0d06286339bd7a4cc183edee2c660dc984093785075d782b637
+size 605759848

runs/Oct10_09-14-23_ip-172-31-44-130/events.out.tfevents.1760087665.ip-172-31-44-130.129292.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e7289d1981b4d81496fff2acf2caaa259d77ccda3376b0046f03e10d7a54e2
+size 4511

runs/Oct13_11-31-30_ip-172-31-44-130/events.out.tfevents.1760355092.ip-172-31-44-130.64297.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79df46a5850d5049016b37e7a805a17189d2a1e5bc466328920d8217676a1b63
+size 4603

runs/Oct13_11-32-27_ip-172-31-44-130/events.out.tfevents.1760355148.ip-172-31-44-130.75867.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b9a14fbd825017023d9de9fc7ffc335785f7992b758c7b64ca3c99b2e71ab71
+size 4603

runs/Oct13_11-35-30_ip-172-31-44-130/events.out.tfevents.1760355331.ip-172-31-44-130.76888.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a1eef828ac3ca727a8ac33ca98fbd2770569c20a5a5059a4444005ea700660d
+size 4951

runs/Oct15_07-27-48_ip-172-31-44-130/events.out.tfevents.1760513270.ip-172-31-44-130.39696.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8dd43b55c9534f7f2c6088a2aef0de9d2e714d8afbb66767693cfaf1d7c25827
+size 4512

runs/Oct15_07-28-13_ip-172-31-44-130/events.out.tfevents.1760513294.ip-172-31-44-130.39696.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27c8b5783d66622a961867d06999da5baf179cbe8260c983c3fe9b8b8eb873a6
+size 9078

runs/Oct15_07-56-55_ip-172-31-44-130/events.out.tfevents.1760515017.ip-172-31-44-130.77663.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c6072e10a753df9f6d63a9e87be3fec20080fad056c0d21eb9ebb5dd691a928
+size 4513

runs/Oct15_07-59-24_ip-172-31-44-130/events.out.tfevents.1760515166.ip-172-31-44-130.77663.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e698f0c42de97e5cc47cff40a7b9df8e9b66340fbeee4f07f4525f5c1eba97b
+size 4513

runs/Oct24_08-23-31_ip-172-31-44-130/events.out.tfevents.1761294212.ip-172-31-44-130.20083.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c95bc0b34de147cc9a62ba7fd979c4ec3893a4d955e2008ac8747158de157fab
+size 4513

runs/Oct24_09-31-35_ip-172-31-44-130/events.out.tfevents.1761298297.ip-172-31-44-130.94906.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0446d9e18c0e32e859b31a25cb7e606a47561c339afa7e6b15c79bb28307cf0e
+size 4606

runs/Oct24_09-34-08_ip-172-31-44-130/events.out.tfevents.1761298449.ip-172-31-44-130.96236.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47a550e74f6ac897f652f51c9b2ade6c540cf19491f77913970c6f7d01e89890
+size 5442

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c79a190be01275b078b3574d02188abc5784e5651a101b20d826371ba8e897dc
+size 33385261

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6468685d270b281475a8de797422da740ad1a8afa73bff9ef93badd261914b2f
+size 6289

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff