guyhadad01 committed on
Commit
89b1084
·
verified ·
1 Parent(s): 676341c

Training in progress, step 158

Browse files
Files changed (28) hide show
  1. added_tokens.json +3 -0
  2. chat_template.jinja +85 -0
  3. config.json +25 -0
  4. merges.txt +0 -0
  5. model.safetensors +3 -0
  6. runs/Nov02_09-30-52_ip-172-31-44-130/events.out.tfevents.1762075854.ip-172-31-44-130.109570.0 +3 -0
  7. runs/Nov02_09-36-22_ip-172-31-44-130/events.out.tfevents.1762076184.ip-172-31-44-130.112373.0 +3 -0
  8. runs/Oct10_09-14-23_ip-172-31-44-130/events.out.tfevents.1760087665.ip-172-31-44-130.129292.0 +3 -0
  9. runs/Oct13_11-31-30_ip-172-31-44-130/events.out.tfevents.1760355092.ip-172-31-44-130.64297.0 +3 -0
  10. runs/Oct13_11-32-27_ip-172-31-44-130/events.out.tfevents.1760355148.ip-172-31-44-130.75867.0 +3 -0
  11. runs/Oct13_11-35-30_ip-172-31-44-130/events.out.tfevents.1760355331.ip-172-31-44-130.76888.0 +3 -0
  12. runs/Oct15_07-27-48_ip-172-31-44-130/events.out.tfevents.1760513270.ip-172-31-44-130.39696.0 +3 -0
  13. runs/Oct15_07-28-13_ip-172-31-44-130/events.out.tfevents.1760513294.ip-172-31-44-130.39696.1 +3 -0
  14. runs/Oct15_07-56-55_ip-172-31-44-130/events.out.tfevents.1760515017.ip-172-31-44-130.77663.0 +3 -0
  15. runs/Oct15_07-59-24_ip-172-31-44-130/events.out.tfevents.1760515166.ip-172-31-44-130.77663.1 +3 -0
  16. runs/Oct24_08-23-31_ip-172-31-44-130/events.out.tfevents.1761294212.ip-172-31-44-130.20083.0 +3 -0
  17. runs/Oct24_09-31-35_ip-172-31-44-130/events.out.tfevents.1761298297.ip-172-31-44-130.94906.0 +3 -0
  18. runs/Oct24_09-34-08_ip-172-31-44-130/events.out.tfevents.1761298449.ip-172-31-44-130.96236.0 +3 -0
  19. runs/Oct26_07-26-05_ip-172-31-44-130/events.out.tfevents.1761463567.ip-172-31-44-130.12132.0 +3 -0
  20. runs/Oct26_11-59-28_ip-172-31-44-130/events.out.tfevents.1761479970.ip-172-31-44-130.228432.0 +3 -0
  21. runs/Oct27_07-22-20_ip-172-31-44-130/events.out.tfevents.1761549742.ip-172-31-44-130.10093.0 +3 -0
  22. special_tokens_map.json +37 -0
  23. tokenizer.json +0 -0
  24. tokenizer.model +3 -0
  25. tokenizer_config.json +65 -0
  26. training_args.bin +3 -0
  27. vocab.json +0 -0
  28. vocab.txt +0 -0
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
chat_template.jinja ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
+ {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
+ {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
+ {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
+ {%- endfor %}
66
+ {%- endif %}
67
+ {{- '<|im_end|>\n' }}
68
+ {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
+ {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
+ {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
+ {{- '<|im_end|>\n' }}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if add_generation_prompt %}
81
+ {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "dtype": "bfloat16",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 384,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 1536,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "transformers_version": "4.57.0",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 30522
25
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99b0fa3643d242645dbf52689a915e8545a28964e3288ba549b797f11ad6fb4e
3
+ size 45437864
runs/Nov02_09-30-52_ip-172-31-44-130/events.out.tfevents.1762075854.ip-172-31-44-130.109570.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180b5636b098bbf107e9af31adde90540d4aa70fc972d46d941dd5512e13a49e
3
+ size 5700
runs/Nov02_09-36-22_ip-172-31-44-130/events.out.tfevents.1762076184.ip-172-31-44-130.112373.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937c42b2e86cbf72818ee07781c08390af37854a424dde0c463ac523a7b8de9c
3
+ size 5496
runs/Oct10_09-14-23_ip-172-31-44-130/events.out.tfevents.1760087665.ip-172-31-44-130.129292.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2e7289d1981b4d81496fff2acf2caaa259d77ccda3376b0046f03e10d7a54e2
3
+ size 4511
runs/Oct13_11-31-30_ip-172-31-44-130/events.out.tfevents.1760355092.ip-172-31-44-130.64297.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79df46a5850d5049016b37e7a805a17189d2a1e5bc466328920d8217676a1b63
3
+ size 4603
runs/Oct13_11-32-27_ip-172-31-44-130/events.out.tfevents.1760355148.ip-172-31-44-130.75867.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b9a14fbd825017023d9de9fc7ffc335785f7992b758c7b64ca3c99b2e71ab71
3
+ size 4603
runs/Oct13_11-35-30_ip-172-31-44-130/events.out.tfevents.1760355331.ip-172-31-44-130.76888.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a1eef828ac3ca727a8ac33ca98fbd2770569c20a5a5059a4444005ea700660d
3
+ size 4951
runs/Oct15_07-27-48_ip-172-31-44-130/events.out.tfevents.1760513270.ip-172-31-44-130.39696.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dd43b55c9534f7f2c6088a2aef0de9d2e714d8afbb66767693cfaf1d7c25827
3
+ size 4512
runs/Oct15_07-28-13_ip-172-31-44-130/events.out.tfevents.1760513294.ip-172-31-44-130.39696.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27c8b5783d66622a961867d06999da5baf179cbe8260c983c3fe9b8b8eb873a6
3
+ size 9078
runs/Oct15_07-56-55_ip-172-31-44-130/events.out.tfevents.1760515017.ip-172-31-44-130.77663.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c6072e10a753df9f6d63a9e87be3fec20080fad056c0d21eb9ebb5dd691a928
3
+ size 4513
runs/Oct15_07-59-24_ip-172-31-44-130/events.out.tfevents.1760515166.ip-172-31-44-130.77663.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e698f0c42de97e5cc47cff40a7b9df8e9b66340fbeee4f07f4525f5c1eba97b
3
+ size 4513
runs/Oct24_08-23-31_ip-172-31-44-130/events.out.tfevents.1761294212.ip-172-31-44-130.20083.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c95bc0b34de147cc9a62ba7fd979c4ec3893a4d955e2008ac8747158de157fab
3
+ size 4513
runs/Oct24_09-31-35_ip-172-31-44-130/events.out.tfevents.1761298297.ip-172-31-44-130.94906.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0446d9e18c0e32e859b31a25cb7e606a47561c339afa7e6b15c79bb28307cf0e
3
+ size 4606
runs/Oct24_09-34-08_ip-172-31-44-130/events.out.tfevents.1761298449.ip-172-31-44-130.96236.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd8e7f8dbdf6254dc9144520cf89413b4cc23060a1e1c86417a3310e5ea7faa2
3
+ size 9172
runs/Oct26_07-26-05_ip-172-31-44-130/events.out.tfevents.1761463567.ip-172-31-44-130.12132.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:283bb8873f8a3ef41ac3313163ef54203678ccbd6a693e9b8bd80e05fdc59f01
3
+ size 9381
runs/Oct26_11-59-28_ip-172-31-44-130/events.out.tfevents.1761479970.ip-172-31-44-130.228432.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bee8a3561b33de8d5dd68eca2c2064b675d1798c92451435c04c8145480cc6f7
3
+ size 13087
runs/Oct27_07-22-20_ip-172-31-44-130/events.out.tfevents.1761549742.ip-172-31-44-130.10093.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ec4ea36becaa43f675d185a690f22a86be514bd4b042bb203e36d7d83ac684
3
+ size 13181
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "max_length": 128,
51
+ "model_max_length": 256,
52
+ "never_split": null,
53
+ "pad_to_multiple_of": null,
54
+ "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
+ "sep_token": "[SEP]",
58
+ "stride": 0,
59
+ "strip_accents": null,
60
+ "tokenize_chinese_chars": true,
61
+ "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "longest_first",
64
+ "unk_token": "[UNK]"
65
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f1c54f2129ed0ff515c4baf67c18f0a77a71d0b30328268c85f52204ecd86ce
3
+ size 6225
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff