hssawhney committed on
Commit
87cb44a
·
1 Parent(s): c626221

Removed the files

Browse files
Files changed (34) hide show
  1. added_tokens.json +0 -28
  2. chat_template.jinja +0 -85
  3. checkpoint-31/added_tokens.json +0 -28
  4. checkpoint-31/chat_template.jinja +0 -85
  5. checkpoint-31/config.json +0 -31
  6. checkpoint-31/generation_config.json +0 -6
  7. checkpoint-31/merges.txt +0 -0
  8. checkpoint-31/model.safetensors +0 -3
  9. checkpoint-31/optimizer.pt +0 -3
  10. checkpoint-31/rng_state.pth +0 -3
  11. checkpoint-31/scheduler.pt +0 -3
  12. checkpoint-31/special_tokens_map.json +0 -25
  13. checkpoint-31/tokenizer.json +0 -3
  14. checkpoint-31/tokenizer_config.json +0 -239
  15. checkpoint-31/trainer_state.json +0 -64
  16. checkpoint-31/training_args.bin +0 -3
  17. checkpoint-31/vocab.json +0 -0
  18. config.json +0 -31
  19. generation_config.json +0 -6
  20. merges.txt +0 -0
  21. model.safetensors +0 -3
  22. runs/May31_14-58-04_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696284.gnoto-rcp1.epfl.ch +0 -3
  23. runs/May31_15-01-49_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696510.gnoto-rcp1.epfl.ch +0 -3
  24. runs/May31_15-04-23_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696663.gnoto-rcp1.epfl.ch +0 -3
  25. runs/May31_15-04-58_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696698.gnoto-rcp1.epfl.ch +0 -3
  26. runs/May31_15-10-03_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697004.gnoto-rcp1.epfl.ch +0 -3
  27. runs/May31_15-11-12_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697072.gnoto-rcp1.epfl.ch +0 -3
  28. runs/May31_15-15-06_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697306.gnoto-rcp1.epfl.ch +0 -3
  29. runs/May31_15-15-39_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697339.gnoto-rcp1.epfl.ch +0 -3
  30. special_tokens_map.json +0 -25
  31. tokenizer.json +0 -3
  32. tokenizer_config.json +0 -239
  33. training_args.bin +0 -3
  34. vocab.json +0 -0
added_tokens.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "</think>": 151668,
3
- "</tool_call>": 151658,
4
- "</tool_response>": 151666,
5
- "<think>": 151667,
6
- "<tool_call>": 151657,
7
- "<tool_response>": 151665,
8
- "<|box_end|>": 151649,
9
- "<|box_start|>": 151648,
10
- "<|endoftext|>": 151643,
11
- "<|file_sep|>": 151664,
12
- "<|fim_middle|>": 151660,
13
- "<|fim_pad|>": 151662,
14
- "<|fim_prefix|>": 151659,
15
- "<|fim_suffix|>": 151661,
16
- "<|im_end|>": 151645,
17
- "<|im_start|>": 151644,
18
- "<|image_pad|>": 151655,
19
- "<|object_ref_end|>": 151647,
20
- "<|object_ref_start|>": 151646,
21
- "<|quad_end|>": 151651,
22
- "<|quad_start|>": 151650,
23
- "<|repo_name|>": 151663,
24
- "<|video_pad|>": 151656,
25
- "<|vision_end|>": 151653,
26
- "<|vision_pad|>": 151654,
27
- "<|vision_start|>": 151652
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chat_template.jinja DELETED
@@ -1,85 +0,0 @@
1
- {%- if tools %}
2
- {{- '<|im_start|>system\n' }}
3
- {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
5
- {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
- {%- for tool in tools %}
8
- {{- "\n" }}
9
- {{- tool | tojson }}
10
- {%- endfor %}
11
- {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
- {%- else %}
13
- {%- if messages[0].role == 'system' %}
14
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
- {%- endif %}
16
- {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
- {%- set ns.multi_step_tool = false %}
22
- {%- set ns.last_query_index = index %}
23
- {%- endif %}
24
- {%- endfor %}
25
- {%- for message in messages %}
26
- {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
- {%- elif message.role == "assistant" %}
29
- {%- set content = message.content %}
30
- {%- set reasoning_content = '' %}
31
- {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
- {%- set reasoning_content = message.reasoning_content %}
33
- {%- else %}
34
- {%- if '</think>' in message.content %}
35
- {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
- {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
- {%- endif %}
38
- {%- endif %}
39
- {%- if loop.index0 > ns.last_query_index %}
40
- {%- if loop.last or (not loop.last and reasoning_content) %}
41
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
- {%- else %}
43
- {{- '<|im_start|>' + message.role + '\n' + content }}
44
- {%- endif %}
45
- {%- else %}
46
- {{- '<|im_start|>' + message.role + '\n' + content }}
47
- {%- endif %}
48
- {%- if message.tool_calls %}
49
- {%- for tool_call in message.tool_calls %}
50
- {%- if (loop.first and content) or (not loop.first) %}
51
- {{- '\n' }}
52
- {%- endif %}
53
- {%- if tool_call.function %}
54
- {%- set tool_call = tool_call.function %}
55
- {%- endif %}
56
- {{- '<tool_call>\n{"name": "' }}
57
- {{- tool_call.name }}
58
- {{- '", "arguments": ' }}
59
- {%- if tool_call.arguments is string %}
60
- {{- tool_call.arguments }}
61
- {%- else %}
62
- {{- tool_call.arguments | tojson }}
63
- {%- endif %}
64
- {{- '}\n</tool_call>' }}
65
- {%- endfor %}
66
- {%- endif %}
67
- {{- '<|im_end|>\n' }}
68
- {%- elif message.role == "tool" %}
69
- {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
- {{- '<|im_start|>user' }}
71
- {%- endif %}
72
- {{- '\n<tool_response>\n' }}
73
- {{- message.content }}
74
- {{- '\n</tool_response>' }}
75
- {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
- {{- '<|im_end|>\n' }}
77
- {%- endif %}
78
- {%- endif %}
79
- {%- endfor %}
80
- {%- if add_generation_prompt %}
81
- {{- '<|im_start|>assistant\n' }}
82
- {%- if enable_thinking is defined and enable_thinking is false %}
83
- {{- '<think>\n\n</think>\n\n' }}
84
- {%- endif %}
85
- {%- endif %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-31/added_tokens.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "</think>": 151668,
3
- "</tool_call>": 151658,
4
- "</tool_response>": 151666,
5
- "<think>": 151667,
6
- "<tool_call>": 151657,
7
- "<tool_response>": 151665,
8
- "<|box_end|>": 151649,
9
- "<|box_start|>": 151648,
10
- "<|endoftext|>": 151643,
11
- "<|file_sep|>": 151664,
12
- "<|fim_middle|>": 151660,
13
- "<|fim_pad|>": 151662,
14
- "<|fim_prefix|>": 151659,
15
- "<|fim_suffix|>": 151661,
16
- "<|im_end|>": 151645,
17
- "<|im_start|>": 151644,
18
- "<|image_pad|>": 151655,
19
- "<|object_ref_end|>": 151647,
20
- "<|object_ref_start|>": 151646,
21
- "<|quad_end|>": 151651,
22
- "<|quad_start|>": 151650,
23
- "<|repo_name|>": 151663,
24
- "<|video_pad|>": 151656,
25
- "<|vision_end|>": 151653,
26
- "<|vision_pad|>": 151654,
27
- "<|vision_start|>": 151652
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-31/chat_template.jinja DELETED
@@ -1,85 +0,0 @@
1
- {%- if tools %}
2
- {{- '<|im_start|>system\n' }}
3
- {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
5
- {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
- {%- for tool in tools %}
8
- {{- "\n" }}
9
- {{- tool | tojson }}
10
- {%- endfor %}
11
- {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
- {%- else %}
13
- {%- if messages[0].role == 'system' %}
14
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
- {%- endif %}
16
- {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
- {%- set ns.multi_step_tool = false %}
22
- {%- set ns.last_query_index = index %}
23
- {%- endif %}
24
- {%- endfor %}
25
- {%- for message in messages %}
26
- {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
- {%- elif message.role == "assistant" %}
29
- {%- set content = message.content %}
30
- {%- set reasoning_content = '' %}
31
- {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
- {%- set reasoning_content = message.reasoning_content %}
33
- {%- else %}
34
- {%- if '</think>' in message.content %}
35
- {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
- {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
- {%- endif %}
38
- {%- endif %}
39
- {%- if loop.index0 > ns.last_query_index %}
40
- {%- if loop.last or (not loop.last and reasoning_content) %}
41
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
- {%- else %}
43
- {{- '<|im_start|>' + message.role + '\n' + content }}
44
- {%- endif %}
45
- {%- else %}
46
- {{- '<|im_start|>' + message.role + '\n' + content }}
47
- {%- endif %}
48
- {%- if message.tool_calls %}
49
- {%- for tool_call in message.tool_calls %}
50
- {%- if (loop.first and content) or (not loop.first) %}
51
- {{- '\n' }}
52
- {%- endif %}
53
- {%- if tool_call.function %}
54
- {%- set tool_call = tool_call.function %}
55
- {%- endif %}
56
- {{- '<tool_call>\n{"name": "' }}
57
- {{- tool_call.name }}
58
- {{- '", "arguments": ' }}
59
- {%- if tool_call.arguments is string %}
60
- {{- tool_call.arguments }}
61
- {%- else %}
62
- {{- tool_call.arguments | tojson }}
63
- {%- endif %}
64
- {{- '}\n</tool_call>' }}
65
- {%- endfor %}
66
- {%- endif %}
67
- {{- '<|im_end|>\n' }}
68
- {%- elif message.role == "tool" %}
69
- {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
- {{- '<|im_start|>user' }}
71
- {%- endif %}
72
- {{- '\n<tool_response>\n' }}
73
- {{- message.content }}
74
- {{- '\n</tool_response>' }}
75
- {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
- {{- '<|im_end|>\n' }}
77
- {%- endif %}
78
- {%- endif %}
79
- {%- endfor %}
80
- {%- if add_generation_prompt %}
81
- {{- '<|im_start|>assistant\n' }}
82
- {%- if enable_thinking is defined and enable_thinking is false %}
83
- {{- '<think>\n\n</think>\n\n' }}
84
- {%- endif %}
85
- {%- endif %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-31/config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "architectures": [
3
- "Qwen3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
- "eos_token_id": 151643,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 1024,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 3072,
14
- "max_position_embeddings": 32768,
15
- "max_window_layers": 28,
16
- "model_type": "qwen3",
17
- "num_attention_heads": 16,
18
- "num_hidden_layers": 28,
19
- "num_key_value_heads": 8,
20
- "pretraining_tp": 1,
21
- "rms_norm_eps": 1e-06,
22
- "rope_scaling": null,
23
- "rope_theta": 1000000,
24
- "sliding_window": null,
25
- "tie_word_embeddings": true,
26
- "torch_dtype": "bfloat16",
27
- "transformers_version": "4.52.4",
28
- "use_cache": false,
29
- "use_sliding_window": false,
30
- "vocab_size": 151936
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-31/generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token_id": 151643,
3
- "eos_token_id": 151643,
4
- "max_new_tokens": 2048,
5
- "transformers_version": "4.52.4"
6
- }
 
 
 
 
 
 
 
checkpoint-31/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-31/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1a86ffeeb2da4511911a4c7c7e8eabb008cbc9d36bc5dd4ea61880957b38820
3
- size 1192135096
 
 
 
 
checkpoint-31/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:690d5f3bc9153ea7fc2d4ddd2b27c1194b4b15ab8351339024f0bad89882753b
3
- size 2384459962
 
 
 
 
checkpoint-31/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69
3
- size 14244
 
 
 
 
checkpoint-31/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:50878f449b5ab5955d8b0d723c4fa5bf560fc0e364a6f3230f046ef0b03f629c
3
- size 1064
 
 
 
 
checkpoint-31/special_tokens_map.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>"
16
- ],
17
- "eos_token": {
18
- "content": "<|endoftext|>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- "pad_token": "<|endoftext|>"
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-31/tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
- size 11422654
 
 
 
 
checkpoint-31/tokenizer_config.json DELETED
@@ -1,239 +0,0 @@
1
- {
2
- "add_bos_token": false,
3
- "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "151643": {
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "151644": {
14
- "content": "<|im_start|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "151645": {
22
- "content": "<|im_end|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "151646": {
30
- "content": "<|object_ref_start|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "151647": {
38
- "content": "<|object_ref_end|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "151648": {
46
- "content": "<|box_start|>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "151649": {
54
- "content": "<|box_end|>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "151650": {
62
- "content": "<|quad_start|>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "151651": {
70
- "content": "<|quad_end|>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
- },
77
- "151652": {
78
- "content": "<|vision_start|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "151653": {
86
- "content": "<|vision_end|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "151654": {
94
- "content": "<|vision_pad|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "151655": {
102
- "content": "<|image_pad|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "151656": {
110
- "content": "<|video_pad|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "151657": {
118
- "content": "<tool_call>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "151658": {
126
- "content": "</tool_call>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "151659": {
134
- "content": "<|fim_prefix|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "151660": {
142
- "content": "<|fim_middle|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "151661": {
150
- "content": "<|fim_suffix|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "151662": {
158
- "content": "<|fim_pad|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "151663": {
166
- "content": "<|repo_name|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "151664": {
174
- "content": "<|file_sep|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "151665": {
182
- "content": "<tool_response>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "151666": {
190
- "content": "</tool_response>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "151667": {
198
- "content": "<think>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": false
204
- },
205
- "151668": {
206
- "content": "</think>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": false
212
- }
213
- },
214
- "additional_special_tokens": [
215
- "<|im_start|>",
216
- "<|im_end|>",
217
- "<|object_ref_start|>",
218
- "<|object_ref_end|>",
219
- "<|box_start|>",
220
- "<|box_end|>",
221
- "<|quad_start|>",
222
- "<|quad_end|>",
223
- "<|vision_start|>",
224
- "<|vision_end|>",
225
- "<|vision_pad|>",
226
- "<|image_pad|>",
227
- "<|video_pad|>"
228
- ],
229
- "bos_token": null,
230
- "clean_up_tokenization_spaces": false,
231
- "eos_token": "<|endoftext|>",
232
- "errors": "replace",
233
- "extra_special_tokens": {},
234
- "model_max_length": 131072,
235
- "pad_token": "<|endoftext|>",
236
- "split_special_tokens": false,
237
- "tokenizer_class": "Qwen2Tokenizer",
238
- "unk_token": null
239
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-31/trainer_state.json DELETED
@@ -1,64 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 0.001005139179352496,
6
- "eval_steps": 10000,
7
- "global_step": 31,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.00032423844495241803,
14
- "grad_norm": 4.0625,
15
- "learning_rate": 1.6691306063588583e-05,
16
- "loss": 1.4601,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.0006484768899048361,
21
- "grad_norm": 2.640625,
22
- "learning_rate": 6.909830056250527e-06,
23
- "loss": 0.7619,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.000972715334857254,
28
- "grad_norm": 2.890625,
29
- "learning_rate": 2.1852399266194312e-07,
30
- "loss": 0.6335,
31
- "step": 30
32
- }
33
- ],
34
- "logging_steps": 10,
35
- "max_steps": 31,
36
- "num_input_tokens_seen": 0,
37
- "num_train_epochs": 1,
38
- "save_steps": 10000,
39
- "stateful_callbacks": {
40
- "EarlyStoppingCallback": {
41
- "args": {
42
- "early_stopping_patience": 3,
43
- "early_stopping_threshold": 0.0
44
- },
45
- "attributes": {
46
- "early_stopping_patience_counter": 0
47
- }
48
- },
49
- "TrainerControl": {
50
- "args": {
51
- "should_epoch_stop": false,
52
- "should_evaluate": false,
53
- "should_log": false,
54
- "should_save": true,
55
- "should_training_stop": true
56
- },
57
- "attributes": {}
58
- }
59
- },
60
- "total_flos": 671145548316672.0,
61
- "train_batch_size": 8,
62
- "trial_name": null,
63
- "trial_params": null
64
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-31/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a98198acb8e0de8a01c9686880a9c86a1f64d8ba522c7e488bcfdce34cb933d3
3
- size 5368
 
 
 
 
checkpoint-31/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
config.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "architectures": [
3
- "Qwen3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
- "eos_token_id": 151643,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 1024,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 3072,
14
- "max_position_embeddings": 32768,
15
- "max_window_layers": 28,
16
- "model_type": "qwen3",
17
- "num_attention_heads": 16,
18
- "num_hidden_layers": 28,
19
- "num_key_value_heads": 8,
20
- "pretraining_tp": 1,
21
- "rms_norm_eps": 1e-06,
22
- "rope_scaling": null,
23
- "rope_theta": 1000000,
24
- "sliding_window": null,
25
- "tie_word_embeddings": true,
26
- "torch_dtype": "bfloat16",
27
- "transformers_version": "4.52.4",
28
- "use_cache": false,
29
- "use_sliding_window": false,
30
- "vocab_size": 151936
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token_id": 151643,
3
- "eos_token_id": 151643,
4
- "max_new_tokens": 2048,
5
- "transformers_version": "4.52.4"
6
- }
 
 
 
 
 
 
 
merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e3e4e23f11cf7f0c7fbdfb74169b3d6f9301196543a8a310eae974c4ae4f746
3
- size 1192135096
 
 
 
 
runs/May31_14-58-04_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696284.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e7df08808ab6a439f3c35aef486fb78e6102297b5f88ae26bfc880483a31b8b
3
- size 5275
 
 
 
 
runs/May31_15-01-49_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696510.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:71ac4cca0b48ba5df0480d4fece54a0bff25a3fffb966725c5cdbea4047ec8be
3
- size 5274
 
 
 
 
runs/May31_15-04-23_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696663.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8aa26b26521c383e26917613f3080d6adcfe75d7f4204cc97760f2ea0b57fc7
3
- size 5071
 
 
 
 
runs/May31_15-04-58_gnoto-rcp1.epfl.ch/events.out.tfevents.1748696698.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6799a9e07decfa509d9afb27f19b0439ea5fc99738b1dd3a5ec4aacace089cec
3
- size 5278
 
 
 
 
runs/May31_15-10-03_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697004.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:930110b321e111c44c55488e87188cf91ef3f560097879bd8f29a05b36d790b1
3
- size 5071
 
 
 
 
runs/May31_15-11-12_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697072.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebe3a71080e1f35cf394ad8907dfcc0010d23e81ddabe6cfa90c2958b5d44c28
3
- size 5070
 
 
 
 
runs/May31_15-15-06_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697306.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d17619f715d72ab69517a766262f536eb2605529c77ecb1cde42a892fb521ea9
3
- size 5278
 
 
 
 
runs/May31_15-15-39_gnoto-rcp1.epfl.ch/events.out.tfevents.1748697339.gnoto-rcp1.epfl.ch DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f03f6059d82c3be5775bc170511832d54cd49eb421c0d9c1afa2c007c70687ef
3
- size 6044
 
 
 
 
special_tokens_map.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>"
16
- ],
17
- "eos_token": {
18
- "content": "<|endoftext|>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- "pad_token": "<|endoftext|>"
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
- size 11422654
 
 
 
 
tokenizer_config.json DELETED
@@ -1,239 +0,0 @@
1
- {
2
- "add_bos_token": false,
3
- "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "151643": {
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "151644": {
14
- "content": "<|im_start|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "151645": {
22
- "content": "<|im_end|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "151646": {
30
- "content": "<|object_ref_start|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "151647": {
38
- "content": "<|object_ref_end|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "151648": {
46
- "content": "<|box_start|>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "151649": {
54
- "content": "<|box_end|>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "151650": {
62
- "content": "<|quad_start|>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "151651": {
70
- "content": "<|quad_end|>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
- },
77
- "151652": {
78
- "content": "<|vision_start|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "151653": {
86
- "content": "<|vision_end|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "151654": {
94
- "content": "<|vision_pad|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "151655": {
102
- "content": "<|image_pad|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "151656": {
110
- "content": "<|video_pad|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "151657": {
118
- "content": "<tool_call>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "151658": {
126
- "content": "</tool_call>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "151659": {
134
- "content": "<|fim_prefix|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "151660": {
142
- "content": "<|fim_middle|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "151661": {
150
- "content": "<|fim_suffix|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "151662": {
158
- "content": "<|fim_pad|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "151663": {
166
- "content": "<|repo_name|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "151664": {
174
- "content": "<|file_sep|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "151665": {
182
- "content": "<tool_response>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "151666": {
190
- "content": "</tool_response>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "151667": {
198
- "content": "<think>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": false
204
- },
205
- "151668": {
206
- "content": "</think>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": false
212
- }
213
- },
214
- "additional_special_tokens": [
215
- "<|im_start|>",
216
- "<|im_end|>",
217
- "<|object_ref_start|>",
218
- "<|object_ref_end|>",
219
- "<|box_start|>",
220
- "<|box_end|>",
221
- "<|quad_start|>",
222
- "<|quad_end|>",
223
- "<|vision_start|>",
224
- "<|vision_end|>",
225
- "<|vision_pad|>",
226
- "<|image_pad|>",
227
- "<|video_pad|>"
228
- ],
229
- "bos_token": null,
230
- "clean_up_tokenization_spaces": false,
231
- "eos_token": "<|endoftext|>",
232
- "errors": "replace",
233
- "extra_special_tokens": {},
234
- "model_max_length": 131072,
235
- "pad_token": "<|endoftext|>",
236
- "split_special_tokens": false,
237
- "tokenizer_class": "Qwen2Tokenizer",
238
- "unk_token": null
239
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1283e35f25386746ef57567d4f80fd84cfbf48e0584d99c58122adcdfec1a13
3
- size 5368
 
 
 
 
vocab.json DELETED
The diff for this file is too large to render. See raw diff