AF0815 committed on
Commit
46a04e4
·
verified ·
1 Parent(s): abaeea4

Upload merged Qwen3-4B-Instruct-2507 model (auto-generated README)

Browse files
README.md CHANGED
@@ -45,7 +45,7 @@ Loss is applied to **all assistant turns** in the trajectory, enabling the model
45
 
46
  - DBBench dataset: `u-10bei/dbbench_sft_dataset_react_v4`
47
  - ALFWorld dataset: `u-10bei/sft_alfworld_trajectory_dataset_v5`
48
- - Mixing ratio (pre-merge target): **DB:ALF = 3:1**
49
 
50
  ### DB Oversampling (category-aware)
51
  Enabled: **False**
@@ -64,9 +64,9 @@ DB category weights used during training-data preparation:
64
 
65
  - Base model: Qwen/Qwen3-4B-Instruct-2507
66
  - Method: LoRA (full precision base)
67
- - Max sequence length: 4096
68
  - Epochs: 1
69
- - Learning rate: 3e-05
70
  - LoRA: r=32, alpha=64, dropout=0.05
71
  - Per-device train batch size: 2
72
  - Gradient accumulation: 4
 
45
 
46
  - DBBench dataset: `u-10bei/dbbench_sft_dataset_react_v4`
47
  - ALFWorld dataset: `u-10bei/sft_alfworld_trajectory_dataset_v5`
48
+ - Mixing ratio (pre-merge target): **DB:ALF = 2:1**
49
 
50
  ### DB Oversampling (category-aware)
51
  Enabled: **False**
 
64
 
65
  - Base model: Qwen/Qwen3-4B-Instruct-2507
66
  - Method: LoRA (full precision base)
67
+ - Max sequence length: 2048
68
  - Epochs: 1
69
+ - Learning rate: 2e-06
70
  - LoRA: r=32, alpha=64, dropout=0.05
71
  - Per-device train batch size: 2
72
  - Gradient accumulation: 4
chat_template.jinja CHANGED
@@ -1,86 +1,15 @@
1
- {%- if tools %}
2
- {{- '<|im_start|>system\n' }}
3
- {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
5
- {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
- {%- for tool in tools %}
8
- {{- "\n" }}
9
- {{- tool | tojson }}
10
- {%- endfor %}
11
- {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
- {%- else %}
13
- {%- if messages[0].role == 'system' %}
14
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
- {%- endif %}
16
- {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
- {%- set ns.multi_step_tool = false %}
22
- {%- set ns.last_query_index = index %}
23
- {%- endif %}
24
- {%- endfor %}
25
  {%- for message in messages %}
26
  {%- if message.content is string %}
27
  {%- set content = message.content %}
28
  {%- else %}
29
  {%- set content = '' %}
30
  {%- endif %}
31
- {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
- {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
- {%- elif message.role == "assistant" %}
34
- {%- set reasoning_content = '' %}
35
- {%- if message.reasoning_content is string %}
36
- {%- set reasoning_content = message.reasoning_content %}
37
- {%- else %}
38
- {%- if '</think>' in content %}
39
- {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
- {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
- {%- endif %}
42
- {%- endif %}
43
- {%- if loop.index0 > ns.last_query_index %}
44
- {%- if loop.last or (not loop.last and reasoning_content) %}
45
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
- {%- else %}
47
- {{- '<|im_start|>' + message.role + '\n' + content }}
48
- {%- endif %}
49
- {%- else %}
50
- {{- '<|im_start|>' + message.role + '\n' + content }}
51
- {%- endif %}
52
- {%- if message.tool_calls %}
53
- {%- for tool_call in message.tool_calls %}
54
- {%- if (loop.first and content) or (not loop.first) %}
55
- {{- '\n' }}
56
- {%- endif %}
57
- {%- if tool_call.function %}
58
- {%- set tool_call = tool_call.function %}
59
- {%- endif %}
60
- {{- '<tool_call>\n{"name": "' }}
61
- {{- tool_call.name }}
62
- {{- '", "arguments": ' }}
63
- {%- if tool_call.arguments is string %}
64
- {{- tool_call.arguments }}
65
- {%- else %}
66
- {{- tool_call.arguments | tojson }}
67
- {%- endif %}
68
- {{- '}\n</tool_call>' }}
69
- {%- endfor %}
70
- {%- endif %}
71
- {{- '<|im_end|>\n' }}
72
- {%- elif message.role == "tool" %}
73
- {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
74
- {{- '<|im_start|>user' }}
75
- {%- endif %}
76
- {{- '\n<tool_response>\n' }}
77
- {{- content }}
78
- {{- '\n</tool_response>' }}
79
- {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
80
- {{- '<|im_end|>\n' }}
81
- {%- endif %}
82
- {%- endif %}
83
  {%- endfor %}
84
  {%- if add_generation_prompt %}
85
- {{- '<|im_start|>assistant\n' }}
86
- {%- endif %}
 
 
1
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  {%- for message in messages %}
3
  {%- if message.content is string %}
4
  {%- set content = message.content %}
5
  {%- else %}
6
  {%- set content = '' %}
7
  {%- endif %}
8
+ {{- '<|im_start|>' + message.role + '
9
+ ' + content + '<|im_end|>
10
+ ' }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {%- endfor %}
12
  {%- if add_generation_prompt %}
13
+ {{- '<|im_start|>assistant
14
+ ' }}
15
+ {%- endif %}
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd754441f37fcaeabebf5f7d200d69d88198dafbf7ade0c78b6e1aafd3164871
3
  size 4967215360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c18c270488cd6d47925d6ac190d8bf33979b68c332b4c66a430372ac96d052ff
3
  size 4967215360
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49fb62246505ed17e61b85f1dc5de3f3e325b94da7aca2a1db3d2622a29fc9e1
3
  size 3077766632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7c341236300634374d73ae4ddb293256413322c45c1f1e54052dc037005972
3
  size 3077766632