takayosh committed on
Commit
f015ee7
·
verified ·
1 Parent(s): 2612134

Upload merged Qwen3-4B-Instruct-2507 model (auto-generated README)

Browse files
README.md CHANGED
@@ -5,7 +5,7 @@ datasets:
5
  language:
6
  - en
7
  license: apache-2.0
8
- library_name: transform
9
  pipeline_tag: text-generation
10
  tags:
11
  - lora
@@ -15,23 +15,19 @@ tags:
15
  - dbbench
16
  ---
17
 
18
- # qwen3-4b-agent-distilled-lora-v1
19
 
20
  This repository provides a **LoRA adapter** fine-tuned from
21
  **Qwen/Qwen3-4B-Instruct-2507** using **LoRA + Unsloth**.
22
 
23
- This repository contains merged full model weights.
24
- The base model is NOT required separately.
25
 
26
  ## Training Objective
27
 
28
- This adapter is trained to improve multi-turn agent task performance
29
  on ALFWorld (household tasks) and DBBench (database operations).
30
 
31
- Additional teacher-distilled trajectories with chain-of-thought and
32
- self-reflection are included to enhance long-horizon reasoning and
33
- error recovery capability.
34
-
35
  Loss is applied to **all assistant turns** in the multi-turn trajectory,
36
  enabling the model to learn environment observation, action selection,
37
  tool use, and recovery from errors.
@@ -42,7 +38,7 @@ tool use, and recovery from errors.
42
  - Method: LoRA (full precision base)
43
  - Max sequence length: 2048
44
  - Epochs: 2
45
- - Learning rate: 2e-06
46
  - LoRA: r=64, alpha=128
47
 
48
  ## Usage
 
5
  language:
6
  - en
7
  license: apache-2.0
8
+ library_name: peft
9
  pipeline_tag: text-generation
10
  tags:
11
  - lora
 
15
  - dbbench
16
  ---
17
 
18
+ # <【課題】ここは自分で記入して下さい>
19
 
20
  This repository provides a **LoRA adapter** fine-tuned from
21
  **Qwen/Qwen3-4B-Instruct-2507** using **LoRA + Unsloth**.
22
 
23
+ This repository contains **LoRA adapter weights only**.
24
+ The base model must be loaded separately.
25
 
26
  ## Training Objective
27
 
28
+ This adapter is trained to improve **multi-turn agent task performance**
29
  on ALFWorld (household tasks) and DBBench (database operations).
30
 
 
 
 
 
31
  Loss is applied to **all assistant turns** in the multi-turn trajectory,
32
  enabling the model to learn environment observation, action selection,
33
  tool use, and recovery from errors.
 
38
  - Method: LoRA (full precision base)
39
  - Max sequence length: 2048
40
  - Epochs: 2
41
+ - Learning rate: 3e-05
42
  - LoRA: r=64, alpha=128
43
 
44
  ## Usage
chat_template.jinja CHANGED
@@ -14,14 +14,6 @@
14
  {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
  {%- endif %}
16
  {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
- {%- set ns.multi_step_tool = false %}
22
- {%- set ns.last_query_index = index %}
23
- {%- endif %}
24
- {%- endfor %}
25
  {%- for message in messages %}
26
  {%- if message.content is string %}
27
  {%- set content = message.content %}
@@ -31,24 +23,7 @@
31
  {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
32
  {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
33
  {%- elif message.role == "assistant" %}
34
- {%- set reasoning_content = '' %}
35
- {%- if message.reasoning_content is string %}
36
- {%- set reasoning_content = message.reasoning_content %}
37
- {%- else %}
38
- {%- if '</think>' in content %}
39
- {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
40
- {%- set content = content.split('</think>')[-1].lstrip('\n') %}
41
- {%- endif %}
42
- {%- endif %}
43
- {%- if loop.index0 > ns.last_query_index %}
44
- {%- if loop.last or (not loop.last and reasoning_content) %}
45
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
46
- {%- else %}
47
- {{- '<|im_start|>' + message.role + '\n' + content }}
48
- {%- endif %}
49
- {%- else %}
50
- {{- '<|im_start|>' + message.role + '\n' + content }}
51
- {%- endif %}
52
  {%- if message.tool_calls %}
53
  {%- for tool_call in message.tool_calls %}
54
  {%- if (loop.first and content) or (not loop.first) %}
 
14
  {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
  {%- endif %}
16
  {%- endif %}
 
 
 
 
 
 
 
 
17
  {%- for message in messages %}
18
  {%- if message.content is string %}
19
  {%- set content = message.content %}
 
23
  {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
24
  {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
25
  {%- elif message.role == "assistant" %}
26
+ {{- '<|im_start|>' + message.role + '\n' + content }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  {%- if message.tool_calls %}
28
  {%- for tool_call in message.tool_calls %}
29
  {%- if (loop.first and content) or (not loop.first) %}
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a3382308a863559a8db8bf34228e1dfb5ca2838b86efa74239e505a411e8e5f
3
  size 4967215360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bee29e292cd94ff0c3f7fa22480a95a969ccfd742bda7a09a3b5eb1de15ad43
3
  size 4967215360
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd0189eb77daea083e3f8c9f47001ef7a2a5e24f203467b16b63372d55a8e6d0
3
  size 3077766632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b878d1fbafea40cb2f87334e672f631601c6641e37c5e6e3e27b2102608b4ad
3
  size 3077766632
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "single_word": false
23
  },
24
  "pad_token": {
25
- "content": "<|vision_pad|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
 
22
  "single_word": false
23
  },
24
  "pad_token": {
25
+ "content": "<|endoftext|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -232,8 +232,8 @@
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
  "model_max_length": 262144,
235
- "pad_token": "<|vision_pad|>",
236
- "padding_side": "left",
237
  "split_special_tokens": false,
238
  "tokenizer_class": "Qwen2Tokenizer",
239
  "unk_token": null
 
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
  "model_max_length": 262144,
235
+ "pad_token": "<|endoftext|>",
236
+ "padding_side": "right",
237
  "split_special_tokens": false,
238
  "tokenizer_class": "Qwen2Tokenizer",
239
  "unk_token": null