Upload merged Qwen3-4B-Instruct-2507 model (auto-generated README)

Browse files

Files changed (6) hide show

README.md +6 -10
chat_template.jinja +1 -26
model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
special_tokens_map.json +1 -1
tokenizer_config.json +2 -2

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ datasets:
 language:
 - en
 license: apache-2.0
-library_name: transformers
 pipeline_tag: text-generation
 tags:
 - lora
@@ -15,23 +15,19 @@ tags:
 - dbbench
 ---
-# qwen3-4b-agent-distilled-lora-v1
 This repository provides a **LoRA adapter** fine-tuned from
 **Qwen/Qwen3-4B-Instruct-2507** using **LoRA + Unsloth**.
-This repository contains merged full model weights.
-The base model is NOT required separately.
 ## Training Objective
-This adapter is trained to improve multi-turn agent task performance
 on ALFWorld (household tasks) and DBBench (database operations).
-Additional teacher-distilled trajectories with chain-of-thought and
-self-reflection are included to enhance long-horizon reasoning and
-error recovery capability.
 Loss is applied to **all assistant turns** in the multi-turn trajectory,
 enabling the model to learn environment observation, action selection,
 tool use, and recovery from errors.
@@ -42,7 +38,7 @@ tool use, and recovery from errors.
 - Method: LoRA (full precision base)
 - Max sequence length: 2048
 - Epochs: 2
-- Learning rate: 2e-06
 - LoRA: r=64, alpha=128
 ## Usage

 language:
 - en
 license: apache-2.0
+library_name: peft
 pipeline_tag: text-generation
 tags:
 - lora
 - dbbench
 ---
+# qwen3-4b-agent-distilled-lora-v1
 This repository provides a **LoRA adapter** fine-tuned from
 **Qwen/Qwen3-4B-Instruct-2507** using **LoRA + Unsloth**.
+This repository contains **merged full model weights** (the LoRA adapter merged into the base model).
+The base model does not need to be loaded separately.
 ## Training Objective
+This adapter is trained to improve **multi-turn agent task performance**
 on ALFWorld (household tasks) and DBBench (database operations).
 Loss is applied to **all assistant turns** in the multi-turn trajectory,
 enabling the model to learn environment observation, action selection,
 tool use, and recovery from errors.
 - Method: LoRA (full precision base)
 - Max sequence length: 2048
 - Epochs: 2
+- Learning rate: 3e-05
 - LoRA: r=64, alpha=128
 ## Usage

chat_template.jinja CHANGED Viewed

@@ -14,14 +14,6 @@
         {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
     {%- endif %}
 {%- endif %}
-{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
-{%- for message in messages[::-1] %}
-    {%- set index = (messages|length - 1) - loop.index0 %}
-    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
-        {%- set ns.multi_step_tool = false %}
-        {%- set ns.last_query_index = index %}
-    {%- endif %}
-{%- endfor %}
 {%- for message in messages %}
     {%- if message.content is string %}
         {%- set content = message.content %}
@@ -31,24 +23,7 @@
     {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
         {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
     {%- elif message.role == "assistant" %}
-        {%- set reasoning_content = '' %}
-        {%- if message.reasoning_content is string %}
-            {%- set reasoning_content = message.reasoning_content %}
-        {%- else %}
-            {%- if '</think>' in content %}
-                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
-                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
-            {%- endif %}
-        {%- endif %}
-        {%- if loop.index0 > ns.last_query_index %}
-            {%- if loop.last or (not loop.last and reasoning_content) %}
-                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
-            {%- else %}
-                {{- '<|im_start|>' + message.role + '\n' + content }}
-            {%- endif %}
-        {%- else %}
-            {{- '<|im_start|>' + message.role + '\n' + content }}
-        {%- endif %}
         {%- if message.tool_calls %}
             {%- for tool_call in message.tool_calls %}
                 {%- if (loop.first and content) or (not loop.first) %}

         {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
     {%- endif %}
 {%- endif %}
 {%- for message in messages %}
     {%- if message.content is string %}
         {%- set content = message.content %}
     {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
         {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
     {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role + '\n' + content }}
         {%- if message.tool_calls %}
             {%- for tool_call in message.tool_calls %}
                 {%- if (loop.first and content) or (not loop.first) %}

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a3382308a863559a8db8bf34228e1dfb5ca2838b86efa74239e505a411e8e5f
 size 4967215360

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bee29e292cd94ff0c3f7fa22480a95a969ccfd742bda7a09a3b5eb1de15ad43
 size 4967215360

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd0189eb77daea083e3f8c9f47001ef7a2a5e24f203467b16b63372d55a8e6d0
 size 3077766632

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b878d1fbafea40cb2f87334e672f631601c6641e37c5e6e3e27b2102608b4ad
 size 3077766632

special_tokens_map.json CHANGED Viewed

@@ -22,7 +22,7 @@
     "single_word": false
   },
   "pad_token": {
-    "content": "<|vision_pad|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

     "single_word": false
   },
   "pad_token": {
+    "content": "<|endoftext|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,

tokenizer_config.json CHANGED Viewed

@@ -232,8 +232,8 @@
   "errors": "replace",
   "extra_special_tokens": {},
   "model_max_length": 262144,
-  "pad_token": "<|vision_pad|>",
-  "padding_side": "left",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null

   "errors": "replace",
   "extra_special_tokens": {},
   "model_max_length": 262144,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
   "split_special_tokens": false,
   "tokenizer_class": "Qwen2Tokenizer",
   "unk_token": null