Luxel committed on Oct 29, 2025

Commit

b668195

verified ·

1 Parent(s): bf448b7

Upload run qwen3_8b_3 on 2025-10-29T00:58:32.337888Z

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +8 -0
qwen3_8b_3/checkpoint-10/README.md +209 -0
qwen3_8b_3/checkpoint-10/adapter_config.json +42 -0
qwen3_8b_3/checkpoint-10/adapter_model.safetensors +3 -0
qwen3_8b_3/checkpoint-10/added_tokens.json +28 -0
qwen3_8b_3/checkpoint-10/chat_template.jinja +89 -0
qwen3_8b_3/checkpoint-10/merges.txt +0 -0
qwen3_8b_3/checkpoint-10/optimizer.pt +3 -0
qwen3_8b_3/checkpoint-10/rng_state_0.pth +3 -0
qwen3_8b_3/checkpoint-10/rng_state_1.pth +3 -0
qwen3_8b_3/checkpoint-10/rng_state_2.pth +3 -0
qwen3_8b_3/checkpoint-10/rng_state_3.pth +3 -0
qwen3_8b_3/checkpoint-10/scheduler.pt +3 -0
qwen3_8b_3/checkpoint-10/special_tokens_map.json +31 -0
qwen3_8b_3/checkpoint-10/tokenizer.json +3 -0
qwen3_8b_3/checkpoint-10/tokenizer_config.json +239 -0
qwen3_8b_3/checkpoint-10/trainer_state.json +216 -0
qwen3_8b_3/checkpoint-10/training_args.bin +3 -0
qwen3_8b_3/checkpoint-10/vocab.json +0 -0
qwen3_8b_3/checkpoint-15/README.md +209 -0
qwen3_8b_3/checkpoint-15/adapter_config.json +42 -0
qwen3_8b_3/checkpoint-15/adapter_model.safetensors +3 -0
qwen3_8b_3/checkpoint-15/added_tokens.json +28 -0
qwen3_8b_3/checkpoint-15/chat_template.jinja +89 -0
qwen3_8b_3/checkpoint-15/merges.txt +0 -0
qwen3_8b_3/checkpoint-15/optimizer.pt +3 -0
qwen3_8b_3/checkpoint-15/rng_state_0.pth +3 -0
qwen3_8b_3/checkpoint-15/rng_state_1.pth +3 -0
qwen3_8b_3/checkpoint-15/rng_state_2.pth +3 -0
qwen3_8b_3/checkpoint-15/rng_state_3.pth +3 -0
qwen3_8b_3/checkpoint-15/scheduler.pt +3 -0
qwen3_8b_3/checkpoint-15/special_tokens_map.json +31 -0
qwen3_8b_3/checkpoint-15/tokenizer.json +3 -0
qwen3_8b_3/checkpoint-15/tokenizer_config.json +239 -0
qwen3_8b_3/checkpoint-15/trainer_state.json +307 -0
qwen3_8b_3/checkpoint-15/training_args.bin +3 -0
qwen3_8b_3/checkpoint-15/vocab.json +0 -0
qwen3_8b_3/checkpoint-20/README.md +209 -0
qwen3_8b_3/checkpoint-20/adapter_config.json +42 -0
qwen3_8b_3/checkpoint-20/adapter_model.safetensors +3 -0
qwen3_8b_3/checkpoint-20/added_tokens.json +28 -0
qwen3_8b_3/checkpoint-20/chat_template.jinja +89 -0
qwen3_8b_3/checkpoint-20/merges.txt +0 -0
qwen3_8b_3/checkpoint-20/optimizer.pt +3 -0
qwen3_8b_3/checkpoint-20/rng_state_0.pth +3 -0
qwen3_8b_3/checkpoint-20/rng_state_1.pth +3 -0
qwen3_8b_3/checkpoint-20/rng_state_2.pth +3 -0
qwen3_8b_3/checkpoint-20/rng_state_3.pth +3 -0
qwen3_8b_3/checkpoint-20/scheduler.pt +3 -0
qwen3_8b_3/checkpoint-20/special_tokens_map.json +31 -0

.gitattributes CHANGED Viewed

@@ -47,3 +47,11 @@ runs/gpt_oss_20b_dpo_run_2/dpo_model/tokenizer.json filter=lfs diff=lfs merge=lf
 runs/gpt_oss_20b_dpo_run_v0/checkpoint-32/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 runs/gpt_oss_20b_dpo_run_v0/checkpoint-64/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 runs/gpt_oss_20b_dpo_run_v0/checkpoint-96/tokenizer.json filter=lfs diff=lfs merge=lfs -text

 runs/gpt_oss_20b_dpo_run_v0/checkpoint-32/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 runs/gpt_oss_20b_dpo_run_v0/checkpoint-64/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 runs/gpt_oss_20b_dpo_run_v0/checkpoint-96/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/checkpoint-10/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/checkpoint-15/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/checkpoint-25/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/checkpoint-30/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/checkpoint-32/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/checkpoint-5/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+qwen3_8b_3/dpo_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text

qwen3_8b_3/checkpoint-10/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: Qwen/Qwen3-8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3-8B
+- dpo
+- lora
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

qwen3_8b_3/checkpoint-10/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-8B",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "q_proj",
+    "up_proj",
+    "k_proj",
+    "o_proj",
+    "gate_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

qwen3_8b_3/checkpoint-10/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71e6b8f623383e1a0ca88cfc01a6210fd4a4b539cfb365c3474f2290ff23fbff
+size 349243752

qwen3_8b_3/checkpoint-10/added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

qwen3_8b_3/checkpoint-10/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

qwen3_8b_3/checkpoint-10/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3_8b_3/checkpoint-10/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6db4933d470f1c69e677f48b24f15944e14405c9e54cde9339092df33f0e404
+size 698784715

qwen3_8b_3/checkpoint-10/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c213a373f3e5d95993ad095a3790a902d821a1b4b93a10cc7d382c8726fcb9d
+size 15429

qwen3_8b_3/checkpoint-10/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8fb125336725f7741cb4daa1e3d06e225bbacfde8d41c4dcabb6762c222e62c6
+size 15429

qwen3_8b_3/checkpoint-10/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:153c865f77c7129ba565bded50f334683d51c80f20e3cfec39e62f8737b86f0d
+size 15429

qwen3_8b_3/checkpoint-10/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d017ce00fcebac7edc058ddd138f194eb0340f2d8ad0879bdab08f922ed0846e
+size 15429

qwen3_8b_3/checkpoint-10/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12199ed90a64482e128bc60decd219d210040d3f23b89c739d2eabe3d361e563
+size 1465

qwen3_8b_3/checkpoint-10/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen3_8b_3/checkpoint-10/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654

qwen3_8b_3/checkpoint-10/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,239 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

qwen3_8b_3/checkpoint-10/trainer_state.json ADDED Viewed

	@@ -0,0 +1,216 @@

+{
+  "best_global_step": 10,
+  "best_metric": 1.3486661911010742,
+  "best_model_checkpoint": "qwen/second_stage/runs/qwen3_8b_3/checkpoint-10",
+  "epoch": 0.32,
+  "eval_steps": 5,
+  "global_step": 10,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.032,
+      "grad_norm": 14.114611625671387,
+      "learning_rate": 0.0,
+      "logits/chosen": -0.454238623380661,
+      "logits/rejected": 0.5041788220405579,
+      "logps/chosen": -0.8831520676612854,
+      "logps/rejected": -1.6886062622070312,
+      "loss": 25.263,
+      "rewards/accuracies": 0.5,
+      "rewards/chosen": 0.021818259730935097,
+      "rewards/margins": -0.0016628885641694069,
+      "rewards/rejected": 0.023481149226427078,
+      "step": 1
+    },
+    {
+      "epoch": 0.064,
+      "grad_norm": 13.774591445922852,
+      "learning_rate": 0.001,
+      "logits/chosen": -0.4356040060520172,
+      "logits/rejected": 0.6535270810127258,
+      "logps/chosen": -0.7930545210838318,
+      "logps/rejected": -1.6992627382278442,
+      "loss": 24.9372,
+      "rewards/accuracies": 0.515625,
+      "rewards/chosen": 0.024832097813487053,
+      "rewards/margins": 0.001346035161986947,
+      "rewards/rejected": 0.02348605915904045,
+      "step": 2
+    },
+    {
+      "epoch": 0.096,
+      "grad_norm": 18.699243545532227,
+      "learning_rate": 0.0009974346616959476,
+      "logits/chosen": -1.930757999420166,
+      "logits/rejected": -1.5471129417419434,
+      "logps/chosen": -0.9101159572601318,
+      "logps/rejected": -4.2864508628845215,
+      "loss": 7.4745,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": 0.006065011955797672,
+      "rewards/margins": 0.24669529497623444,
+      "rewards/rejected": -0.24063026905059814,
+      "step": 3
+    },
+    {
+      "epoch": 0.128,
+      "grad_norm": 119.65816497802734,
+      "learning_rate": 0.0009897649706262473,
+      "logits/chosen": -5.221794128417969,
+      "logits/rejected": -6.154142379760742,
+      "logps/chosen": -2.3687195777893066,
+      "logps/rejected": -11.728078842163086,
+      "loss": 23.0527,
+      "rewards/accuracies": 0.96875,
+      "rewards/chosen": -0.13072913885116577,
+      "rewards/margins": 0.8546823263168335,
+      "rewards/rejected": -0.9854114651679993,
+      "step": 4
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 39.12310028076172,
+      "learning_rate": 0.0009770696282000244,
+      "logits/chosen": -6.345037460327148,
+      "logits/rejected": -5.794806957244873,
+      "logps/chosen": -3.950467109680176,
+      "logps/rejected": -7.177702903747559,
+      "loss": 8.7468,
+      "rewards/accuracies": 0.984375,
+      "rewards/chosen": -0.2761165499687195,
+      "rewards/margins": 0.24939922988414764,
+      "rewards/rejected": -0.5255157351493835,
+      "step": 5
+    },
+    {
+      "epoch": 0.16,
+      "eval_logits/chosen": -7.03423547744751,
+      "eval_logits/rejected": -5.733846187591553,
+      "eval_logps/chosen": -2.9800822734832764,
+      "eval_logps/rejected": -8.443767547607422,
+      "eval_loss": 5.021822929382324,
+      "eval_rewards/accuracies": 0.9791666865348816,
+      "eval_rewards/chosen": -0.16144704818725586,
+      "eval_rewards/margins": 0.4812288284301758,
+      "eval_rewards/rejected": -0.6426758766174316,
+      "eval_runtime": 6.9276,
+      "eval_samples_per_second": 5.918,
+      "eval_steps_per_second": 0.866,
+      "step": 5
+    },
+    {
+      "epoch": 0.192,
+      "grad_norm": 19.448589324951172,
+      "learning_rate": 0.0009594789058101153,
+      "logits/chosen": -7.081980228424072,
+      "logits/rejected": -6.156380653381348,
+      "logps/chosen": -2.4708809852600098,
+      "logps/rejected": -8.690655708312988,
+      "loss": 4.9869,
+      "rewards/accuracies": 0.984375,
+      "rewards/chosen": -0.13445983827114105,
+      "rewards/margins": 0.538560688495636,
+      "rewards/rejected": -0.6730204820632935,
+      "step": 6
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 15.258103370666504,
+      "learning_rate": 0.0009371733080722911,
+      "logits/chosen": -7.036907196044922,
+      "logits/rejected": -6.5072021484375,
+      "logps/chosen": -2.307884693145752,
+      "logps/rejected": -6.90288782119751,
+      "loss": 3.4893,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.12273220717906952,
+      "rewards/margins": 0.37964367866516113,
+      "rewards/rejected": -0.5023759007453918,
+      "step": 7
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 11.810518264770508,
+      "learning_rate": 0.0009103817206036382,
+      "logits/chosen": -6.840868949890137,
+      "logits/rejected": -5.986395835876465,
+      "logps/chosen": -2.2396152019500732,
+      "logps/rejected": -8.245683670043945,
+      "loss": 2.186,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.11302613466978073,
+      "rewards/margins": 0.5280519127845764,
+      "rewards/rejected": -0.6410781145095825,
+      "step": 8
+    },
+    {
+      "epoch": 0.288,
+      "grad_norm": 7.8574113845825195,
+      "learning_rate": 0.0008793790613463954,
+      "logits/chosen": -7.188082695007324,
+      "logits/rejected": -5.950884819030762,
+      "logps/chosen": -2.0123586654663086,
+      "logps/rejected": -7.236145496368408,
+      "loss": 1.4581,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.09425050020217896,
+      "rewards/margins": 0.43530386686325073,
+      "rewards/rejected": -0.5295543670654297,
+      "step": 9
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 28.801916122436523,
+      "learning_rate": 0.0008444834595378434,
+      "logits/chosen": -6.633468151092529,
+      "logits/rejected": -5.895615577697754,
+      "logps/chosen": -2.0892837047576904,
+      "logps/rejected": -8.721449851989746,
+      "loss": 2.2422,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.09345773607492447,
+      "rewards/margins": 0.5905798077583313,
+      "rewards/rejected": -0.6840375065803528,
+      "step": 10
+    },
+    {
+      "epoch": 0.32,
+      "eval_logits/chosen": -6.642605304718018,
+      "eval_logits/rejected": -5.791879653930664,
+      "eval_logps/chosen": -2.138715982437134,
+      "eval_logps/rejected": -7.363453388214111,
+      "eval_loss": 1.3486661911010742,
+      "eval_rewards/accuracies": 1.0,
+      "eval_rewards/chosen": -0.07731043547391891,
+      "eval_rewards/margins": 0.45733413100242615,
+      "eval_rewards/rejected": -0.5346445441246033,
+      "eval_runtime": 8.1985,
+      "eval_samples_per_second": 5.001,
+      "eval_steps_per_second": 0.732,
+      "step": 10
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 32,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 5,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

qwen3_8b_3/checkpoint-10/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b31ce91dbe3aa48f4b8cf32a6841a310c9be94a888a82634aaa01e4c0e513e9e
+size 6673

qwen3_8b_3/checkpoint-10/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3_8b_3/checkpoint-15/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: Qwen/Qwen3-8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3-8B
+- dpo
+- lora
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

qwen3_8b_3/checkpoint-15/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-8B",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "q_proj",
+    "up_proj",
+    "k_proj",
+    "o_proj",
+    "gate_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

qwen3_8b_3/checkpoint-15/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aca7eb1e56190595323f291478c2fd72005fb6acdde59452af44226d9d611a5f
+size 349243752

qwen3_8b_3/checkpoint-15/added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

qwen3_8b_3/checkpoint-15/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

qwen3_8b_3/checkpoint-15/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3_8b_3/checkpoint-15/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e66ded5b13f9394952a153edc45eea84a4e33ebc58f8a7062a1e81b022eb9ea4
+size 698784715

qwen3_8b_3/checkpoint-15/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dc21751f56a807ad2a7d09eea3bbe867a0c8e0f3d829004cfe097808a8a849d
+size 15429

qwen3_8b_3/checkpoint-15/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69adbd9997461473344beb9c44d2e496e24fbc4d6fe69245ab0bd127882efd96
+size 15429

qwen3_8b_3/checkpoint-15/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:baaf7026d9e588ca2ba5b4de8768a379982b7530f39e0b88fa44af9bef8e8bc9
+size 15429

qwen3_8b_3/checkpoint-15/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ec81cbdf599c90b49fb13998f6e46e2492b55345216231b6ce078f88cf04eae
+size 15429

qwen3_8b_3/checkpoint-15/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d8949ce262d00533ca38d27e4373ffd2f11dc0dd4102f2a0ff5dc8d9b41a583
+size 1465

qwen3_8b_3/checkpoint-15/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

qwen3_8b_3/checkpoint-15/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654

qwen3_8b_3/checkpoint-15/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,239 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

qwen3_8b_3/checkpoint-15/trainer_state.json ADDED Viewed

	@@ -0,0 +1,307 @@

+{
+  "best_global_step": 15,
+  "best_metric": 0.626511812210083,
+  "best_model_checkpoint": "qwen/second_stage/runs/qwen3_8b_3/checkpoint-15",
+  "epoch": 0.48,
+  "eval_steps": 5,
+  "global_step": 15,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.032,
+      "grad_norm": 14.114611625671387,
+      "learning_rate": 0.0,
+      "logits/chosen": -0.454238623380661,
+      "logits/rejected": 0.5041788220405579,
+      "logps/chosen": -0.8831520676612854,
+      "logps/rejected": -1.6886062622070312,
+      "loss": 25.263,
+      "rewards/accuracies": 0.5,
+      "rewards/chosen": 0.021818259730935097,
+      "rewards/margins": -0.0016628885641694069,
+      "rewards/rejected": 0.023481149226427078,
+      "step": 1
+    },
+    {
+      "epoch": 0.064,
+      "grad_norm": 13.774591445922852,
+      "learning_rate": 0.001,
+      "logits/chosen": -0.4356040060520172,
+      "logits/rejected": 0.6535270810127258,
+      "logps/chosen": -0.7930545210838318,
+      "logps/rejected": -1.6992627382278442,
+      "loss": 24.9372,
+      "rewards/accuracies": 0.515625,
+      "rewards/chosen": 0.024832097813487053,
+      "rewards/margins": 0.001346035161986947,
+      "rewards/rejected": 0.02348605915904045,
+      "step": 2
+    },
+    {
+      "epoch": 0.096,
+      "grad_norm": 18.699243545532227,
+      "learning_rate": 0.0009974346616959476,
+      "logits/chosen": -1.930757999420166,
+      "logits/rejected": -1.5471129417419434,
+      "logps/chosen": -0.9101159572601318,
+      "logps/rejected": -4.2864508628845215,
+      "loss": 7.4745,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": 0.006065011955797672,
+      "rewards/margins": 0.24669529497623444,
+      "rewards/rejected": -0.24063026905059814,
+      "step": 3
+    },
+    {
+      "epoch": 0.128,
+      "grad_norm": 119.65816497802734,
+      "learning_rate": 0.0009897649706262473,
+      "logits/chosen": -5.221794128417969,
+      "logits/rejected": -6.154142379760742,
+      "logps/chosen": -2.3687195777893066,
+      "logps/rejected": -11.728078842163086,
+      "loss": 23.0527,
+      "rewards/accuracies": 0.96875,
+      "rewards/chosen": -0.13072913885116577,
+      "rewards/margins": 0.8546823263168335,
+      "rewards/rejected": -0.9854114651679993,
+      "step": 4
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 39.12310028076172,
+      "learning_rate": 0.0009770696282000244,
+      "logits/chosen": -6.345037460327148,
+      "logits/rejected": -5.794806957244873,
+      "logps/chosen": -3.950467109680176,
+      "logps/rejected": -7.177702903747559,
+      "loss": 8.7468,
+      "rewards/accuracies": 0.984375,
+      "rewards/chosen": -0.2761165499687195,
+      "rewards/margins": 0.24939922988414764,
+      "rewards/rejected": -0.5255157351493835,
+      "step": 5
+    },
+    {
+      "epoch": 0.16,
+      "eval_logits/chosen": -7.03423547744751,
+      "eval_logits/rejected": -5.733846187591553,
+      "eval_logps/chosen": -2.9800822734832764,
+      "eval_logps/rejected": -8.443767547607422,
+      "eval_loss": 5.021822929382324,
+      "eval_rewards/accuracies": 0.9791666865348816,
+      "eval_rewards/chosen": -0.16144704818725586,
+      "eval_rewards/margins": 0.4812288284301758,
+      "eval_rewards/rejected": -0.6426758766174316,
+      "eval_runtime": 6.9276,
+      "eval_samples_per_second": 5.918,
+      "eval_steps_per_second": 0.866,
+      "step": 5
+    },
+    {
+      "epoch": 0.192,
+      "grad_norm": 19.448589324951172,
+      "learning_rate": 0.0009594789058101153,
+      "logits/chosen": -7.081980228424072,
+      "logits/rejected": -6.156380653381348,
+      "logps/chosen": -2.4708809852600098,
+      "logps/rejected": -8.690655708312988,
+      "loss": 4.9869,
+      "rewards/accuracies": 0.984375,
+      "rewards/chosen": -0.13445983827114105,
+      "rewards/margins": 0.538560688495636,
+      "rewards/rejected": -0.6730204820632935,
+      "step": 6
+    },
+    {
+      "epoch": 0.224,
+      "grad_norm": 15.258103370666504,
+      "learning_rate": 0.0009371733080722911,
+      "logits/chosen": -7.036907196044922,
+      "logits/rejected": -6.5072021484375,
+      "logps/chosen": -2.307884693145752,
+      "logps/rejected": -6.90288782119751,
+      "loss": 3.4893,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.12273220717906952,
+      "rewards/margins": 0.37964367866516113,
+      "rewards/rejected": -0.5023759007453918,
+      "step": 7
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 11.810518264770508,
+      "learning_rate": 0.0009103817206036382,
+      "logits/chosen": -6.840868949890137,
+      "logits/rejected": -5.986395835876465,
+      "logps/chosen": -2.2396152019500732,
+      "logps/rejected": -8.245683670043945,
+      "loss": 2.186,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.11302613466978073,
+      "rewards/margins": 0.5280519127845764,
+      "rewards/rejected": -0.6410781145095825,
+      "step": 8
+    },
+    {
+      "epoch": 0.288,
+      "grad_norm": 7.8574113845825195,
+      "learning_rate": 0.0008793790613463954,
+      "logits/chosen": -7.188082695007324,
+      "logits/rejected": -5.950884819030762,
+      "logps/chosen": -2.0123586654663086,
+      "logps/rejected": -7.236145496368408,
+      "loss": 1.4581,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.09425050020217896,
+      "rewards/margins": 0.43530386686325073,
+      "rewards/rejected": -0.5295543670654297,
+      "step": 9
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 28.801916122436523,
+      "learning_rate": 0.0008444834595378434,
+      "logits/chosen": -6.633468151092529,
+      "logits/rejected": -5.895615577697754,
+      "logps/chosen": -2.0892837047576904,
+      "logps/rejected": -8.721449851989746,
+      "loss": 2.2422,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.09345773607492447,
+      "rewards/margins": 0.5905798077583313,
+      "rewards/rejected": -0.6840375065803528,
+      "step": 10
+    },
+    {
+      "epoch": 0.32,
+      "eval_logits/chosen": -6.642605304718018,
+      "eval_logits/rejected": -5.791879653930664,
+      "eval_logps/chosen": -2.138715982437134,
+      "eval_logps/rejected": -7.363453388214111,
+      "eval_loss": 1.3486661911010742,
+      "eval_rewards/accuracies": 1.0,
+      "eval_rewards/chosen": -0.07731043547391891,
+      "eval_rewards/margins": 0.45733413100242615,
+      "eval_rewards/rejected": -0.5346445441246033,
+      "eval_runtime": 8.1985,
+      "eval_samples_per_second": 5.001,
+      "eval_steps_per_second": 0.732,
+      "step": 10
+    },
+    {
+      "epoch": 0.352,
+      "grad_norm": 5.376946926116943,
+      "learning_rate": 0.0008060529912738315,
+      "logits/chosen": -6.831246376037598,
+      "logits/rejected": -6.058417320251465,
+      "logps/chosen": -1.8856828212738037,
+      "logps/rejected": -7.155683517456055,
+      "loss": 1.1688,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.07591883838176727,
+      "rewards/margins": 0.44510185718536377,
+      "rewards/rejected": -0.5210206508636475,
+      "step": 11
+    },
+    {
+      "epoch": 0.384,
+      "grad_norm": 2.857236385345459,
+      "learning_rate": 0.0007644820051634812,
+      "logits/chosen": -6.540628433227539,
+      "logits/rejected": -5.897671699523926,
+      "logps/chosen": -1.878401517868042,
+      "logps/rejected": -7.4306640625,
+      "loss": 0.9516,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.06635423749685287,
+      "rewards/margins": 0.4851081073284149,
+      "rewards/rejected": -0.5514623522758484,
+      "step": 12
+    },
+    {
+      "epoch": 0.416,
+      "grad_norm": 7.119859218597412,
+      "learning_rate": 0.0007201970757788173,
+      "logits/chosen": -5.268195629119873,
+      "logits/rejected": -5.089325904846191,
+      "logps/chosen": -1.7320910692214966,
+      "logps/rejected": -7.690831184387207,
+      "loss": 1.2226,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.052271775901317596,
+      "rewards/margins": 0.5345346927642822,
+      "rewards/rejected": -0.5868064761161804,
+      "step": 13
+    },
+    {
+      "epoch": 0.448,
+      "grad_norm": 6.523738861083984,
+      "learning_rate": 0.0006736526264224101,
+      "logits/chosen": -4.409873962402344,
+      "logits/rejected": -3.217135429382324,
+      "logps/chosen": -1.5367412567138672,
+      "logps/rejected": -6.787916660308838,
+      "loss": 0.8654,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.04359198734164238,
+      "rewards/margins": 0.450799822807312,
+      "rewards/rejected": -0.4943917989730835,
+      "step": 14
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 2.9514577388763428,
+      "learning_rate": 0.0006253262661293602,
+      "logits/chosen": -4.519381999969482,
+      "logits/rejected": -3.7837789058685303,
+      "logps/chosen": -1.4599652290344238,
+      "logps/rejected": -7.043492794036865,
+      "loss": 0.6454,
+      "rewards/accuracies": 1.0,
+      "rewards/chosen": -0.04181559383869171,
+      "rewards/margins": 0.4759097099304199,
+      "rewards/rejected": -0.5177252888679504,
+      "step": 15
+    },
+    {
+      "epoch": 0.48,
+      "eval_logits/chosen": -5.136578559875488,
+      "eval_logits/rejected": -4.644644260406494,
+      "eval_logps/chosen": -1.9111665487289429,
+      "eval_logps/rejected": -7.827307224273682,
+      "eval_loss": 0.626511812210083,
+      "eval_rewards/accuracies": 1.0,
+      "eval_rewards/chosen": -0.054555494338274,
+      "eval_rewards/margins": 0.5264745354652405,
+      "eval_rewards/rejected": -0.5810299515724182,
+      "eval_runtime": 8.4075,
+      "eval_samples_per_second": 4.877,
+      "eval_steps_per_second": 0.714,
+      "step": 15
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 32,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 5,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

qwen3_8b_3/checkpoint-15/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b31ce91dbe3aa48f4b8cf32a6841a310c9be94a888a82634aaa01e4c0e513e9e
+size 6673

qwen3_8b_3/checkpoint-15/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3_8b_3/checkpoint-20/README.md ADDED Viewed

	@@ -0,0 +1,209 @@

+---
+base_model: Qwen/Qwen3-8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3-8B
+- dpo
+- lora
+- transformers
+- trl
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

qwen3_8b_3/checkpoint-20/adapter_config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-8B",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "down_proj",
+    "q_proj",
+    "up_proj",
+    "k_proj",
+    "o_proj",
+    "gate_proj",
+    "v_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

qwen3_8b_3/checkpoint-20/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03531e28a05b8a162e1e9ebc192431e43f1adf63fb462ec01ee7b9c8301a4a54
+size 349243752

qwen3_8b_3/checkpoint-20/added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

qwen3_8b_3/checkpoint-20/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

qwen3_8b_3/checkpoint-20/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

qwen3_8b_3/checkpoint-20/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3ea32737ab368f0c937348cc4f0d4d361206f8c46bfaa1389da2edf2ae8ed92
+size 698784715

qwen3_8b_3/checkpoint-20/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81d5f83aeb4b3f559bd28377336d47659b320e7f6ef2e5a723d284716278a151
+size 15429

qwen3_8b_3/checkpoint-20/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2626437dcb133ffcf003ac89603f8cce07459b93a98d760cd9419e0d6a994067
+size 15429

qwen3_8b_3/checkpoint-20/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae777e24d50cb7159634e1245f0697ba0fc64d5b26d535f2c80e411371a90b1c
+size 15429

qwen3_8b_3/checkpoint-20/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afc5a67564eebcfc961e8f1406a7418cc73497c2935a39af0232ef59f8153a6a
+size 15429

qwen3_8b_3/checkpoint-20/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1aecd9b306507bbaa49918ff2dd5f607827e1001d69ae624962d634d4045561d
+size 1465

qwen3_8b_3/checkpoint-20/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}