diff --git a/.gitattributes b/.gitattributes
index 7c4d198c64c9b312011f5b41f771b1d718b344da..04c2e80fe2328391a664dea3f0b2001ce7d036d3 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -86,3 +86,20 @@ L3/checkpoints/checkpoint-70/tokenizer.json filter=lfs diff=lfs merge=lfs -text
L3/checkpoints/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text
L3/checkpoints/checkpoint-90/tokenizer.json filter=lfs diff=lfs merge=lfs -text
L4/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-10/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-110/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-120/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-130/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-140/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-160/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-162/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-30/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-40/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-60/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-70/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-80/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+L4/checkpoints/checkpoint-90/tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/L4/checkpoints/README.md b/L4/checkpoints/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c9e1b45c3f2c163e22e44bb30165b4f69ede96ae
--- /dev/null
+++ b/L4/checkpoints/README.md
@@ -0,0 +1,58 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: transformers
+model_name: checkpoints
+tags:
+- generated_from_trainer
+- trl
+- sft
+licence: license
+---
+
+# Model Card for checkpoints
+
+This model is a fine-tuned version of [Qwen/Qwen3.5-0.8B](https://huggingface.co/Qwen/Qwen3.5-0.8B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="L4/checkpoints/checkpoint-162", device="cuda")  # final checkpoint (step 162); path is relative to the repository root
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+
+
+
+
+This model was trained with SFT.
+
+### Framework versions
+
+- TRL: 1.2.0.dev0
+- Transformers: 5.6.0.dev0
+- PyTorch: 2.9.0.dev20250803
+- Datasets: 4.8.4
+- Tokenizers: 0.22.1
+
+## Citations
+
+
+
+Cite TRL as:
+
+```bibtex
+@software{vonwerra2020trl,
+ title = {{TRL: Transformers Reinforcement Learning}},
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
+ license = {Apache-2.0},
+ url = {https://github.com/huggingface/trl},
+ year = {2020}
+}
+```
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-10/README.md b/L4/checkpoints/checkpoint-10/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Qwen3.5-0.8B LoRA adapter (checkpoint-10)
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** Qwen/Qwen3.5-0.8B
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-10/adapter_config.json b/L4/checkpoints/checkpoint-10/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-10/adapter_model.safetensors b/L4/checkpoints/checkpoint-10/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e62ce14a0d24d50e8342bff78a30302e0f8e476e
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dab7f028a48fc6441a02bb265cc682fcba1dd148e82f5cae888151fd023119c
+size 4331744
diff --git a/L4/checkpoints/checkpoint-10/chat_template.jinja b/L4/checkpoints/checkpoint-10/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}{#- Counters live in namespaces so the macro below can update them. -#}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}{#- Render message content: strings pass through; list items become text or vision placeholder tokens. -#}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}{#- With tools: emit a system turn listing the tool schemas and the required call format. -#}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}{#- Scan backwards for the last user message that is not a wrapped tool response. -#}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}{#- Split inline reasoning from the visible answer. -#}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}{#- Reasoning is only re-emitted for assistant turns after the last user query. -#}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- else %}
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '<parameter=' + args_name + '>\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n</parameter>\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '</function>\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}{#- Consecutive tool results are grouped into a single user turn. -#}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '<think>\n' }}
+ {%- else %}{#- Thinking disabled: pre-fill an empty think block. -#}
+ {{- '<think>\n\n</think>\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-10/optimizer.pt b/L4/checkpoints/checkpoint-10/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f6d7a38e03090547bc9dddde43c0e425177295a
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61084765924c0a20bb2597dffd550f9d7ba0db5311fe9b9e8b611c38aa8c45ad
+size 8690571
diff --git a/L4/checkpoints/checkpoint-10/rng_state.pth b/L4/checkpoints/checkpoint-10/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3f5c250a692e034a27fb06d7008612b2de078116
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cddf27219365242ec1046a3532a63a24c3f350c77f100e4f973369db2cc849d
+size 14455
diff --git a/L4/checkpoints/checkpoint-10/scheduler.pt b/L4/checkpoints/checkpoint-10/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3237d87628444a17fc52edb3eda974c715b7ab2
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8b68e33df2b5a248085d87f5dcc9bdf10abed42334014a5d7ca690173ee3035
+size 1465
diff --git a/L4/checkpoints/checkpoint-10/tokenizer.json b/L4/checkpoints/checkpoint-10/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-10/tokenizer_config.json b/L4/checkpoints/checkpoint-10/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-10/trainer_state.json b/L4/checkpoints/checkpoint-10/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bc49298f0ba4872c6141ee423e428e5d9e201acf
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/trainer_state.json
@@ -0,0 +1,44 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.18518518518518517,
+ "eval_steps": 500,
+ "global_step": 10,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 110198225765376.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-10/training_args.bin b/L4/checkpoints/checkpoint-10/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-10/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-100/README.md b/L4/checkpoints/checkpoint-100/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Qwen3.5-0.8B LoRA adapter (checkpoint-100)
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** Qwen/Qwen3.5-0.8B
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-100/adapter_config.json b/L4/checkpoints/checkpoint-100/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-100/adapter_model.safetensors b/L4/checkpoints/checkpoint-100/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b61de32f50f8fced880505d01d4c9495d7270596
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bd3ef396d736fbf207569391a973260367b81aa6113582d779b86401914f618
+size 4331744
diff --git a/L4/checkpoints/checkpoint-100/chat_template.jinja b/L4/checkpoints/checkpoint-100/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}{#- Counters live in namespaces so the macro below can update them. -#}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}{#- Render message content: strings pass through; list items become text or vision placeholder tokens. -#}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}{#- With tools: emit a system turn listing the tool schemas and the required call format. -#}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}{#- Scan backwards for the last user message that is not a wrapped tool response. -#}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}{#- Split inline reasoning from the visible answer. -#}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}{#- Reasoning is only re-emitted for assistant turns after the last user query. -#}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- else %}
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '<parameter=' + args_name + '>\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n</parameter>\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '</function>\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}{#- Consecutive tool results are grouped into a single user turn. -#}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '<think>\n' }}
+ {%- else %}{#- Thinking disabled: pre-fill an empty think block. -#}
+ {{- '<think>\n\n</think>\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-100/optimizer.pt b/L4/checkpoints/checkpoint-100/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a90d8b60d5d6160e72e160c9972eec9ed3843bb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c04a1ab55059980f33fcf7e9f684674a49a140c00242f5c66cd61ad040c3b04
+size 8690571
diff --git a/L4/checkpoints/checkpoint-100/rng_state.pth b/L4/checkpoints/checkpoint-100/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..38c6d5df0a659b81300f358fefb31a6f4aa51ddc
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3a77d4a8b98ce027a4d6a3b9fb5d7c904e27ec1efd5c0468c24fa26bb738316
+size 14455
diff --git a/L4/checkpoints/checkpoint-100/scheduler.pt b/L4/checkpoints/checkpoint-100/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..67d08d758374cf60dc627946bba4b42c42370ed5
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e00d33be0917c0b5210024798737a7aeb58666c38314eddc7704939924dd026
+size 1465
diff --git a/L4/checkpoints/checkpoint-100/tokenizer.json b/L4/checkpoints/checkpoint-100/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-100/tokenizer_config.json b/L4/checkpoints/checkpoint-100/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-100/trainer_state.json b/L4/checkpoints/checkpoint-100/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..29010e602576625363aebb893f6083c979af5d97
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/trainer_state.json
@@ -0,0 +1,134 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.8518518518518519,
+ "eval_steps": 500,
+ "global_step": 100,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1109649888061440.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-100/training_args.bin b/L4/checkpoints/checkpoint-100/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-100/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-110/README.md b/L4/checkpoints/checkpoint-110/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-110/adapter_config.json b/L4/checkpoints/checkpoint-110/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-110/adapter_model.safetensors b/L4/checkpoints/checkpoint-110/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c5aee0456f37638e2e4a10927a23b2a900ab0621
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e2803ccdcf2de8d91072083618a287083f25ea755f6e5345a9b395403ba5169
+size 4331744
diff --git a/L4/checkpoints/checkpoint-110/chat_template.jinja b/L4/checkpoints/checkpoint-110/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-110/optimizer.pt b/L4/checkpoints/checkpoint-110/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8fc46661005461e717fd25b24251a33024f9c9fc
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cffca1fbd45bb9fa38535cdc431e7c0f8adbdc17c784ea14ef5d829d5d727873
+size 8690571
diff --git a/L4/checkpoints/checkpoint-110/rng_state.pth b/L4/checkpoints/checkpoint-110/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d150fc4504460cdea273ef2adcb04a65c626709
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
+size 14455
diff --git a/L4/checkpoints/checkpoint-110/scheduler.pt b/L4/checkpoints/checkpoint-110/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..05db705f0f0e113e3f741e83189c1525f046d027
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b6114c14538c6a24409b17a5381ef6edd627202dcd401b7c07e261ab92c1ef8
+size 1465
diff --git a/L4/checkpoints/checkpoint-110/tokenizer.json b/L4/checkpoints/checkpoint-110/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-110/tokenizer_config.json b/L4/checkpoints/checkpoint-110/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-110/trainer_state.json b/L4/checkpoints/checkpoint-110/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..77172e57e1a872749858f6f2462d3184f5512b9a
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/trainer_state.json
@@ -0,0 +1,144 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.037037037037037,
+ "eval_steps": 500,
+ "global_step": 110,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ },
+ {
+ "entropy": 1.35595703125,
+ "epoch": 2.037037037037037,
+ "grad_norm": 0.8151571154594421,
+ "learning_rate": 5.297280930072632e-05,
+ "loss": 1.3445645332336427,
+ "mean_token_accuracy": 0.6957452893257141,
+ "num_tokens": 391407.0,
+ "step": 110
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1220980287379200.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-110/training_args.bin b/L4/checkpoints/checkpoint-110/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-110/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-120/README.md b/L4/checkpoints/checkpoint-120/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-120/adapter_config.json b/L4/checkpoints/checkpoint-120/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-120/adapter_model.safetensors b/L4/checkpoints/checkpoint-120/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..18c28d94979c2af60095b84b922bd98591da6550
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad0cb56f64af7de739733f5eb320efa9507ad05495f5fb929d1463b40547186d
+size 4331744
diff --git a/L4/checkpoints/checkpoint-120/chat_template.jinja b/L4/checkpoints/checkpoint-120/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-120/optimizer.pt b/L4/checkpoints/checkpoint-120/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ff23da4d00f187f606bf90fa3307a2fbdad5f8c
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11452839f675114886275225b4dc98857c6fd4884bdc6582362971c71da242b3
+size 8690571
diff --git a/L4/checkpoints/checkpoint-120/rng_state.pth b/L4/checkpoints/checkpoint-120/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d150fc4504460cdea273ef2adcb04a65c626709
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
+size 14455
diff --git a/L4/checkpoints/checkpoint-120/scheduler.pt b/L4/checkpoints/checkpoint-120/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8a7671afeb8936f2194debbd1e09ab963b8ab8ec
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d21a30c99cd985d796017d383bda5f811b8c5d381bee54ede8f14f1b74ad8e29
+size 1465
diff --git a/L4/checkpoints/checkpoint-120/tokenizer.json b/L4/checkpoints/checkpoint-120/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-120/tokenizer_config.json b/L4/checkpoints/checkpoint-120/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-120/trainer_state.json b/L4/checkpoints/checkpoint-120/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cacebb18df8e8c6875b048b27b48c5bd01aa06eb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/trainer_state.json
@@ -0,0 +1,154 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.2222222222222223,
+ "eval_steps": 500,
+ "global_step": 120,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ },
+ {
+ "entropy": 1.35595703125,
+ "epoch": 2.037037037037037,
+ "grad_norm": 0.8151571154594421,
+ "learning_rate": 5.297280930072632e-05,
+ "loss": 1.3445645332336427,
+ "mean_token_accuracy": 0.6957452893257141,
+ "num_tokens": 391407.0,
+ "step": 110
+ },
+ {
+ "entropy": 1.36494140625,
+ "epoch": 2.2222222222222223,
+ "grad_norm": 0.77378249168396,
+ "learning_rate": 3.606917100644487e-05,
+ "loss": 1.3390249252319335,
+ "mean_token_accuracy": 0.698172252625227,
+ "num_tokens": 427190.0,
+ "step": 120
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1332154937954304.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-120/training_args.bin b/L4/checkpoints/checkpoint-120/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-120/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-130/README.md b/L4/checkpoints/checkpoint-130/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-130/adapter_config.json b/L4/checkpoints/checkpoint-130/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-130/adapter_model.safetensors b/L4/checkpoints/checkpoint-130/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30558950abdd84d9f7c42502c21cbc98c2835376
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c2169b864209ec6223b738e5785c090a91c73e657d0793fde449cc8848a558a
+size 4331744
diff --git a/L4/checkpoints/checkpoint-130/chat_template.jinja b/L4/checkpoints/checkpoint-130/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-130/optimizer.pt b/L4/checkpoints/checkpoint-130/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a076f09d7bbc77ae2410269d609554f015209f8
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72ffbd6f96c834a11c8cbf8ddc93456ddd870f21a4b50c82a6e8f3704cc1b638
+size 8690571
diff --git a/L4/checkpoints/checkpoint-130/rng_state.pth b/L4/checkpoints/checkpoint-130/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d150fc4504460cdea273ef2adcb04a65c626709
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
+size 14455
diff --git a/L4/checkpoints/checkpoint-130/scheduler.pt b/L4/checkpoints/checkpoint-130/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da534c615c44fe87746b51e1736cf530d6fa8d91
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee218c780bfb3eca7460817839a4d4a58ac46a178f0cbe05832d77cd19ad9508
+size 1465
diff --git a/L4/checkpoints/checkpoint-130/tokenizer.json b/L4/checkpoints/checkpoint-130/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-130/tokenizer_config.json b/L4/checkpoints/checkpoint-130/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-130/trainer_state.json b/L4/checkpoints/checkpoint-130/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..813d2cc6308e7d178d16ec73a236d6d455f88eb9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/trainer_state.json
@@ -0,0 +1,164 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.4074074074074074,
+ "eval_steps": 500,
+ "global_step": 130,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ },
+ {
+ "entropy": 1.35595703125,
+ "epoch": 2.037037037037037,
+ "grad_norm": 0.8151571154594421,
+ "learning_rate": 5.297280930072632e-05,
+ "loss": 1.3445645332336427,
+ "mean_token_accuracy": 0.6957452893257141,
+ "num_tokens": 391407.0,
+ "step": 110
+ },
+ {
+ "entropy": 1.36494140625,
+ "epoch": 2.2222222222222223,
+ "grad_norm": 0.77378249168396,
+ "learning_rate": 3.606917100644487e-05,
+ "loss": 1.3390249252319335,
+ "mean_token_accuracy": 0.698172252625227,
+ "num_tokens": 427190.0,
+ "step": 120
+ },
+ {
+ "entropy": 1.342578125,
+ "epoch": 2.4074074074074074,
+ "grad_norm": 0.893750786781311,
+ "learning_rate": 2.1816851753197032e-05,
+ "loss": 1.322931671142578,
+ "mean_token_accuracy": 0.6984936438500882,
+ "num_tokens": 462696.0,
+ "step": 130
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1442155482623232.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-130/training_args.bin b/L4/checkpoints/checkpoint-130/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-130/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-140/README.md b/L4/checkpoints/checkpoint-140/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-140/adapter_config.json b/L4/checkpoints/checkpoint-140/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-140/adapter_model.safetensors b/L4/checkpoints/checkpoint-140/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f84cd1fcfb600a4c01ff845e1d40f36a07b4f53e
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7a86bd52028d0c39ef6851903358d26ffe40e4ec557be86744afa9cad4590e7
+size 4331744
diff --git a/L4/checkpoints/checkpoint-140/chat_template.jinja b/L4/checkpoints/checkpoint-140/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-140/optimizer.pt b/L4/checkpoints/checkpoint-140/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61d4c3093eb0adbb11aa709b1e05003a8327b7a6
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ab8032492097f54d887c834a0969fed5b42bc5dfb2e635dd9e043c8a7ddfc08
+size 8690571
diff --git a/L4/checkpoints/checkpoint-140/rng_state.pth b/L4/checkpoints/checkpoint-140/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d150fc4504460cdea273ef2adcb04a65c626709
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
+size 14455
diff --git a/L4/checkpoints/checkpoint-140/scheduler.pt b/L4/checkpoints/checkpoint-140/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74b87ec4a65b622ed10754b6b1b360753b280ef1
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ac895c4d19cc1775347dce5cb0647c7e1c5a58f145be24cc270c549028b5438
+size 1465
diff --git a/L4/checkpoints/checkpoint-140/tokenizer.json b/L4/checkpoints/checkpoint-140/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-140/tokenizer_config.json b/L4/checkpoints/checkpoint-140/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-140/trainer_state.json b/L4/checkpoints/checkpoint-140/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd72f90b5eab84e3973d4544851b0bea8e864e47
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/trainer_state.json
@@ -0,0 +1,174 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.5925925925925926,
+ "eval_steps": 500,
+ "global_step": 140,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ },
+ {
+ "entropy": 1.35595703125,
+ "epoch": 2.037037037037037,
+ "grad_norm": 0.8151571154594421,
+ "learning_rate": 5.297280930072632e-05,
+ "loss": 1.3445645332336427,
+ "mean_token_accuracy": 0.6957452893257141,
+ "num_tokens": 391407.0,
+ "step": 110
+ },
+ {
+ "entropy": 1.36494140625,
+ "epoch": 2.2222222222222223,
+ "grad_norm": 0.77378249168396,
+ "learning_rate": 3.606917100644487e-05,
+ "loss": 1.3390249252319335,
+ "mean_token_accuracy": 0.698172252625227,
+ "num_tokens": 427190.0,
+ "step": 120
+ },
+ {
+ "entropy": 1.342578125,
+ "epoch": 2.4074074074074074,
+ "grad_norm": 0.893750786781311,
+ "learning_rate": 2.1816851753197032e-05,
+ "loss": 1.322931671142578,
+ "mean_token_accuracy": 0.6984936438500882,
+ "num_tokens": 462696.0,
+ "step": 130
+ },
+ {
+ "entropy": 1.339453125,
+ "epoch": 2.5925925925925926,
+ "grad_norm": 0.82007896900177,
+ "learning_rate": 1.0806919199730615e-05,
+ "loss": 1.3379673957824707,
+ "mean_token_accuracy": 0.702365966886282,
+ "num_tokens": 498412.0,
+ "step": 140
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1553330133198336.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-140/training_args.bin b/L4/checkpoints/checkpoint-140/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-140/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-150/README.md b/L4/checkpoints/checkpoint-150/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-150/adapter_config.json b/L4/checkpoints/checkpoint-150/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-150/adapter_model.safetensors b/L4/checkpoints/checkpoint-150/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e643c9aabbb8e5f176501e5f8d27903699a794fd
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4edcd2f44765c90afa3754b4c669e67c640fbf017ec4c147758bc0b6d4fd6494
+size 4331744
diff --git a/L4/checkpoints/checkpoint-150/chat_template.jinja b/L4/checkpoints/checkpoint-150/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-150/optimizer.pt b/L4/checkpoints/checkpoint-150/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0ccf38d64cc8576d99bf517ef0080db22de914a
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5244e8891d6a29478ed36b109a2f71146b274e2b7a6ab4ff5547e4d89da154b7
+size 8690571
diff --git a/L4/checkpoints/checkpoint-150/rng_state.pth b/L4/checkpoints/checkpoint-150/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d150fc4504460cdea273ef2adcb04a65c626709
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
+size 14455
diff --git a/L4/checkpoints/checkpoint-150/scheduler.pt b/L4/checkpoints/checkpoint-150/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2492fe75b793fb95d0fa44750844236453e15e2a
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:733eb885738d3668181e2d14d9ee6ca0e72b683f292da7cf2d78ac18a50f64e4
+size 1465
diff --git a/L4/checkpoints/checkpoint-150/tokenizer.json b/L4/checkpoints/checkpoint-150/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-150/tokenizer_config.json b/L4/checkpoints/checkpoint-150/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-150/trainer_state.json b/L4/checkpoints/checkpoint-150/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff6f8cbbe40a199f1b11ff64cd0f5c9214a09902
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/trainer_state.json
@@ -0,0 +1,184 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.7777777777777777,
+ "eval_steps": 500,
+ "global_step": 150,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ },
+ {
+ "entropy": 1.35595703125,
+ "epoch": 2.037037037037037,
+ "grad_norm": 0.8151571154594421,
+ "learning_rate": 5.297280930072632e-05,
+ "loss": 1.3445645332336427,
+ "mean_token_accuracy": 0.6957452893257141,
+ "num_tokens": 391407.0,
+ "step": 110
+ },
+ {
+ "entropy": 1.36494140625,
+ "epoch": 2.2222222222222223,
+ "grad_norm": 0.77378249168396,
+ "learning_rate": 3.606917100644487e-05,
+ "loss": 1.3390249252319335,
+ "mean_token_accuracy": 0.698172252625227,
+ "num_tokens": 427190.0,
+ "step": 120
+ },
+ {
+ "entropy": 1.342578125,
+ "epoch": 2.4074074074074074,
+ "grad_norm": 0.893750786781311,
+ "learning_rate": 2.1816851753197032e-05,
+ "loss": 1.322931671142578,
+ "mean_token_accuracy": 0.6984936438500882,
+ "num_tokens": 462696.0,
+ "step": 130
+ },
+ {
+ "entropy": 1.339453125,
+ "epoch": 2.5925925925925926,
+ "grad_norm": 0.82007896900177,
+ "learning_rate": 1.0806919199730615e-05,
+ "loss": 1.3379673957824707,
+ "mean_token_accuracy": 0.702365966886282,
+ "num_tokens": 498412.0,
+ "step": 140
+ },
+ {
+ "entropy": 1.33486328125,
+ "epoch": 2.7777777777777777,
+ "grad_norm": 0.8532996773719788,
+ "learning_rate": 3.495973773086014e-06,
+ "loss": 1.3067991256713867,
+ "mean_token_accuracy": 0.701677817851305,
+ "num_tokens": 533710.0,
+ "step": 150
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1663534349299968.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-150/training_args.bin b/L4/checkpoints/checkpoint-150/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-150/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-160/README.md b/L4/checkpoints/checkpoint-160/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-160/adapter_config.json b/L4/checkpoints/checkpoint-160/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-160/adapter_model.safetensors b/L4/checkpoints/checkpoint-160/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..88bb3c85e492500827a7fa57dc6f455a6f898b89
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f45f96bb8f92186629ce73ed79c625b7ad3ef597e70b81583afef6b7d75c527e
+size 4331744
diff --git a/L4/checkpoints/checkpoint-160/chat_template.jinja b/L4/checkpoints/checkpoint-160/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-160/optimizer.pt b/L4/checkpoints/checkpoint-160/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73b47a73fff0975589d04ac09e2613812203c3d5
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09e75af3a5dae9cfbcaa609ce6e4073b38535ac437167a6955eeb7eb8b1d3c2b
+size 8690571
diff --git a/L4/checkpoints/checkpoint-160/rng_state.pth b/L4/checkpoints/checkpoint-160/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d150fc4504460cdea273ef2adcb04a65c626709
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
+size 14455
diff --git a/L4/checkpoints/checkpoint-160/scheduler.pt b/L4/checkpoints/checkpoint-160/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1ec78471dfb60294af0b6cd7775232a27358f652
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e82196eeec2ce910507a790806e596e16e962d6b4cabf64ab799003e30bdcbe6
+size 1465
diff --git a/L4/checkpoints/checkpoint-160/tokenizer.json b/L4/checkpoints/checkpoint-160/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-160/tokenizer_config.json b/L4/checkpoints/checkpoint-160/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-160/trainer_state.json b/L4/checkpoints/checkpoint-160/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f3ccedcd6cece34da0635fb0fefc42dc109a08e8
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/trainer_state.json
@@ -0,0 +1,194 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.962962962962963,
+ "eval_steps": 500,
+ "global_step": 160,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ },
+ {
+ "entropy": 1.35595703125,
+ "epoch": 2.037037037037037,
+ "grad_norm": 0.8151571154594421,
+ "learning_rate": 5.297280930072632e-05,
+ "loss": 1.3445645332336427,
+ "mean_token_accuracy": 0.6957452893257141,
+ "num_tokens": 391407.0,
+ "step": 110
+ },
+ {
+ "entropy": 1.36494140625,
+ "epoch": 2.2222222222222223,
+ "grad_norm": 0.77378249168396,
+ "learning_rate": 3.606917100644487e-05,
+ "loss": 1.3390249252319335,
+ "mean_token_accuracy": 0.698172252625227,
+ "num_tokens": 427190.0,
+ "step": 120
+ },
+ {
+ "entropy": 1.342578125,
+ "epoch": 2.4074074074074074,
+ "grad_norm": 0.893750786781311,
+ "learning_rate": 2.1816851753197032e-05,
+ "loss": 1.322931671142578,
+ "mean_token_accuracy": 0.6984936438500882,
+ "num_tokens": 462696.0,
+ "step": 130
+ },
+ {
+ "entropy": 1.339453125,
+ "epoch": 2.5925925925925926,
+ "grad_norm": 0.82007896900177,
+ "learning_rate": 1.0806919199730615e-05,
+ "loss": 1.3379673957824707,
+ "mean_token_accuracy": 0.702365966886282,
+ "num_tokens": 498412.0,
+ "step": 140
+ },
+ {
+ "entropy": 1.33486328125,
+ "epoch": 2.7777777777777777,
+ "grad_norm": 0.8532996773719788,
+ "learning_rate": 3.495973773086014e-06,
+ "loss": 1.3067991256713867,
+ "mean_token_accuracy": 0.701677817851305,
+ "num_tokens": 533710.0,
+ "step": 150
+ },
+ {
+ "entropy": 1.347265625,
+ "epoch": 2.962962962962963,
+ "grad_norm": 0.8509896397590637,
+ "learning_rate": 1.8721268066330676e-07,
+ "loss": 1.3350863456726074,
+ "mean_token_accuracy": 0.7003593638539314,
+ "num_tokens": 569320.0,
+ "step": 160
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1774864748617728.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-160/training_args.bin b/L4/checkpoints/checkpoint-160/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-160/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-162/README.md b/L4/checkpoints/checkpoint-162/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-162/adapter_config.json b/L4/checkpoints/checkpoint-162/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-162/adapter_model.safetensors b/L4/checkpoints/checkpoint-162/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bd6a2da1c4e3b1fa1a6fa6f051ebce82a233f109
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:619af9fe23b36d8d97c227acac03a50232597287179aba451f53b3d745b25572
+size 4331744
diff --git a/L4/checkpoints/checkpoint-162/chat_template.jinja b/L4/checkpoints/checkpoint-162/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-162/optimizer.pt b/L4/checkpoints/checkpoint-162/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..159623ceb1f11483d580b7990ffd6f9772f47d83
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d1812ac1ddc4b76fa7a1cccec57f29a19e5e9193729f7f256ed286e970bb7d
+size 8690571
diff --git a/L4/checkpoints/checkpoint-162/rng_state.pth b/L4/checkpoints/checkpoint-162/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7d150fc4504460cdea273ef2adcb04a65c626709
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dba4fde4ee04d2f472bb4dea96a48e8fdf7891d2b0694a8f012e8133a2e176ae
+size 14455
diff --git a/L4/checkpoints/checkpoint-162/scheduler.pt b/L4/checkpoints/checkpoint-162/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d42b3c059420bb4198f32e7ac7a80c2eb92c3982
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:565abcec4d6aec8d30065f6c5aadc3015dfcdb976f3a380175b822962ef721cc
+size 1465
diff --git a/L4/checkpoints/checkpoint-162/tokenizer.json b/L4/checkpoints/checkpoint-162/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-162/tokenizer_config.json b/L4/checkpoints/checkpoint-162/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-162/trainer_state.json b/L4/checkpoints/checkpoint-162/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..5ef0028d2a830d709acdfe992609f4249676c104
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/trainer_state.json
@@ -0,0 +1,194 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 162,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ },
+ {
+ "entropy": 1.3333984375,
+ "epoch": 1.8518518518518519,
+ "grad_norm": 0.7905425429344177,
+ "learning_rate": 7.182674431585704e-05,
+ "loss": 1.3476751327514649,
+ "mean_token_accuracy": 0.6990618519484997,
+ "num_tokens": 355791.0,
+ "step": 100
+ },
+ {
+ "entropy": 1.35595703125,
+ "epoch": 2.037037037037037,
+ "grad_norm": 0.8151571154594421,
+ "learning_rate": 5.297280930072632e-05,
+ "loss": 1.3445645332336427,
+ "mean_token_accuracy": 0.6957452893257141,
+ "num_tokens": 391407.0,
+ "step": 110
+ },
+ {
+ "entropy": 1.36494140625,
+ "epoch": 2.2222222222222223,
+ "grad_norm": 0.77378249168396,
+ "learning_rate": 3.606917100644487e-05,
+ "loss": 1.3390249252319335,
+ "mean_token_accuracy": 0.698172252625227,
+ "num_tokens": 427190.0,
+ "step": 120
+ },
+ {
+ "entropy": 1.342578125,
+ "epoch": 2.4074074074074074,
+ "grad_norm": 0.893750786781311,
+ "learning_rate": 2.1816851753197032e-05,
+ "loss": 1.322931671142578,
+ "mean_token_accuracy": 0.6984936438500882,
+ "num_tokens": 462696.0,
+ "step": 130
+ },
+ {
+ "entropy": 1.339453125,
+ "epoch": 2.5925925925925926,
+ "grad_norm": 0.82007896900177,
+ "learning_rate": 1.0806919199730615e-05,
+ "loss": 1.3379673957824707,
+ "mean_token_accuracy": 0.702365966886282,
+ "num_tokens": 498412.0,
+ "step": 140
+ },
+ {
+ "entropy": 1.33486328125,
+ "epoch": 2.7777777777777777,
+ "grad_norm": 0.8532996773719788,
+ "learning_rate": 3.495973773086014e-06,
+ "loss": 1.3067991256713867,
+ "mean_token_accuracy": 0.701677817851305,
+ "num_tokens": 533710.0,
+ "step": 150
+ },
+ {
+ "entropy": 1.347265625,
+ "epoch": 2.962962962962963,
+ "grad_norm": 0.8509896397590637,
+ "learning_rate": 1.8721268066330676e-07,
+ "loss": 1.3350863456726074,
+ "mean_token_accuracy": 0.7003593638539314,
+ "num_tokens": 569320.0,
+ "step": 160
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1796759427633408.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-162/training_args.bin b/L4/checkpoints/checkpoint-162/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-162/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-20/README.md b/L4/checkpoints/checkpoint-20/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-20/adapter_config.json b/L4/checkpoints/checkpoint-20/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-20/adapter_model.safetensors b/L4/checkpoints/checkpoint-20/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ccd1666c617d9294cb780769a0e9ad31a2e48e71
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6a040a5216097b66ad326365146861a6e934adb5d4b2757a1f2548301b438a3
+size 4331744
diff --git a/L4/checkpoints/checkpoint-20/chat_template.jinja b/L4/checkpoints/checkpoint-20/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-20/optimizer.pt b/L4/checkpoints/checkpoint-20/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7342a6350ff360d18e90ee69e7b5c1146d0abe8a
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0c0ca3af8a72896d5a7481ee41ae72fbfc83813a6f7a2ca0fc257fab645c623
+size 8690571
diff --git a/L4/checkpoints/checkpoint-20/rng_state.pth b/L4/checkpoints/checkpoint-20/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3f5c250a692e034a27fb06d7008612b2de078116
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cddf27219365242ec1046a3532a63a24c3f350c77f100e4f973369db2cc849d
+size 14455
diff --git a/L4/checkpoints/checkpoint-20/scheduler.pt b/L4/checkpoints/checkpoint-20/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e7ba4c1e3afc885460dc257f7b2ad2b232f89cf
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:058d2093fe6b7c7ff3f6434a71088efee1d7a5d28e0709e2b1b7745f3b024e2b
+size 1465
diff --git a/L4/checkpoints/checkpoint-20/tokenizer.json b/L4/checkpoints/checkpoint-20/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-20/tokenizer_config.json b/L4/checkpoints/checkpoint-20/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-20/trainer_state.json b/L4/checkpoints/checkpoint-20/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bf4a98771c3d09eafebbde9c69b99de9e1dc8769
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/trainer_state.json
@@ -0,0 +1,54 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.37037037037037035,
+ "eval_steps": 500,
+ "global_step": 20,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 221420799030528.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-20/training_args.bin b/L4/checkpoints/checkpoint-20/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-20/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-30/README.md b/L4/checkpoints/checkpoint-30/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-30/adapter_config.json b/L4/checkpoints/checkpoint-30/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-30/adapter_model.safetensors b/L4/checkpoints/checkpoint-30/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dc39aabae263e90aa691b250eb1238c6793f4374
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b6a51c090ea59c052844c679a3116814418e7bd442f2780b46e0915b31ecf25
+size 4331744
diff --git a/L4/checkpoints/checkpoint-30/chat_template.jinja b/L4/checkpoints/checkpoint-30/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-30/optimizer.pt b/L4/checkpoints/checkpoint-30/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2a8ce3db6a5e2be62569670383ef7a423c6bf02
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e2c3abe331f6c76324dcd689f7c3bb7bfdf56dc73d41a0d0487c382bdb019e3
+size 8690571
diff --git a/L4/checkpoints/checkpoint-30/rng_state.pth b/L4/checkpoints/checkpoint-30/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3f5c250a692e034a27fb06d7008612b2de078116
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cddf27219365242ec1046a3532a63a24c3f350c77f100e4f973369db2cc849d
+size 14455
diff --git a/L4/checkpoints/checkpoint-30/scheduler.pt b/L4/checkpoints/checkpoint-30/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4b9831fffadea0661a08289893e848017413c7d7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3525043f191b83b2176ac7bbdcc76e74bc63cf9ec23232db96dde0bd08cdf89f
+size 1465
diff --git a/L4/checkpoints/checkpoint-30/tokenizer.json b/L4/checkpoints/checkpoint-30/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-30/tokenizer_config.json b/L4/checkpoints/checkpoint-30/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-30/trainer_state.json b/L4/checkpoints/checkpoint-30/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..eebcd35c09071365dd5a3d4a6bb1e41847faadba
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/trainer_state.json
@@ -0,0 +1,64 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.5555555555555556,
+ "eval_steps": 500,
+ "global_step": 30,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 333613806769152.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-30/training_args.bin b/L4/checkpoints/checkpoint-30/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-30/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-40/README.md b/L4/checkpoints/checkpoint-40/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-40/adapter_config.json b/L4/checkpoints/checkpoint-40/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-40/adapter_model.safetensors b/L4/checkpoints/checkpoint-40/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..74cc509febb707bb74a3a44f672527af1b860bf9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3454d90a981aab18918c2243c10f5748f608cc8ad6b9f321d26ba52c8c4bc690
+size 4331744
diff --git a/L4/checkpoints/checkpoint-40/chat_template.jinja b/L4/checkpoints/checkpoint-40/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-40/optimizer.pt b/L4/checkpoints/checkpoint-40/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64abe13dc4da8c10a19922d9fdcc9e0ddd59a40d
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1cbe5b6467ac312e98073738f461953f520e41f921529b91caf493dfb2504f1
+size 8690571
diff --git a/L4/checkpoints/checkpoint-40/rng_state.pth b/L4/checkpoints/checkpoint-40/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3f5c250a692e034a27fb06d7008612b2de078116
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cddf27219365242ec1046a3532a63a24c3f350c77f100e4f973369db2cc849d
+size 14455
diff --git a/L4/checkpoints/checkpoint-40/scheduler.pt b/L4/checkpoints/checkpoint-40/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0097250e892b6a1d91e010f5ba5c62475f15b8da
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc00b6a6d8199bbf2760e33d338d3a45c43d580cd6af6c3fed5f0a154bde627b
+size 1465
diff --git a/L4/checkpoints/checkpoint-40/tokenizer.json b/L4/checkpoints/checkpoint-40/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-40/tokenizer_config.json b/L4/checkpoints/checkpoint-40/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-40/trainer_state.json b/L4/checkpoints/checkpoint-40/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..93986af38305c992c2d006fe3e1cffe7511d54a3
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/trainer_state.json
@@ -0,0 +1,74 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.7407407407407407,
+ "eval_steps": 500,
+ "global_step": 40,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 444512901876480.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-40/training_args.bin b/L4/checkpoints/checkpoint-40/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-40/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-50/README.md b/L4/checkpoints/checkpoint-50/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-50/adapter_config.json b/L4/checkpoints/checkpoint-50/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-50/adapter_model.safetensors b/L4/checkpoints/checkpoint-50/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e103a050ff5a564dc63de6c0a464eed62cba0175
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b58d1d5e00957ca194e3accf9be8c094942f413ce6f76962ff74ff8eb8d6eb46
+size 4331744
diff --git a/L4/checkpoints/checkpoint-50/chat_template.jinja b/L4/checkpoints/checkpoint-50/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-50/optimizer.pt b/L4/checkpoints/checkpoint-50/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9aa29b49277a0bbf7d778eae3c270b46a2ad0e1a
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d501ad738bc4797b940df56c8206cb04d09199849191426dc583104e4099a11d
+size 8690571
diff --git a/L4/checkpoints/checkpoint-50/rng_state.pth b/L4/checkpoints/checkpoint-50/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..3f5c250a692e034a27fb06d7008612b2de078116
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cddf27219365242ec1046a3532a63a24c3f350c77f100e4f973369db2cc849d
+size 14455
diff --git a/L4/checkpoints/checkpoint-50/scheduler.pt b/L4/checkpoints/checkpoint-50/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..375808d427a9a21f9217eca89616de9c309b6e89
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec1dcbedbcf0b4599f2346cdfc96b22ba2645f688f008189d303ec40395acc90
+size 1465
diff --git a/L4/checkpoints/checkpoint-50/tokenizer.json b/L4/checkpoints/checkpoint-50/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-50/tokenizer_config.json b/L4/checkpoints/checkpoint-50/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-50/trainer_state.json b/L4/checkpoints/checkpoint-50/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9a27e3df4250a4fd75664a31a3315b8bff963d90
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/trainer_state.json
@@ -0,0 +1,84 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.9259259259259259,
+ "eval_steps": 500,
+ "global_step": 50,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 555166393197312.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-50/training_args.bin b/L4/checkpoints/checkpoint-50/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-50/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-60/README.md b/L4/checkpoints/checkpoint-60/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-60/adapter_config.json b/L4/checkpoints/checkpoint-60/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-60/adapter_model.safetensors b/L4/checkpoints/checkpoint-60/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..769f7b14ed37a237b64038e7bab6755f596c1c78
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb428abc6fd9525fd1f86f8ee875c73c9b5760d28e9a37362c42737074e8012
+size 4331744
diff --git a/L4/checkpoints/checkpoint-60/chat_template.jinja b/L4/checkpoints/checkpoint-60/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- elif content is iterable and content is not mapping %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain images.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if is_system_content %}
+ {{- raise_exception('System message cannot contain videos.') }}
+ {%- endif %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
+ {%- endif %}
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- else %}
+ {{- raise_exception('Unexpected item type in content.') }}
+ {%- endif %}
+ {%- endfor %}
+ {%- elif content is none or content is undefined %}
+ {{- '' }}
+ {%- else %}
+ {{- raise_exception('Unexpected content type.') }}
+ {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+ {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}
+ {{- '<|im_start|>system\n' }}
+ {{- "# Tools\n\nYou have access to the following functions:\n\n" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n" }}
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n\n\n\nvalue_1\n\n\nThis is the value for the second parameter\nthat can span\nmultiple lines\n\n\n\n\n\nReminder:\n- Function calls MUST follow the specified format: an inner block must be nested within XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n' }}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {%- if content %}
+ {{- '\n\n' + content }}
+ {%- endif %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+{%- else %}
+ {%- if messages[0].role == 'system' %}
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+ {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false)|trim %}
+ {%- if not(content.startswith('') and content.endswith('')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+ {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+ {%- set content = render_content(message.content, true)|trim %}
+ {%- if message.role == "system" %}
+ {%- if not loop.first %}
+ {{- raise_exception('System message must be at the beginning.') }}
+ {%- endif %}
+ {%- elif message.role == "user" %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '' in content %}
+ {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %}
+ {%- set content = content.split('')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- set reasoning_content = reasoning_content|trim %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {{- '<|im_start|>' + message.role + '\n\n' + reasoning_content + '\n\n\n' + content }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if tool_call.function is defined %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if loop.first %}
+ {%- if content|trim %}
+ {{- '\n\n\n\n' }}
+ {%- else %}
+ {{- '\n\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- '\n\n\n' }}
+ {%- endif %}
+ {%- if tool_call.arguments is defined %}
+ {%- for args_name, args_value in tool_call.arguments|items %}
+ {{- '\n' }}
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+ {{- args_value }}
+ {{- '\n\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '\n' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n\n' }}
+ {{- content }}
+ {{- '\n' }}
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
+ {{- '<|im_end|>\n' }}
+ {%- elif loop.last %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- else %}
+ {{- raise_exception('Unexpected message role.') }}
+ {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is true %}
+ {{- '\n' }}
+ {%- else %}
+ {{- '\n\n\n\n' }}
+ {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-60/optimizer.pt b/L4/checkpoints/checkpoint-60/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f2d324568a6e4dddb242866f19cb5265edd5309e
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8edf5e8f34700a6015be6d96f6b01d8d9ef122b51277b1c15e28b7333d10f5e
+size 8690571
diff --git a/L4/checkpoints/checkpoint-60/rng_state.pth b/L4/checkpoints/checkpoint-60/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..38c6d5df0a659b81300f358fefb31a6f4aa51ddc
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3a77d4a8b98ce027a4d6a3b9fb5d7c904e27ec1efd5c0468c24fa26bb738316
+size 14455
diff --git a/L4/checkpoints/checkpoint-60/scheduler.pt b/L4/checkpoints/checkpoint-60/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59d7559a992fff5dd55b92c68b901eff073642b8
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6cab9e2fecc75a5bc26958743288b3ba4789d2024ae8d62d1acddcf5d97a5f1
+size 1465
diff --git a/L4/checkpoints/checkpoint-60/tokenizer.json b/L4/checkpoints/checkpoint-60/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-60/tokenizer_config.json b/L4/checkpoints/checkpoint-60/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-60/trainer_state.json b/L4/checkpoints/checkpoint-60/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..e1789d895b73549a4479dc32e296b3053573d27a
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/trainer_state.json
@@ -0,0 +1,94 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.1111111111111112,
+ "eval_steps": 500,
+ "global_step": 60,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 665694087456768.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-60/training_args.bin b/L4/checkpoints/checkpoint-60/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-60/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-70/README.md b/L4/checkpoints/checkpoint-70/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-70/adapter_config.json b/L4/checkpoints/checkpoint-70/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-70/adapter_model.safetensors b/L4/checkpoints/checkpoint-70/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dc4d7e8ff5f6afb05c6079e60428c0dda183cd22
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5384b25c1e21d5e7cad66b60037feeb9b4b8a01ae8415e31b90bef3f0f72380e
+size 4331744
diff --git a/L4/checkpoints/checkpoint-70/chat_template.jinja b/L4/checkpoints/checkpoint-70/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/chat_template.jinja
@@ -0,0 +1,154 @@
+{%- set image_count = namespace(value=0) %} {#- Chat template: renders `messages` (plus optional `tools`) into an <|im_start|>/<|im_end|> prompt. The two namespaces are running counters used for 'Picture N:' / 'Video N:' labels. #}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %} {#- Flattens message content (a string, or a list of text/image/video items) to text; do_vision_count bumps the counters; is_system_content makes images/videos an error. #}
+    {%- if content is string %}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping %}
+        {%- for item in content %}
+            {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain images.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set image_count.value = image_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif 'video' in item or item.type == 'video' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain videos.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set video_count.value = video_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in item %}
+                {{- item.text }}
+            {%- else %}
+                {{- raise_exception('Unexpected item type in content.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif content is none or content is undefined %}
+        {{- '' }}
+    {%- else %}
+        {{- raise_exception('Unexpected content type.') }}
+    {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %} {#- With tools: emit a system turn listing each tool as JSON plus the call-format instructions, then any user-supplied system text. #}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {#- Reverse scan: find the index of the last user message that is a real query rather than a wrapped tool response. #}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %} {#- Empty markers here would match every string, so no query would ever be found. #}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %} {#- No explicit reasoning field: split inline reasoning out of the content at the think markers (an empty separator would make split() fail). #}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %} {#- Reasoning is rendered only for assistant turns after the last real user query; earlier turns show content alone. #}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is defined %}
+                    {%- for args_name, args_value in tool_call.arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %} {#- Mappings and non-string sequences are serialised as JSON; everything else is stringified. #}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.previtem and loop.previtem.role != "tool" %} {#- Consecutive tool messages share one user turn: open it on the first, close it after the last. #}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is true %} {#- Thinking on: leave the think block open for the model. Otherwise emit an empty, already-closed think block. #}
+        {{- '<think>\n' }}
+    {%- else %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-70/optimizer.pt b/L4/checkpoints/checkpoint-70/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f4d69c79ad0d6e01fdd4c0cb5e539021661545b
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74f194fcf3ea576728e2b0e614704f94b17a61aeb0a0b229620ebdc232608c10
+size 8690571
diff --git a/L4/checkpoints/checkpoint-70/rng_state.pth b/L4/checkpoints/checkpoint-70/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..38c6d5df0a659b81300f358fefb31a6f4aa51ddc
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3a77d4a8b98ce027a4d6a3b9fb5d7c904e27ec1efd5c0468c24fa26bb738316
+size 14455
diff --git a/L4/checkpoints/checkpoint-70/scheduler.pt b/L4/checkpoints/checkpoint-70/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04ed01711dc6ac12f9158019b65d3b3b1db334a1
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4fec5da0c908663a9570490a0ec9786d4e3de1f08a383516896e8d7b4da7771
+size 1465
diff --git a/L4/checkpoints/checkpoint-70/tokenizer.json b/L4/checkpoints/checkpoint-70/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-70/tokenizer_config.json b/L4/checkpoints/checkpoint-70/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-70/trainer_state.json b/L4/checkpoints/checkpoint-70/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..664fa0ce47c646c1f0c8db3a8aeae0de9710d169
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/trainer_state.json
@@ -0,0 +1,104 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.2962962962962963,
+ "eval_steps": 500,
+ "global_step": 70,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 776551250210304.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-70/training_args.bin b/L4/checkpoints/checkpoint-70/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-70/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-80/README.md b/L4/checkpoints/checkpoint-80/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-80/adapter_config.json b/L4/checkpoints/checkpoint-80/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-80/adapter_model.safetensors b/L4/checkpoints/checkpoint-80/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6bd62ce6f56f075e4b0ef71cfbbc123d035fbbc5
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be7d234376590959effa4598f424e62047e71c91351d05b6531a37c30936b144
+size 4331744
diff --git a/L4/checkpoints/checkpoint-80/chat_template.jinja b/L4/checkpoints/checkpoint-80/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/chat_template.jinja
@@ -0,0 +1,154 @@
+{#- Chat template: optional system/tools preamble, then every message wrapped in <|im_start|>/<|im_end|>, then an optional generation prompt. -#}{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}{#- Emits content as text: strings pass through; list items become text or vision placeholder tokens; anything else raises. -#}
+    {%- if content is string %}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping %}
+        {%- for item in content %}
+            {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain images.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set image_count.value = image_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif 'video' in item or item.type == 'video' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain videos.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set video_count.value = video_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in item %}
+                {{- item.text }}
+            {%- else %}
+                {{- raise_exception('Unexpected item type in content.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif content is none or content is undefined %}
+        {{- '' }}
+    {%- else %}
+        {{- raise_exception('Unexpected content type.') }}
+    {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}{#- With tools: the system turn carries the tool list and call-format instructions, followed by any user-supplied system text. -#}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}{#- Walk backwards to find the last user turn that is a real query rather than a wrapped tool response. -#}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}{#- Inline reasoning: text before the closing think tag is reasoning, text after it is the visible answer. -#}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %}{#- Reasoning is only re-emitted for assistant turns after the last user query. -#}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is defined %}
+                    {%- for args_name, args_value in tool_call.arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}{#- Consecutive tool results share a single user turn, each wrapped in tool_response tags. -#}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is true %}{#- Thinking is opt-in: unless enable_thinking is true, an already-closed empty think block is pre-filled. -#}
+        {{- '<think>\n' }}
+    {%- else %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-80/optimizer.pt b/L4/checkpoints/checkpoint-80/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ed42736c2b892c69cebb120e99c726153a90943
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c3869d6d27a0d9d5c2dd1cff42bd34f0bfc233638de0ebe9cf6886547ac4926
+size 8690571
diff --git a/L4/checkpoints/checkpoint-80/rng_state.pth b/L4/checkpoints/checkpoint-80/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..38c6d5df0a659b81300f358fefb31a6f4aa51ddc
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3a77d4a8b98ce027a4d6a3b9fb5d7c904e27ec1efd5c0468c24fa26bb738316
+size 14455
diff --git a/L4/checkpoints/checkpoint-80/scheduler.pt b/L4/checkpoints/checkpoint-80/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe794f0888812d598d672fa64f8064c68a37bb6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b4c6b6eb4e7d231a5eca7e5a68e6d253c96df73207d4507b2603f27ad0c167c
+size 1465
diff --git a/L4/checkpoints/checkpoint-80/tokenizer.json b/L4/checkpoints/checkpoint-80/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-80/tokenizer_config.json b/L4/checkpoints/checkpoint-80/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-80/trainer_state.json b/L4/checkpoints/checkpoint-80/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..1890fa408e0ace279904f96a743d35a5d93541a4
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/trainer_state.json
@@ -0,0 +1,114 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.4814814814814814,
+ "eval_steps": 500,
+ "global_step": 80,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 888450731472384.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-80/training_args.bin b/L4/checkpoints/checkpoint-80/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-80/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713
diff --git a/L4/checkpoints/checkpoint-90/README.md b/L4/checkpoints/checkpoint-90/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9b1d0d5c752a5c9f2e3a0e3a9ead3759650a12a9
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/README.md
@@ -0,0 +1,209 @@
+---
+base_model: Qwen/Qwen3.5-0.8B
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:Qwen/Qwen3.5-0.8B
+- lora
+- sft
+- transformers
+- trl
+---
+
+# Model Card for Model ID
+
+
+
+
+
+## Model Details
+
+### Model Description
+
+
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+
+
+### Direct Use
+
+
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+
+
+[More Information Needed]
+
+### Recommendations
+
+
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+
+
+[More Information Needed]
+
+### Training Procedure
+
+
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed]
+
+#### Speeds, Sizes, Times [optional]
+
+
+
+[More Information Needed]
+
+## Evaluation
+
+
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+
+
+[More Information Needed]
+
+#### Factors
+
+
+
+[More Information Needed]
+
+#### Metrics
+
+
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+
+
+[More Information Needed]
+
+## Environmental Impact
+
+
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.18.2.dev0
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-90/adapter_config.json b/L4/checkpoints/checkpoint-90/adapter_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..466b55ef5352db25f2ac04e8aec438ce0764d848
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/adapter_config.json
@@ -0,0 +1,45 @@
+{
+ "alora_invocation_tokens": null,
+ "alpha_pattern": {},
+ "arrow_config": null,
+ "auto_mapping": null,
+ "base_model_name_or_path": "Qwen/Qwen3.5-0.8B",
+ "bias": "none",
+ "corda_config": null,
+ "ensure_weight_tying": false,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0.05,
+ "lora_ga_config": null,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "peft_version": "0.18.2.dev0@076214c61f690898509b97702b5e9d95c826f000",
+ "qalora_group_size": 16,
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "o_proj",
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": null,
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_bdlora": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-90/adapter_model.safetensors b/L4/checkpoints/checkpoint-90/adapter_model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ffaec17fad167c9d1d5ccfb47023d460474ccaa5
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/adapter_model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3484bca1fac2cccf30eb0e275ca9e42755194a88838bf44370362a383351f661
+size 4331744
diff --git a/L4/checkpoints/checkpoint-90/chat_template.jinja b/L4/checkpoints/checkpoint-90/chat_template.jinja
new file mode 100644
index 0000000000000000000000000000000000000000..0ef09f214eaa6d9bca297988afc1454b5827b2c7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/chat_template.jinja
@@ -0,0 +1,154 @@
+{#- Chat template: optional system/tools preamble, then every message wrapped in <|im_start|>/<|im_end|>, then an optional generation prompt. -#}{%- set image_count = namespace(value=0) %}
+{%- set video_count = namespace(value=0) %}
+{%- macro render_content(content, do_vision_count, is_system_content=false) %}{#- Emits content as text: strings pass through; list items become text or vision placeholder tokens; anything else raises. -#}
+    {%- if content is string %}
+        {{- content }}
+    {%- elif content is iterable and content is not mapping %}
+        {%- for item in content %}
+            {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain images.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set image_count.value = image_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Picture ' ~ image_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
+            {%- elif 'video' in item or item.type == 'video' %}
+                {%- if is_system_content %}
+                    {{- raise_exception('System message cannot contain videos.') }}
+                {%- endif %}
+                {%- if do_vision_count %}
+                    {%- set video_count.value = video_count.value + 1 %}
+                {%- endif %}
+                {%- if add_vision_id %}
+                    {{- 'Video ' ~ video_count.value ~ ': ' }}
+                {%- endif %}
+                {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
+            {%- elif 'text' in item %}
+                {{- item.text }}
+            {%- else %}
+                {{- raise_exception('Unexpected item type in content.') }}
+            {%- endif %}
+        {%- endfor %}
+    {%- elif content is none or content is undefined %}
+        {{- '' }}
+    {%- else %}
+        {{- raise_exception('Unexpected content type.') }}
+    {%- endif %}
+{%- endmacro %}
+{%- if not messages %}
+    {{- raise_exception('No messages provided.') }}
+{%- endif %}
+{%- if tools and tools is iterable and tools is not mapping %}{#- With tools: the system turn carries the tool list and call-format instructions, followed by any user-supplied system text. -#}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>" }}
+    {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {%- if content %}
+            {{- '\n\n' + content }}
+        {%- endif %}
+    {%- endif %}
+    {{- '<|im_end|>\n' }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {%- set content = render_content(messages[0].content, false, true)|trim %}
+        {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}{#- Walk backwards to find the last user turn that is a real query rather than a wrapped tool response. -#}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" %}
+        {%- set content = render_content(message.content, false)|trim %}
+        {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+            {%- set ns.multi_step_tool = false %}
+            {%- set ns.last_query_index = index %}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if ns.multi_step_tool %}
+    {{- raise_exception('No user query found in messages.') }}
+{%- endif %}
+{%- for message in messages %}
+    {%- set content = render_content(message.content, true)|trim %}
+    {%- if message.role == "system" %}
+        {%- if not loop.first %}
+            {{- raise_exception('System message must be at the beginning.') }}
+        {%- endif %}
+    {%- elif message.role == "user" %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}{#- Inline reasoning: text before the closing think tag is reasoning, text after it is the visible answer. -#}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- set reasoning_content = reasoning_content|trim %}
+        {%- if loop.index0 > ns.last_query_index %}{#- Reasoning is only re-emitted for assistant turns after the last user query. -#}
+            {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {%- if loop.first %}
+                    {%- if content|trim %}
+                        {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- else %}
+                        {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                    {%- endif %}
+                {%- else %}
+                    {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
+                {%- endif %}
+                {%- if tool_call.arguments is defined %}
+                    {%- for args_name, args_value in tool_call.arguments|items %}
+                        {{- '<parameter=' + args_name + '>\n' }}
+                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
+                        {{- args_value }}
+                        {{- '\n</parameter>\n' }}
+                    {%- endfor %}
+                {%- endif %}
+                {{- '</function>\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}{#- Consecutive tool results share a single user turn, each wrapped in tool_response tags. -#}
+        {%- if loop.previtem and loop.previtem.role != "tool" %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if not loop.last and loop.nextitem.role != "tool" %}
+            {{- '<|im_end|>\n' }}
+        {%- elif loop.last %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- else %}
+        {{- raise_exception('Unexpected message role.') }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is true %}{#- Thinking is opt-in: unless enable_thinking is true, an already-closed empty think block is pre-filled. -#}
+        {{- '<think>\n' }}
+    {%- else %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}
\ No newline at end of file
diff --git a/L4/checkpoints/checkpoint-90/optimizer.pt b/L4/checkpoints/checkpoint-90/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3273db62a2c1d8f612d86a918eaae1f9b97cb8d
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc39eed17a74e1f30f49a30fb004b001e09b58543ce3d64646235952c35071f9
+size 8690571
diff --git a/L4/checkpoints/checkpoint-90/rng_state.pth b/L4/checkpoints/checkpoint-90/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..38c6d5df0a659b81300f358fefb31a6f4aa51ddc
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3a77d4a8b98ce027a4d6a3b9fb5d7c904e27ec1efd5c0468c24fa26bb738316
+size 14455
diff --git a/L4/checkpoints/checkpoint-90/scheduler.pt b/L4/checkpoints/checkpoint-90/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..614bedd8850562f22f826ac60b9d908118dd0f84
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8580178014146e80f74ca4586ad73479b8e268727b237e66a3c0cb367019c15
+size 1465
diff --git a/L4/checkpoints/checkpoint-90/tokenizer.json b/L4/checkpoints/checkpoint-90/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..67741b04f23bfdb46501f748ce27865ec82eccfb
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
+size 19989343
diff --git a/L4/checkpoints/checkpoint-90/tokenizer_config.json b/L4/checkpoints/checkpoint-90/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0871602fcb7d15fcb2979685b1c7b609eef01a6f
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/tokenizer_config.json
@@ -0,0 +1,32 @@
+{
+ "add_prefix_space": false,
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "backend": "tokenizers",
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "image_token": "<|image_pad|>",
+ "is_local": false,
+ "local_files_only": false,
+ "model_max_length": 512,
+ "model_specific_special_tokens": {
+ "audio_bos_token": "<|audio_start|>",
+ "audio_eos_token": "<|audio_end|>",
+ "audio_token": "<|audio_pad|>",
+ "image_token": "<|image_pad|>",
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+ },
+ "pad_token": "<|endoftext|>",
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+ "split_special_tokens": false,
+ "tokenizer_class": "TokenizersBackend",
+ "unk_token": null,
+ "video_token": "<|video_pad|>",
+ "vision_bos_token": "<|vision_start|>",
+ "vision_eos_token": "<|vision_end|>"
+}
diff --git a/L4/checkpoints/checkpoint-90/trainer_state.json b/L4/checkpoints/checkpoint-90/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..fc72b8c7b73ab2d9d1f8a43e5f6f93a92f1f77d7
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/trainer_state.json
@@ -0,0 +1,124 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.6666666666666665,
+ "eval_steps": 500,
+ "global_step": 90,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "entropy": 2.4490234375,
+ "epoch": 0.18518518518518517,
+ "grad_norm": 1.605711817741394,
+ "learning_rate": 0.00019997919281892067,
+ "loss": 2.744666862487793,
+ "mean_token_accuracy": 0.4455879807472229,
+ "num_tokens": 35176.0,
+ "step": 10
+ },
+ {
+ "entropy": 2.14208984375,
+ "epoch": 0.37037037037037035,
+ "grad_norm": 1.9346004724502563,
+ "learning_rate": 0.00019749279121818235,
+ "loss": 1.9460367202758788,
+ "mean_token_accuracy": 0.5691858988255263,
+ "num_tokens": 70631.0,
+ "step": 20
+ },
+ {
+ "entropy": 1.50927734375,
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.859683096408844,
+ "learning_rate": 0.00019096319953545185,
+ "loss": 1.5292683601379395,
+ "mean_token_accuracy": 0.6657284118235112,
+ "num_tokens": 106615.0,
+ "step": 30
+ },
+ {
+ "entropy": 1.45556640625,
+ "epoch": 0.7407407407407407,
+ "grad_norm": 0.7568148970603943,
+ "learning_rate": 0.000180661210923753,
+ "loss": 1.4439837455749511,
+ "mean_token_accuracy": 0.6813905350863934,
+ "num_tokens": 142274.0,
+ "step": 40
+ },
+ {
+ "entropy": 1.4376953125,
+ "epoch": 0.9259259259259259,
+ "grad_norm": 0.7695605158805847,
+ "learning_rate": 0.00016701406618375596,
+ "loss": 1.434541606903076,
+ "mean_token_accuracy": 0.6835584975779057,
+ "num_tokens": 177779.0,
+ "step": 50
+ },
+ {
+ "entropy": 1.4240234375,
+ "epoch": 1.1111111111111112,
+ "grad_norm": 0.7485360503196716,
+ "learning_rate": 0.00015058773536894685,
+ "loss": 1.398463821411133,
+ "mean_token_accuracy": 0.6880027234554291,
+ "num_tokens": 213250.0,
+ "step": 60
+ },
+ {
+ "entropy": 1.3787109375,
+ "epoch": 1.2962962962962963,
+ "grad_norm": 0.8382265567779541,
+ "learning_rate": 0.00013206344605527355,
+ "loss": 1.3820528030395507,
+ "mean_token_accuracy": 0.6910028986632824,
+ "num_tokens": 249008.0,
+ "step": 70
+ },
+ {
+ "entropy": 1.39111328125,
+ "epoch": 1.4814814814814814,
+ "grad_norm": 0.8756011128425598,
+ "learning_rate": 0.000112209431687416,
+ "loss": 1.3737930297851562,
+ "mean_token_accuracy": 0.6952961266040802,
+ "num_tokens": 284762.0,
+ "step": 80
+ },
+ {
+ "entropy": 1.3533203125,
+ "epoch": 1.6666666666666665,
+ "grad_norm": 0.8062230348587036,
+ "learning_rate": 9.184907164529368e-05,
+ "loss": 1.3459887504577637,
+ "mean_token_accuracy": 0.6956925392150879,
+ "num_tokens": 320323.0,
+ "step": 90
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 162,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 10,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 998427314796288.0,
+ "train_batch_size": 2,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/L4/checkpoints/checkpoint-90/training_args.bin b/L4/checkpoints/checkpoint-90/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8b622570d0df6ed776df3bfb03f8bffe68ed2d08
--- /dev/null
+++ b/L4/checkpoints/checkpoint-90/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf5d3b2330261292cc9f0796ce05c6330f2f9e19b5fd0aaf9becd062bfbbb1e4
+size 5713