manangarg21 commited on Nov 11, 2025

Commit

0b69c11

0 Parent(s):

Fresh clean commit

Files changed (45) hide show

.gitattributes +36 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/README.md +207 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/adapter_config.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/adapter_model.safetensors +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/chat_template.jinja +93 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/optimizer.pt +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/rng_state.pth +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/scheduler.pt +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/special_tokens_map.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/tokenizer.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/tokenizer_config.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/trainer_state.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25000/training_args.bin +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/README.md +207 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/adapter_config.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/adapter_model.safetensors +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/chat_template.jinja +93 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/optimizer.pt +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/rng_state.pth +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/scheduler.pt +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/special_tokens_map.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/tokenizer.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/tokenizer_config.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/trainer_state.json +3 -0
checkpoints/llama1b_lora_xsum/checkpoint-25506/training_args.bin +3 -0
checkpoints/moe_hash/best.pt +3 -0
checkpoints/moe_hash_lb0/best.pt +3 -0
checkpoints/moe_hash_lb1/best.pt +3 -0
checkpoints/moe_hash_lb5/best.pt +3 -0
checkpoints/moe_hash_lib/best.pt +3 -0
checkpoints/moe_hash_lib0/best.pt +3 -0
checkpoints/moe_topk/best.pt +3 -0
checkpoints/moe_topk_gqa/best.pt +3 -0
checkpoints/moe_topk_lb0/best.pt +3 -0
checkpoints/moe_topk_lb1/best.pt +3 -0
checkpoints/moe_topk_lb5/best.pt +3 -0
checkpoints/moe_topk_lib/best.pt +3 -0
checkpoints/moe_topk_lib0/best.pt +3 -0
checkpoints/t5/best_checkpoint/config.json +3 -0
checkpoints/t5/best_checkpoint/generation_config.json +3 -0
checkpoints/t5/best_checkpoint/model.safetensors +3 -0
checkpoints/t5/best_checkpoint/special_tokens_map.json +3 -0
checkpoints/t5/best_checkpoint/spiece.model +3 -0
checkpoints/t5/best_checkpoint/tokenizer.json +3 -0
checkpoints/t5/best_checkpoint/tokenizer_config.json +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text

checkpoints/llama1b_lora_xsum/checkpoint-25000/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: meta-llama/Llama-3.2-1B-Instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:meta-llama/Llama-3.2-1B-Instruct
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

checkpoints/llama1b_lora_xsum/checkpoint-25000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af3b9aeaf296877daf719fdac76e44ae5b47bb2fe73db0df69a760395a136fdf
+size 867

checkpoints/llama1b_lora_xsum/checkpoint-25000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17f47b7f216865635c0d7bafc685965c0c17794fc0f2af530ec3c7dfb172bb98
+size 6824216

checkpoints/llama1b_lora_xsum/checkpoint-25000/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,93 @@

+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- if strftime_now is defined %}
+        {%- set date_string = strftime_now("%d %b %Y") %}
+    {%- else %}
+        {%- set date_string = "26 Jul 2024" %}
+    {%- endif %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "" %}
+{%- endif %}
+{#- System message #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if tools is not none %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Extract the first user message so we can plug it in here #}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+{%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {%- if not message.tool_calls|length == 1 %}
+            {{- raise_exception("This model only supports single tool-calls at once!") }}
+        {%- endif %}
+        {%- set tool_call = message.tool_calls[0].function %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+        {{- '{"name": "' + tool_call.name + '", ' }}
+        {{- '"parameters": ' }}
+        {{- tool_call.arguments | tojson }}
+        {{- "}" }}
+        {{- "<|eot_id|>" }}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is mapping or message.content is iterable %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- message.content }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}

checkpoints/llama1b_lora_xsum/checkpoint-25000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4129f7a43427e2bee6677475fc91c8062feae90a9e4c0b61fd41c093e0d8fcc
+size 13686923

checkpoints/llama1b_lora_xsum/checkpoint-25000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d1b7587852ce2f413b4a87da3c110ff49ca9cc31e15145fede5f42e5dac3519
+size 14645

checkpoints/llama1b_lora_xsum/checkpoint-25000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8920678204ea3eb3d5a7a868a25e3010ced580bf43599f6b53b617e897da9d7
+size 1465

checkpoints/llama1b_lora_xsum/checkpoint-25000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b1835caa5b4d70acaa210fa222b0036f1882f9525c4660fd4810fb3e1e40ff8
+size 325

checkpoints/llama1b_lora_xsum/checkpoint-25000/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:302266c6120b28bd6e14070a550e50139e606b66af3ca76fa84196e0e5fe0ef7
+size 17210183

checkpoints/llama1b_lora_xsum/checkpoint-25000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c116761af9f371c68fd4a2da6e3cf6b3a4b41148f38eca4047aecfccdb6403
+size 50554

checkpoints/llama1b_lora_xsum/checkpoint-25000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c91d62b0d399a63bd004a6402b25fe0f54b17021666394c4752d650e85343643
+size 50015

checkpoints/llama1b_lora_xsum/checkpoint-25000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8035cdaa5a1d5ce7860f7e8d9c7307bca25c92618146a549385b0d546a0bda51
+size 5841

checkpoints/llama1b_lora_xsum/checkpoint-25506/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: meta-llama/Llama-3.2-1B-Instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:meta-llama/Llama-3.2-1B-Instruct
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

checkpoints/llama1b_lora_xsum/checkpoint-25506/adapter_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af3b9aeaf296877daf719fdac76e44ae5b47bb2fe73db0df69a760395a136fdf
+size 867

checkpoints/llama1b_lora_xsum/checkpoint-25506/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:357e09d0823d0004024267db5ccbd7489265ed03b9577293d76dad7f8121e4ae
+size 6824216

checkpoints/llama1b_lora_xsum/checkpoint-25506/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,93 @@

+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not date_string is defined %}
+    {%- if strftime_now is defined %}
+        {%- set date_string = strftime_now("%d %b %Y") %}
+    {%- else %}
+        {%- set date_string = "26 Jul 2024" %}
+    {%- endif %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{#- This block extracts the system message, so we can slot it into the right place. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content']|trim %}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- set system_message = "" %}
+{%- endif %}
+{#- System message #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if tools is not none %}
+    {{- "Environment: ipython\n" }}
+{%- endif %}
+{{- "Cutting Knowledge Date: December 2023\n" }}
+{{- "Today Date: " + date_string + "\n\n" }}
+{%- if tools is not none and not tools_in_user_message %}
+    {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- system_message }}
+{{- "<|eot_id|>" }}
+{#- Custom tools are passed in a user message with some extra guidance #}
+{%- if tools_in_user_message and not tools is none %}
+    {#- Extract the first user message so we can plug it in here #}
+    {%- if messages | length != 0 %}
+        {%- set first_user_message = messages[0]['content']|trim %}
+        {%- set messages = messages[1:] %}
+    {%- else %}
+        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
+{%- endif %}
+    {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}}
+    {{- "Given the following functions, please respond with a JSON for a function call " }}
+    {{- "with its proper arguments that best answers the given prompt.\n\n" }}
+    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
+    {{- "Do not use variables.\n\n" }}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+    {{- first_user_message + "<|eot_id|>"}}
+{%- endif %}
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {%- if not message.tool_calls|length == 1 %}
+            {{- raise_exception("This model only supports single tool-calls at once!") }}
+        {%- endif %}
+        {%- set tool_call = message.tool_calls[0].function %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+        {{- '{"name": "' + tool_call.name + '", ' }}
+        {{- '"parameters": ' }}
+        {{- tool_call.arguments | tojson }}
+        {{- "}" }}
+        {{- "<|eot_id|>" }}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }}
+        {%- if message.content is mapping or message.content is iterable %}
+            {{- message.content | tojson }}
+        {%- else %}
+            {{- message.content }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}

checkpoints/llama1b_lora_xsum/checkpoint-25506/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57b7221b5d6a36d2fdb1504084b4714a5974a958339307056d797b7b5721b048
+size 13686923

checkpoints/llama1b_lora_xsum/checkpoint-25506/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f0064ed6987e07af7db7d450bfa85a2d27838a32905897a4209a6d89f01b9a1
+size 14645

checkpoints/llama1b_lora_xsum/checkpoint-25506/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb95bcfa56647047edfca19628d77e5661dee680e39a5644a414210adf0c8032
+size 1465

checkpoints/llama1b_lora_xsum/checkpoint-25506/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b1835caa5b4d70acaa210fa222b0036f1882f9525c4660fd4810fb3e1e40ff8
+size 325

checkpoints/llama1b_lora_xsum/checkpoint-25506/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:302266c6120b28bd6e14070a550e50139e606b66af3ca76fa84196e0e5fe0ef7
+size 17210183

checkpoints/llama1b_lora_xsum/checkpoint-25506/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c116761af9f371c68fd4a2da6e3cf6b3a4b41148f38eca4047aecfccdb6403
+size 50554

checkpoints/llama1b_lora_xsum/checkpoint-25506/trainer_state.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ed5669d349d910f4b448ed47e05a02081b7fc0190f15dd66f3693656e2ab0bc
+size 50878

checkpoints/llama1b_lora_xsum/checkpoint-25506/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8035cdaa5a1d5ce7860f7e8d9c7307bca25c92618146a549385b0d546a0bda51
+size 5841

checkpoints/moe_hash/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad527c5d5409a27552a4edafc30008e347368a1ef93c77c190b1d90af79350e1
+size 134417696

checkpoints/moe_hash_lb0/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:192353326072cea85671a7755e7c1634dece651baf1b2698e1d072a63482a02d
+size 134417696

checkpoints/moe_hash_lb1/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9939307f957393c6f1e97a96ae1c582323d7c6be68b2209bd034b673ed70eedd
+size 126000315

checkpoints/moe_hash_lb5/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c6a378f351a8d5b41220867ebe1a4a548e0776824ff8ecb16afc0047192a624
+size 134417696

checkpoints/moe_hash_lib/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f96448e08f276989d9d09404e3daa50e6ec038f38868f18cdd18995131f1c745
+size 126001140

checkpoints/moe_hash_lib0/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45604e913c08f49fb4988b0ce2483af3e38332a3ab89a65d6cfe5f056a2f7264
+size 126001531

checkpoints/moe_topk/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98f6e9f326d876b68fef40187d3f622b8d6f93a8d820f625dda1b36952e1a48f
+size 134417696

checkpoints/moe_topk_gqa/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:760e08701f8ada0655422a47dd127781ab284d23f673ee4e6ec2c63139a06ba5
+size 125972563

checkpoints/moe_topk_lb0/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2df9cda98688543182ed593769dcbb7c8f4261286563f43812f9b8c051ff63e2
+size 134417696

checkpoints/moe_topk_lb1/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:662344702f1fef3f22bb09d5b72afcbbe22c76a5ddeebfc7e42b9b47e8598d27
+size 126000315

checkpoints/moe_topk_lb5/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef17df5669662b3e6ae6d6db59798cf03d57e279e778e96eee0385ef2db2721e
+size 134417696

checkpoints/moe_topk_lib/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77b519116f622b478b4fb7f540722b55bf220bb12d214b2725eaa3aa8be23759
+size 126001204

checkpoints/moe_topk_lib0/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fcd9d812a84149a8332fb9231a9510423245cec6b7ae78f22272932403f3deba
+size 126001595

checkpoints/t5/best_checkpoint/config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:630bbd23659a0af5d599afdf085a48eac0b2aeb8200a18dc6089c5245d8363a2
+size 1468

checkpoints/t5/best_checkpoint/generation_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65ead0a1ee5fe9425478f2a8d37380fbb5e325247763bbf60d7e2cb176278dff
+size 152

checkpoints/t5/best_checkpoint/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e0178c1fa824641273c415c6e7154f2df42f2f1f992affdb1111741a308e273
+size 891644712

checkpoints/t5/best_checkpoint/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c87151ef0f72a99d1f766a4c418bd2a1f90aaa30a8e22fe5eca9641daebb64f
+size 2201

checkpoints/t5/best_checkpoint/spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656

checkpoints/t5/best_checkpoint/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71c33c3bf57bf2e19b4f8ac686ad8d182a79f7cd1a4921889437c453420223f1
+size 2422169

checkpoints/t5/best_checkpoint/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2632d1edd2266f6ca61b44dd2a5b74492fa009b9c793a294a80b724aef34ba3b
+size 20833