diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..ccc56b414ff66f28504433a882056d14eb84a225 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-10/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-20/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-30/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-39/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 7b95401dc46245ac339fc25059d4a56d90b4cde5..90cebac5016e59314ff6eceb1568810b3ce23db3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,206 @@ ---- -license: apache-2.0 ---- +--- +library_name: transformers +base_model: huihui-ai/DeepSeek-R1-0528-Qwen3-8B-abliterated +tags: +- tax +- uncensored +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ae242b6375c320cf55807a55fd9d5f28b34f69a --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/teamspace/studios/this_studio/downloaded_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "v_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45e5e582c452ddbab2e6d2f94b52fe86f2a8c8b2 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5c7e9c9c8695a44afbdc3b0040155b29ca8619f91105529499847f0a4d4cacc +size 174655536 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..7641578becb325296855fbde0f59778cae094eee --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,14 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' + +' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{' +' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{' +<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %} \ No newline at end of file diff --git a/checkpoint-10/README.md b/checkpoint-10/README.md new file mode 100644 index 0000000000000000000000000000000000000000..68d364589683719ecbfba33d66a08d48cda71653 --- /dev/null +++ b/checkpoint-10/README.md @@ -0,0 +1,207 @@ +--- +base_model: /teamspace/studios/this_studio/downloaded_model +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/teamspace/studios/this_studio/downloaded_model +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-10/adapter_config.json b/checkpoint-10/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ae242b6375c320cf55807a55fd9d5f28b34f69a --- /dev/null +++ b/checkpoint-10/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/teamspace/studios/this_studio/downloaded_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "v_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-10/adapter_model.safetensors b/checkpoint-10/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddceaaaf3f1a7c64dd48f0b27a795194191ae033 --- /dev/null +++ b/checkpoint-10/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc242c730da0e0ac9cd2ee04ed1ffb65c2595db506170a816ae7680d4737bdd +size 174655536 diff --git a/checkpoint-10/chat_template.jinja b/checkpoint-10/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..7641578becb325296855fbde0f59778cae094eee --- /dev/null +++ b/checkpoint-10/chat_template.jinja @@ -0,0 +1,14 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' + +' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{' +' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{' +<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %} \ No newline at end of file diff --git a/checkpoint-10/optimizer.pt b/checkpoint-10/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..98b25f4ac8051a19fe0da5daab536c917a3a5d5a --- /dev/null +++ b/checkpoint-10/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181d3c2c7719c66d9c650e7dbb36a943fcfa1657ce0310f73ad81f88a416ed76 +size 349601867 diff --git a/checkpoint-10/rng_state.pth b/checkpoint-10/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7bda75f1cbaf6800a7f7e3d6767754462c52ea2f --- /dev/null +++ b/checkpoint-10/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3299332e06897a5d130dc4e60554e954a1b32bc67cc33ca9f8fccc1a2d7d7cad +size 14645 diff --git a/checkpoint-10/scheduler.pt b/checkpoint-10/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f152afa4f63168d177c80a703e4542662f88350 --- /dev/null +++ b/checkpoint-10/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9372835a88ced335eeb8fcf3b294068923a02425bf72fc63f0b4be103e404bf2 +size 1465 diff --git a/checkpoint-10/special_tokens_map.json b/checkpoint-10/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/checkpoint-10/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-10/tokenizer.json b/checkpoint-10/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..d72250588582c7daae9ae9219ea47f28aff86443 --- /dev/null +++ b/checkpoint-10/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf896220929e8f6f523e88395f3fcd58ba0feef43b4f625af5b187a4caf4a629 +size 11423445 diff --git a/checkpoint-10/tokenizer_config.json b/checkpoint-10/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4657dc1d512391538fdb16dd905945ad376062f --- /dev/null +++ b/checkpoint-10/tokenizer_config.json @@ -0,0 +1,242 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151669": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151670": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 131072, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/checkpoint-10/trainer_state.json b/checkpoint-10/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0abb57a1df9f162707d8f43b6c65bd54d2b680c9 --- /dev/null +++ b/checkpoint-10/trainer_state.json @@ -0,0 +1,104 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7692307692307693, + "eval_steps": 500, + "global_step": 10, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07692307692307693, + "grad_norm": 1.0085614919662476, + "learning_rate": 0.0002, + "loss": 2.3547, + "step": 1 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 1.6940793991088867, + "learning_rate": 0.00019487179487179487, + "loss": 3.3236, + "step": 2 + }, + { + "epoch": 0.23076923076923078, + "grad_norm": 1.1689488887786865, + "learning_rate": 0.00018974358974358974, + "loss": 2.6049, + "step": 3 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 1.2771538496017456, + "learning_rate": 0.00018461538461538463, + "loss": 2.7969, + "step": 4 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 1.1572215557098389, + "learning_rate": 0.0001794871794871795, + "loss": 2.3184, + "step": 5 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 1.1674821376800537, + "learning_rate": 0.00017435897435897436, + "loss": 2.2023, + "step": 6 + }, + { + "epoch": 0.5384615384615384, + "grad_norm": 0.7206246256828308, + "learning_rate": 0.00016923076923076923, + "loss": 1.6198, + "step": 7 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.7825830578804016, + "learning_rate": 0.0001641025641025641, + "loss": 2.1556, + "step": 8 + }, + { + "epoch": 0.6923076923076923, + "grad_norm": 0.8774120211601257, + "learning_rate": 0.00015897435897435896, + "loss": 1.5896, + "step": 9 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.3417644500732422, + "learning_rate": 0.00015384615384615385, + "loss": 2.2701, + "step": 10 + } + ], + "logging_steps": 1, + "max_steps": 39, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 656722215659520.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-10/training_args.bin b/checkpoint-10/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..069e5cda3eabb1a48d3041ea8d00b7448f1d3950 --- /dev/null +++ b/checkpoint-10/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38864ac515eca7301f4fd6b0dd075ff6e07668a525c9e5d1e663f0d38b4a722a +size 5777 diff --git a/checkpoint-20/README.md b/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..68d364589683719ecbfba33d66a08d48cda71653 --- /dev/null +++ b/checkpoint-20/README.md @@ -0,0 +1,207 @@ +--- +base_model: /teamspace/studios/this_studio/downloaded_model +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/teamspace/studios/this_studio/downloaded_model +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-20/adapter_config.json b/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ae242b6375c320cf55807a55fd9d5f28b34f69a --- /dev/null +++ b/checkpoint-20/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/teamspace/studios/this_studio/downloaded_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "v_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-20/adapter_model.safetensors b/checkpoint-20/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a67b5db8bc895fcef1e5c3f35f674be9072b1371 --- /dev/null +++ b/checkpoint-20/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d781b84b7cd8ed9a1bbfb50552dcb48bbe2223a08873fb6cbe099f949959967 +size 174655536 diff --git a/checkpoint-20/chat_template.jinja b/checkpoint-20/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..7641578becb325296855fbde0f59778cae094eee --- /dev/null +++ b/checkpoint-20/chat_template.jinja @@ -0,0 +1,14 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' + +' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{' +' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{' +<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %} \ No newline at end of file diff --git a/checkpoint-20/optimizer.pt b/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a27813b2caa08b465f75357f36735fcecd35d60 --- /dev/null +++ b/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3700aa43bc44319da2538996ea00772b3f1ca57724691d7f5ce4aef6cb8c347 +size 349601867 diff --git a/checkpoint-20/rng_state.pth b/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..88a43f6e4d846ed1fa238a8967bb9183a359370b --- /dev/null +++ b/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2e4fc4bfa56cdf8fda398b74adc0eba351bfaa67aa0ba1eeff20f8c7a79aff +size 14645 diff --git a/checkpoint-20/scheduler.pt b/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36f29712797698180c961889d1ff5e1bc66803b6 --- /dev/null +++ b/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8ccc86b6057657eec43b70c835197dd544311bb23c4ac822f316b92fd9bfe7c +size 1465 diff --git a/checkpoint-20/special_tokens_map.json b/checkpoint-20/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/checkpoint-20/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-20/tokenizer.json b/checkpoint-20/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..d72250588582c7daae9ae9219ea47f28aff86443 --- /dev/null +++ b/checkpoint-20/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf896220929e8f6f523e88395f3fcd58ba0feef43b4f625af5b187a4caf4a629 +size 11423445 diff --git a/checkpoint-20/tokenizer_config.json b/checkpoint-20/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4657dc1d512391538fdb16dd905945ad376062f --- /dev/null +++ b/checkpoint-20/tokenizer_config.json @@ -0,0 +1,242 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151669": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151670": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 131072, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/checkpoint-20/trainer_state.json b/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b2525a20884787175ebb1ddb7ae8c40c48bed1ba --- /dev/null +++ b/checkpoint-20/trainer_state.json @@ -0,0 +1,174 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5384615384615383, + "eval_steps": 500, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07692307692307693, + "grad_norm": 1.0085614919662476, + "learning_rate": 0.0002, + "loss": 2.3547, + "step": 1 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 1.6940793991088867, + "learning_rate": 0.00019487179487179487, + "loss": 3.3236, + "step": 2 + }, + { + "epoch": 0.23076923076923078, + "grad_norm": 1.1689488887786865, + "learning_rate": 0.00018974358974358974, + "loss": 2.6049, + "step": 3 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 1.2771538496017456, + "learning_rate": 0.00018461538461538463, + "loss": 2.7969, + "step": 4 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 1.1572215557098389, + "learning_rate": 0.0001794871794871795, + "loss": 2.3184, + "step": 5 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 1.1674821376800537, + "learning_rate": 0.00017435897435897436, + "loss": 2.2023, + "step": 6 + }, + { + "epoch": 0.5384615384615384, + "grad_norm": 0.7206246256828308, + "learning_rate": 0.00016923076923076923, + "loss": 1.6198, + "step": 7 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.7825830578804016, + "learning_rate": 0.0001641025641025641, + "loss": 2.1556, + "step": 8 + }, + { + "epoch": 0.6923076923076923, + "grad_norm": 0.8774120211601257, + "learning_rate": 0.00015897435897435896, + "loss": 1.5896, + "step": 9 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.3417644500732422, + "learning_rate": 0.00015384615384615385, + "loss": 2.2701, + "step": 10 + }, + { + "epoch": 0.8461538461538461, + "grad_norm": 1.579359769821167, + "learning_rate": 0.00014871794871794872, + "loss": 1.9842, + "step": 11 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.8545724153518677, + "learning_rate": 0.0001435897435897436, + "loss": 2.537, + "step": 12 + }, + { + "epoch": 1.0, + "grad_norm": 0.9196115732192993, + "learning_rate": 0.00013846153846153847, + "loss": 2.011, + "step": 13 + }, + { + "epoch": 1.0769230769230769, + "grad_norm": 0.692592203617096, + "learning_rate": 0.00013333333333333334, + "loss": 1.6603, + "step": 14 + }, + { + "epoch": 1.1538461538461537, + "grad_norm": 1.2092429399490356, + "learning_rate": 0.00012820512820512823, + "loss": 1.7742, + "step": 15 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 1.0968248844146729, + "learning_rate": 0.0001230769230769231, + "loss": 2.1299, + "step": 16 + }, + { + "epoch": 1.3076923076923077, + "grad_norm": 1.4401835203170776, + "learning_rate": 0.00011794871794871796, + "loss": 1.6406, + "step": 17 + }, + { + "epoch": 1.3846153846153846, + "grad_norm": 0.9979494214057922, + "learning_rate": 0.00011282051282051283, + "loss": 1.541, + "step": 18 + }, + { + "epoch": 1.4615384615384617, + "grad_norm": 0.7204625606536865, + "learning_rate": 0.0001076923076923077, + "loss": 1.2973, + "step": 19 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 1.319799542427063, + "learning_rate": 0.00010256410256410256, + "loss": 1.2497, + "step": 20 + } + ], + "logging_steps": 1, + "max_steps": 39, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1250462309099520.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20/training_args.bin b/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..069e5cda3eabb1a48d3041ea8d00b7448f1d3950 --- /dev/null +++ b/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38864ac515eca7301f4fd6b0dd075ff6e07668a525c9e5d1e663f0d38b4a722a +size 5777 diff --git a/checkpoint-30/README.md b/checkpoint-30/README.md new file mode 100644 index 0000000000000000000000000000000000000000..68d364589683719ecbfba33d66a08d48cda71653 --- /dev/null +++ b/checkpoint-30/README.md @@ -0,0 +1,207 @@ +--- +base_model: /teamspace/studios/this_studio/downloaded_model +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/teamspace/studios/this_studio/downloaded_model +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-30/adapter_config.json b/checkpoint-30/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ae242b6375c320cf55807a55fd9d5f28b34f69a --- /dev/null +++ b/checkpoint-30/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/teamspace/studios/this_studio/downloaded_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "v_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-30/adapter_model.safetensors b/checkpoint-30/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2aa14fbbf619c34682d8a5494da58dc080d5f06 --- /dev/null +++ b/checkpoint-30/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28e64cf62697acbca5f38b7ca2eedc28a40f4c1294b27446dddfff0a5b3281f +size 174655536 diff --git a/checkpoint-30/chat_template.jinja b/checkpoint-30/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..7641578becb325296855fbde0f59778cae094eee --- /dev/null +++ b/checkpoint-30/chat_template.jinja @@ -0,0 +1,14 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' + +' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{' +' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{' +<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %} \ No newline at end of file diff --git a/checkpoint-30/optimizer.pt b/checkpoint-30/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6e43e4a877c9abfff584d5166b2e9e8c31bcb79 --- /dev/null +++ b/checkpoint-30/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f0fe83885301c2e0c2123563a6053fe971cebbb62e0b3d4429339516a8df826 +size 349601867 diff --git a/checkpoint-30/rng_state.pth b/checkpoint-30/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1c50ad02f232631db9aa9c8bdb3f714c60f57b3 --- /dev/null +++ b/checkpoint-30/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1106f20dc49cdc95ed95423913c1e9c645bbfa8ae43a1ef0e2b681606caad5 +size 14645 diff --git a/checkpoint-30/scheduler.pt b/checkpoint-30/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38f03cecb839ae3bdb59c874ac12f6860b5e60df --- /dev/null +++ b/checkpoint-30/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0734212d308ae669cc7fcde53405cd718f2b1b3fbdeec39663779ad52a5fdfbc +size 1465 diff --git a/checkpoint-30/special_tokens_map.json b/checkpoint-30/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/checkpoint-30/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-30/tokenizer.json b/checkpoint-30/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..d72250588582c7daae9ae9219ea47f28aff86443 --- /dev/null +++ b/checkpoint-30/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf896220929e8f6f523e88395f3fcd58ba0feef43b4f625af5b187a4caf4a629 +size 11423445 diff --git a/checkpoint-30/tokenizer_config.json b/checkpoint-30/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4657dc1d512391538fdb16dd905945ad376062f --- /dev/null +++ b/checkpoint-30/tokenizer_config.json @@ -0,0 +1,242 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151669": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151670": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 131072, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/checkpoint-30/trainer_state.json b/checkpoint-30/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..10846cfcffbcd34a8cb9c85d0bc7556aa52f3734 --- /dev/null +++ b/checkpoint-30/trainer_state.json @@ -0,0 +1,244 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.3076923076923075, + "eval_steps": 500, + "global_step": 30, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07692307692307693, + "grad_norm": 1.0085614919662476, + "learning_rate": 0.0002, + "loss": 2.3547, + "step": 1 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 1.6940793991088867, + "learning_rate": 0.00019487179487179487, + "loss": 3.3236, + "step": 2 + }, + { + "epoch": 0.23076923076923078, + "grad_norm": 1.1689488887786865, + "learning_rate": 0.00018974358974358974, + "loss": 2.6049, + "step": 3 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 1.2771538496017456, + "learning_rate": 0.00018461538461538463, + "loss": 2.7969, + "step": 4 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 1.1572215557098389, + "learning_rate": 0.0001794871794871795, + "loss": 2.3184, + "step": 5 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 1.1674821376800537, + "learning_rate": 0.00017435897435897436, + "loss": 2.2023, + "step": 6 + }, + { + "epoch": 0.5384615384615384, + "grad_norm": 0.7206246256828308, + "learning_rate": 0.00016923076923076923, + "loss": 1.6198, + "step": 7 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.7825830578804016, + "learning_rate": 0.0001641025641025641, + "loss": 2.1556, + "step": 8 + }, + { + "epoch": 0.6923076923076923, + "grad_norm": 0.8774120211601257, + "learning_rate": 0.00015897435897435896, + "loss": 1.5896, + "step": 9 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.3417644500732422, + "learning_rate": 0.00015384615384615385, + "loss": 2.2701, + "step": 10 + }, + { + "epoch": 0.8461538461538461, + "grad_norm": 1.579359769821167, + "learning_rate": 0.00014871794871794872, + "loss": 1.9842, + "step": 11 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.8545724153518677, + "learning_rate": 0.0001435897435897436, + "loss": 2.537, + "step": 12 + }, + { + "epoch": 1.0, + "grad_norm": 0.9196115732192993, + "learning_rate": 0.00013846153846153847, + "loss": 2.011, + "step": 13 + }, + { + "epoch": 1.0769230769230769, + "grad_norm": 0.692592203617096, + "learning_rate": 0.00013333333333333334, + "loss": 1.6603, + "step": 14 + }, + { + "epoch": 1.1538461538461537, + "grad_norm": 1.2092429399490356, + "learning_rate": 0.00012820512820512823, + "loss": 1.7742, + "step": 15 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 1.0968248844146729, + "learning_rate": 0.0001230769230769231, + "loss": 2.1299, + "step": 16 + }, + { + "epoch": 1.3076923076923077, + "grad_norm": 1.4401835203170776, + "learning_rate": 0.00011794871794871796, + "loss": 1.6406, + "step": 17 + }, + { + "epoch": 1.3846153846153846, + "grad_norm": 0.9979494214057922, + "learning_rate": 0.00011282051282051283, + "loss": 1.541, + "step": 18 + }, + { + "epoch": 1.4615384615384617, + "grad_norm": 0.7204625606536865, + "learning_rate": 0.0001076923076923077, + "loss": 1.2973, + "step": 19 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 1.319799542427063, + "learning_rate": 0.00010256410256410256, + "loss": 1.2497, + "step": 20 + }, + { + "epoch": 1.6153846153846154, + "grad_norm": 0.9326438903808594, + "learning_rate": 9.743589743589744e-05, + "loss": 2.0785, + "step": 21 + }, + { + "epoch": 1.6923076923076923, + "grad_norm": 1.1567381620407104, + "learning_rate": 9.230769230769232e-05, + "loss": 1.7149, + "step": 22 + }, + { + "epoch": 1.7692307692307692, + "grad_norm": 1.8623816967010498, + "learning_rate": 8.717948717948718e-05, + "loss": 2.0036, + "step": 23 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 0.866828978061676, + "learning_rate": 8.205128205128205e-05, + "loss": 1.6695, + "step": 24 + }, + { + "epoch": 1.9230769230769231, + "grad_norm": 0.9064734578132629, + "learning_rate": 7.692307692307693e-05, + "loss": 1.5794, + "step": 25 + }, + { + "epoch": 2.0, + "grad_norm": 0.6402365565299988, + "learning_rate": 7.17948717948718e-05, + "loss": 1.3953, + "step": 26 + }, + { + "epoch": 2.076923076923077, + "grad_norm": 0.7920997738838196, + "learning_rate": 6.666666666666667e-05, + "loss": 1.5943, + "step": 27 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 0.8890045881271362, + "learning_rate": 6.153846153846155e-05, + "loss": 1.4789, + "step": 28 + }, + { + "epoch": 2.230769230769231, + "grad_norm": 0.6904930472373962, + "learning_rate": 5.6410256410256414e-05, + "loss": 1.2727, + "step": 29 + }, + { + "epoch": 2.3076923076923075, + "grad_norm": 1.281782627105713, + "learning_rate": 5.128205128205128e-05, + "loss": 0.9979, + "step": 30 + } + ], + "logging_steps": 1, + "max_steps": 39, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1993687889141760.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-30/training_args.bin b/checkpoint-30/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..069e5cda3eabb1a48d3041ea8d00b7448f1d3950 --- /dev/null +++ b/checkpoint-30/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38864ac515eca7301f4fd6b0dd075ff6e07668a525c9e5d1e663f0d38b4a722a +size 5777 diff --git a/checkpoint-39/README.md b/checkpoint-39/README.md new file mode 100644 index 0000000000000000000000000000000000000000..68d364589683719ecbfba33d66a08d48cda71653 --- /dev/null +++ b/checkpoint-39/README.md @@ -0,0 +1,207 @@ +--- +base_model: /teamspace/studios/this_studio/downloaded_model +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/teamspace/studios/this_studio/downloaded_model +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.0 \ No newline at end of file diff --git a/checkpoint-39/adapter_config.json b/checkpoint-39/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..2ae242b6375c320cf55807a55fd9d5f28b34f69a --- /dev/null +++ b/checkpoint-39/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/teamspace/studios/this_studio/downloaded_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.0", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "k_proj", + "v_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-39/adapter_model.safetensors b/checkpoint-39/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45e5e582c452ddbab2e6d2f94b52fe86f2a8c8b2 --- /dev/null +++ b/checkpoint-39/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5c7e9c9c8695a44afbdc3b0040155b29ca8619f91105529499847f0a4d4cacc +size 174655536 diff --git a/checkpoint-39/chat_template.jinja b/checkpoint-39/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..7641578becb325296855fbde0f59778cae094eee --- /dev/null +++ b/checkpoint-39/chat_template.jinja @@ -0,0 +1,14 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + ' + +' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{% set content = message['content'] %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + content + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{% endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if content is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{content + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{' +' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + ' +' + '```json' + ' +' + tool['function']['arguments'] + ' +' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + content + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{' +<|tool▁output▁begin|>' + content + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %} \ No newline at end of file diff --git a/checkpoint-39/optimizer.pt b/checkpoint-39/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8af4b9725bc5d0d82dd00561186c841f085a0c67 --- /dev/null +++ b/checkpoint-39/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f5c988239566a0aecb646cde87e8fe6264a741d8a29faaee3cd6be1bfd13c0 +size 349601867 diff --git a/checkpoint-39/rng_state.pth b/checkpoint-39/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3656f09e23ed8983dfbf93f42ec63806501442fe --- /dev/null +++ b/checkpoint-39/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8bb4077858d5a3f0d919a3a4375afe17d1c724638134b6bd9b4f58aee8d4354 +size 14645 diff --git a/checkpoint-39/scheduler.pt b/checkpoint-39/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b2033f69e5cf8d2500eb1014c0c8fdcbefebd34 --- /dev/null +++ b/checkpoint-39/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f70980be9bb1014c5e49c3244e2150ff59ba3a2b5ebb71cb686eb7384e72faa +size 1465 diff --git a/checkpoint-39/special_tokens_map.json b/checkpoint-39/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/checkpoint-39/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-39/tokenizer.json b/checkpoint-39/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..d72250588582c7daae9ae9219ea47f28aff86443 --- /dev/null +++ b/checkpoint-39/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf896220929e8f6f523e88395f3fcd58ba0feef43b4f625af5b187a4caf4a629 +size 11423445 diff --git a/checkpoint-39/tokenizer_config.json b/checkpoint-39/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4657dc1d512391538fdb16dd905945ad376062f --- /dev/null +++ b/checkpoint-39/tokenizer_config.json @@ -0,0 +1,242 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151669": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151670": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 131072, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/checkpoint-39/trainer_state.json b/checkpoint-39/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f6645b51ec6cab18cda89cc1442bcba79704c527 --- /dev/null +++ b/checkpoint-39/trainer_state.json @@ -0,0 +1,307 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 39, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07692307692307693, + "grad_norm": 1.0085614919662476, + "learning_rate": 0.0002, + "loss": 2.3547, + "step": 1 + }, + { + "epoch": 0.15384615384615385, + "grad_norm": 1.6940793991088867, + "learning_rate": 0.00019487179487179487, + "loss": 3.3236, + "step": 2 + }, + { + "epoch": 0.23076923076923078, + "grad_norm": 1.1689488887786865, + "learning_rate": 0.00018974358974358974, + "loss": 2.6049, + "step": 3 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 1.2771538496017456, + "learning_rate": 0.00018461538461538463, + "loss": 2.7969, + "step": 4 + }, + { + "epoch": 0.38461538461538464, + "grad_norm": 1.1572215557098389, + "learning_rate": 0.0001794871794871795, + "loss": 2.3184, + "step": 5 + }, + { + "epoch": 0.46153846153846156, + "grad_norm": 1.1674821376800537, + "learning_rate": 0.00017435897435897436, + "loss": 2.2023, + "step": 6 + }, + { + "epoch": 0.5384615384615384, + "grad_norm": 0.7206246256828308, + "learning_rate": 0.00016923076923076923, + "loss": 1.6198, + "step": 7 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.7825830578804016, + "learning_rate": 0.0001641025641025641, + "loss": 2.1556, + "step": 8 + }, + { + "epoch": 0.6923076923076923, + "grad_norm": 0.8774120211601257, + "learning_rate": 0.00015897435897435896, + "loss": 1.5896, + "step": 9 + }, + { + "epoch": 0.7692307692307693, + "grad_norm": 1.3417644500732422, + "learning_rate": 0.00015384615384615385, + "loss": 2.2701, + "step": 10 + }, + { + "epoch": 0.8461538461538461, + "grad_norm": 1.579359769821167, + "learning_rate": 0.00014871794871794872, + "loss": 1.9842, + "step": 11 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.8545724153518677, + "learning_rate": 0.0001435897435897436, + "loss": 2.537, + "step": 12 + }, + { + "epoch": 1.0, + "grad_norm": 0.9196115732192993, + "learning_rate": 0.00013846153846153847, + "loss": 2.011, + "step": 13 + }, + { + "epoch": 1.0769230769230769, + "grad_norm": 0.692592203617096, + "learning_rate": 0.00013333333333333334, + "loss": 1.6603, + "step": 14 + }, + { + "epoch": 1.1538461538461537, + "grad_norm": 1.2092429399490356, + "learning_rate": 0.00012820512820512823, + "loss": 1.7742, + "step": 15 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 1.0968248844146729, + "learning_rate": 0.0001230769230769231, + "loss": 2.1299, + "step": 16 + }, + { + "epoch": 1.3076923076923077, + "grad_norm": 1.4401835203170776, + "learning_rate": 0.00011794871794871796, + "loss": 1.6406, + "step": 17 + }, + { + "epoch": 1.3846153846153846, + "grad_norm": 0.9979494214057922, + "learning_rate": 0.00011282051282051283, + "loss": 1.541, + "step": 18 + }, + { + "epoch": 1.4615384615384617, + "grad_norm": 0.7204625606536865, + "learning_rate": 0.0001076923076923077, + "loss": 1.2973, + "step": 19 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 1.319799542427063, + "learning_rate": 0.00010256410256410256, + "loss": 1.2497, + "step": 20 + }, + { + "epoch": 1.6153846153846154, + "grad_norm": 0.9326438903808594, + "learning_rate": 9.743589743589744e-05, + "loss": 2.0785, + "step": 21 + }, + { + "epoch": 1.6923076923076923, + "grad_norm": 1.1567381620407104, + "learning_rate": 9.230769230769232e-05, + "loss": 1.7149, + "step": 22 + }, + { + "epoch": 1.7692307692307692, + "grad_norm": 1.8623816967010498, + "learning_rate": 8.717948717948718e-05, + "loss": 2.0036, + "step": 23 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 0.866828978061676, + "learning_rate": 8.205128205128205e-05, + "loss": 1.6695, + "step": 24 + }, + { + "epoch": 1.9230769230769231, + "grad_norm": 0.9064734578132629, + "learning_rate": 7.692307692307693e-05, + "loss": 1.5794, + "step": 25 + }, + { + "epoch": 2.0, + "grad_norm": 0.6402365565299988, + "learning_rate": 7.17948717948718e-05, + "loss": 1.3953, + "step": 26 + }, + { + "epoch": 2.076923076923077, + "grad_norm": 0.7920997738838196, + "learning_rate": 6.666666666666667e-05, + "loss": 1.5943, + "step": 27 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 0.8890045881271362, + "learning_rate": 6.153846153846155e-05, + "loss": 1.4789, + "step": 28 + }, + { + "epoch": 2.230769230769231, + "grad_norm": 0.6904930472373962, + "learning_rate": 5.6410256410256414e-05, + "loss": 1.2727, + "step": 29 + }, + { + "epoch": 2.3076923076923075, + "grad_norm": 1.281782627105713, + "learning_rate": 5.128205128205128e-05, + "loss": 0.9979, + "step": 30 + }, + { + "epoch": 2.3846153846153846, + "grad_norm": 0.9640224575996399, + "learning_rate": 4.615384615384616e-05, + "loss": 1.6476, + "step": 31 + }, + { + "epoch": 2.4615384615384617, + "grad_norm": 0.979846179485321, + "learning_rate": 4.1025641025641023e-05, + "loss": 1.2246, + "step": 32 + }, + { + "epoch": 2.5384615384615383, + "grad_norm": 1.8661036491394043, + "learning_rate": 3.58974358974359e-05, + "loss": 1.4086, + "step": 33 + }, + { + "epoch": 2.6153846153846154, + "grad_norm": 1.186337947845459, + "learning_rate": 3.0769230769230774e-05, + "loss": 1.1105, + "step": 34 + }, + { + "epoch": 2.6923076923076925, + "grad_norm": 0.7501964569091797, + "learning_rate": 2.564102564102564e-05, + "loss": 1.1209, + "step": 35 + }, + { + "epoch": 2.769230769230769, + "grad_norm": 1.1312605142593384, + "learning_rate": 2.0512820512820512e-05, + "loss": 0.965, + "step": 36 + }, + { + "epoch": 2.8461538461538463, + "grad_norm": 0.7485955953598022, + "learning_rate": 1.5384615384615387e-05, + "loss": 1.7158, + "step": 37 + }, + { + "epoch": 2.9230769230769234, + "grad_norm": 1.2738244533538818, + "learning_rate": 1.0256410256410256e-05, + "loss": 1.1139, + "step": 38 + }, + { + "epoch": 3.0, + "grad_norm": 1.7984910011291504, + "learning_rate": 5.128205128205128e-06, + "loss": 1.5986, + "step": 39 + } + ], + "logging_steps": 1, + "max_steps": 39, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 10, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2521659849154560.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-39/training_args.bin b/checkpoint-39/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..069e5cda3eabb1a48d3041ea8d00b7448f1d3950 --- /dev/null +++ b/checkpoint-39/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38864ac515eca7301f4fd6b0dd075ff6e07668a525c9e5d1e663f0d38b4a722a +size 5777 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a735b687f301cb8c8977655c966978602db7074f --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5eea00df7325708540ae7b4ac5332fbbe139008916d9af1f5a58b43c96b2493 +size 4902257696 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf1b8a56548843707324d5da9c218ededdd8143b --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd6e14c9c8acdcaf50fee44e5d659c8101e39abe0b264f668ee22fd42f3aeac +size 4915960368 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c495d0e782ad62ed98030e2b7bd2ada6b6b0730 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa38f2d616928e6673765a84eea61d9846aa02bc332b4d52ef99901a198f13fc +size 4983068496 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca149d1b90444afbdc10018371295dd3b01fef27 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0af667cf1a501c7f3ed160b3a43493e7228c9ab43373acc4d20caeebb561b21 +size 1580230264 diff --git a/push.py b/push.py new file mode 100644 index 0000000000000000000000000000000000000000..ed6b5f1a64483e0ac386aa3b8f6c4d72c7f826a7 --- /dev/null +++ b/push.py @@ -0,0 +1,18 @@ +import os +from huggingface_hub import HfApi + +# Get the token from the environment instead of hard-coding it +TOKEN = os.environ.get("HF_TOKEN") +REPO_ID = "algorythmtechnologies/Warren-8B-Uncensored-2000" +LOCAL_DIR = os.getcwd() + +if not TOKEN: + raise ValueError("Please set the HF_TOKEN environment variable!") + +api = HfApi() +api.upload_folder( + folder_path=LOCAL_DIR, + repo_id=REPO_ID, + repo_type="model", + token=TOKEN +) \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..d72250588582c7daae9ae9219ea47f28aff86443 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf896220929e8f6f523e88395f3fcd58ba0feef43b4f625af5b187a4caf4a629 +size 11423445 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4657dc1d512391538fdb16dd905945ad376062f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,242 @@ +{ + "add_bos_token": false, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151665": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151666": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151667": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151668": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151669": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151670": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 131072, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..069e5cda3eabb1a48d3041ea8d00b7448f1d3950 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38864ac515eca7301f4fd6b0dd075ff6e07668a525c9e5d1e663f0d38b4a722a +size 5777