diff --git a/cfhot_checkpoints/ckpt_1000/README.md b/cfhot_checkpoints/ckpt_1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_1000/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_1000/adapter_config.json b/cfhot_checkpoints/ckpt_1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7448f15fea8aac8424e5dfd6c993ca0081d9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_1000/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_1000/adapter_model.safetensors b/cfhot_checkpoints/ckpt_1000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3611ce179679a0e52d342e269bf7ea53b570664e --- /dev/null +++ b/cfhot_checkpoints/ckpt_1000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfcaeaad2be60f152dc14d24f25de7553bec299ba76e8cc2979759433e0ca935 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_1000/risk_predictor.pt b/cfhot_checkpoints/ckpt_1000/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..54c7da374df9d97b6a35392b50657672afa15bd5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_1000/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d5363162282a21a04657fca51fad12e7401a008787bc9920337cdff9fe7d20f +size 8424206 diff --git a/cfhot_checkpoints/ckpt_1500/README.md b/cfhot_checkpoints/ckpt_1500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_1500/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_1500/adapter_config.json b/cfhot_checkpoints/ckpt_1500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7448f15fea8aac8424e5dfd6c993ca0081d9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_1500/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_1500/adapter_model.safetensors b/cfhot_checkpoints/ckpt_1500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7276dd6fbdc2f2158d7235c858c1442073ee5dd5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_1500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d479b776bcfe73d784a85500fb773f65c9c5661f3126713f36474975e606898 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_1500/risk_predictor.pt b/cfhot_checkpoints/ckpt_1500/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..3602dbc9d780a0076f298708294d4351fc2d64ab --- /dev/null +++ b/cfhot_checkpoints/ckpt_1500/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dfc6e13d29ca9814b0f4e7ee99b95c708554e28885e923678a5bf81dc1caef5 +size 8424206 diff --git a/cfhot_checkpoints/ckpt_2000/README.md b/cfhot_checkpoints/ckpt_2000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_2000/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_2000/adapter_config.json b/cfhot_checkpoints/ckpt_2000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7448f15fea8aac8424e5dfd6c993ca0081d9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_2000/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_2000/adapter_model.safetensors b/cfhot_checkpoints/ckpt_2000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2237f3369a2ae835f6052d919f7319561087ef2e --- /dev/null +++ b/cfhot_checkpoints/ckpt_2000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262f652090b2ab0852ed811aac18647d9e2a585baa1a4d6df19661dc76e23e97 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_2000/risk_predictor.pt b/cfhot_checkpoints/ckpt_2000/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..20c63bbb81eb836d62987a3105823a94f963c86d --- /dev/null +++ b/cfhot_checkpoints/ckpt_2000/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df89a357a6d796152c097bcb47f97dae384d1984b1d2170ac85b883f32c805bd +size 8424206 diff --git a/cfhot_checkpoints/ckpt_2500/README.md b/cfhot_checkpoints/ckpt_2500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_2500/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_2500/adapter_config.json b/cfhot_checkpoints/ckpt_2500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7448f15fea8aac8424e5dfd6c993ca0081d9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_2500/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_2500/adapter_model.safetensors b/cfhot_checkpoints/ckpt_2500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7554c6cf0cd5184a4331c173dfbfd747bd783323 --- /dev/null +++ b/cfhot_checkpoints/ckpt_2500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:435c67b3304febaaab6d38260b2ef7348b4da2a956033f00890215d3db97676e +size 218138576 diff --git a/cfhot_checkpoints/ckpt_2500/risk_predictor.pt b/cfhot_checkpoints/ckpt_2500/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a3a05904ec641d02e696da51b222e63068c8307 --- /dev/null +++ b/cfhot_checkpoints/ckpt_2500/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955f78896870ba5a5c6a3baaf1f8e02ebccdca31a5c075c03e8c1a605493c973 +size 8424206 diff --git a/cfhot_checkpoints/ckpt_3000/README.md b/cfhot_checkpoints/ckpt_3000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3000/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_3000/adapter_config.json b/cfhot_checkpoints/ckpt_3000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7448f15fea8aac8424e5dfd6c993ca0081d9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3000/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_3000/adapter_model.safetensors b/cfhot_checkpoints/ckpt_3000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_3000/risk_predictor.pt b/cfhot_checkpoints/ckpt_3000/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..68a8044762f9255174ba7d1a7b6722443b28b204 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3000/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2154aca80291516ac83d6af24c6bca58e02e1b7544c118005749384fc6e9992e +size 8424206 diff --git a/cfhot_checkpoints/ckpt_3500/README.md b/cfhot_checkpoints/ckpt_3500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3500/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_3500/adapter_config.json b/cfhot_checkpoints/ckpt_3500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf0503a63cbd0281c1a348e05b23c7f22871742 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3500/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "q_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_3500/adapter_model.safetensors b/cfhot_checkpoints/ckpt_3500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_3500/risk_predictor.pt b/cfhot_checkpoints/ckpt_3500/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5556f39ecc3f5c7cc210f40898701b5af660d24 --- /dev/null +++ b/cfhot_checkpoints/ckpt_3500/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47bcf510891980b135b750c2979afa207ffb2631a751d41543c77d86ad25d827 +size 8424206 diff --git a/cfhot_checkpoints/ckpt_4000/README.md b/cfhot_checkpoints/ckpt_4000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4000/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_4000/adapter_config.json b/cfhot_checkpoints/ckpt_4000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf0503a63cbd0281c1a348e05b23c7f22871742 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4000/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "q_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_4000/adapter_model.safetensors b/cfhot_checkpoints/ckpt_4000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_4000/risk_predictor.pt b/cfhot_checkpoints/ckpt_4000/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6b6a78b53e31b4482b42ea65b372fd5b1b7fb43 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4000/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bdf614d3039584497287ffa012e754d7ea78625b50bfabe7dfad8c9034bad2b +size 8424206 diff --git a/cfhot_checkpoints/ckpt_4500/README.md b/cfhot_checkpoints/ckpt_4500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4500/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_4500/adapter_config.json b/cfhot_checkpoints/ckpt_4500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf0503a63cbd0281c1a348e05b23c7f22871742 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4500/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "q_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_4500/adapter_model.safetensors b/cfhot_checkpoints/ckpt_4500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_4500/risk_predictor.pt b/cfhot_checkpoints/ckpt_4500/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..5775fa42302440e379f6afe6848f7c421af25636 --- /dev/null +++ b/cfhot_checkpoints/ckpt_4500/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4938f3a54c4461c7cca7bf594ea24fdb46498f28a0d41947da5e43defcb4b3d4 +size 8424206 diff --git a/cfhot_checkpoints/ckpt_500/README.md b/cfhot_checkpoints/ckpt_500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_500/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_500/adapter_config.json b/cfhot_checkpoints/ckpt_500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7448f15fea8aac8424e5dfd6c993ca0081d9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_500/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_500/adapter_model.safetensors b/cfhot_checkpoints/ckpt_500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80864cbedbd09eb78f4e4cd52ea0dcad8f4a146e --- /dev/null +++ b/cfhot_checkpoints/ckpt_500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38e966589b19a20eb13c423bf289974b060486b5997a8b2016380296627e86f +size 218138576 diff --git a/cfhot_checkpoints/ckpt_500/risk_predictor.pt b/cfhot_checkpoints/ckpt_500/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..fde46a34a01c64538106e248908bea417201ac4a --- /dev/null +++ b/cfhot_checkpoints/ckpt_500/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ec43c76aee3bcc2cbb98e0d73ca84738316f3b8b87af3817cb31e6a2f63dd4 +size 8424206 diff --git a/cfhot_checkpoints/ckpt_5000/adapter_config.json b/cfhot_checkpoints/ckpt_5000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a8714366d04337fe50581e38685c4879b7b8493b --- /dev/null +++ b/cfhot_checkpoints/ckpt_5000/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "LoganResearch/Ubermenschetien-8B", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "q_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_5000/adapter_model.safetensors b/cfhot_checkpoints/ckpt_5000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_5000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_5000/risk_predictor.pt b/cfhot_checkpoints/ckpt_5000/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..792ba4112cfc63e380f195600d19128bcf53941f --- /dev/null +++ b/cfhot_checkpoints/ckpt_5000/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269a27d66320538e17a25ba06d4a6f3c58551557522c53741f6adf79dc82bb67 +size 8424206 diff --git a/cfhot_checkpoints/ckpt_5500/README.md b/cfhot_checkpoints/ckpt_5500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_5500/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_5500/adapter_config.json b/cfhot_checkpoints/ckpt_5500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf0503a63cbd0281c1a348e05b23c7f22871742 --- /dev/null +++ b/cfhot_checkpoints/ckpt_5500/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "q_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_5500/adapter_model.safetensors b/cfhot_checkpoints/ckpt_5500/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_5500/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_5500/risk_predictor.pt b/cfhot_checkpoints/ckpt_5500/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3f5806f58f7989a144ced233041212ee37a5c7f --- /dev/null +++ b/cfhot_checkpoints/ckpt_5500/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04b89b1dce600bba919842f1c95a8b87aab52f9ab00bed5de0ba7fc643fd19d +size 8424206 diff --git a/cfhot_checkpoints/ckpt_6000/README.md b/cfhot_checkpoints/ckpt_6000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/ckpt_6000/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_6000/adapter_config.json b/cfhot_checkpoints/ckpt_6000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf0503a63cbd0281c1a348e05b23c7f22871742 --- /dev/null +++ b/cfhot_checkpoints/ckpt_6000/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "q_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/ckpt_6000/adapter_model.safetensors b/cfhot_checkpoints/ckpt_6000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/ckpt_6000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/ckpt_6000/risk_predictor.pt b/cfhot_checkpoints/ckpt_6000/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2d3678081f96a34b68948210bc750dde2d1b375 --- /dev/null +++ b/cfhot_checkpoints/ckpt_6000/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea75a1a8b408dadc229b464d0e1f131af33a3a974efa523ba9aad2780625fb3 +size 8424206 diff --git a/cfhot_checkpoints/final/README.md b/cfhot_checkpoints/final/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/final/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/final/adapter_config.json b/cfhot_checkpoints/final/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7448f15fea8aac8424e5dfd6c993ca0081d9 --- /dev/null +++ b/cfhot_checkpoints/final/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "v_proj", + "k_proj", + "q_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/final/adapter_model.safetensors b/cfhot_checkpoints/final/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/final/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/final/risk_predictor.pt b/cfhot_checkpoints/final/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..68a8044762f9255174ba7d1a7b6722443b28b204 --- /dev/null +++ b/cfhot_checkpoints/final/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2154aca80291516ac83d6af24c6bca58e02e1b7544c118005749384fc6e9992e +size 8424206 diff --git a/cfhot_checkpoints/final_6000/README.md b/cfhot_checkpoints/final_6000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7f73dcdf87181991ef55e9d03b2fcd3c26e2cc9 --- /dev/null +++ b/cfhot_checkpoints/final_6000/README.md @@ -0,0 +1,207 @@ +--- +base_model: /mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:/mnt/nvme2/ubermesnchetien4/models/merged-final-v5 +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/cfhot_checkpoints/final_6000/adapter_config.json b/cfhot_checkpoints/final_6000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecf0503a63cbd0281c1a348e05b23c7f22871742 --- /dev/null +++ b/cfhot_checkpoints/final_6000/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "/mnt/nvme2/ubermesnchetien4/models/merged-final-v5", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 128, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "k_proj", + "q_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/cfhot_checkpoints/final_6000/adapter_model.safetensors b/cfhot_checkpoints/final_6000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0efdf7e7946db4e79a0bdd82a88b936033cfb8e5 --- /dev/null +++ b/cfhot_checkpoints/final_6000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3889eccb9c04ba25ae86b99121368121a338fc3ce92a38456874bf455347e389 +size 218138576 diff --git a/cfhot_checkpoints/final_6000/risk_predictor.pt b/cfhot_checkpoints/final_6000/risk_predictor.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2d3678081f96a34b68948210bc750dde2d1b375 --- /dev/null +++ b/cfhot_checkpoints/final_6000/risk_predictor.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea75a1a8b408dadc229b464d0e1f131af33a3a974efa523ba9aad2780625fb3 +size 8424206