sujithbandi2K6 committed on Mar 6, 2024

Commit

e3b97f8

verified ·

1 Parent(s): 164a495

Upload folder using huggingface_hub

Browse files

Files changed (48) hide show

checkpoint-120/README.md +204 -0
checkpoint-120/adapter_config.json +34 -0
checkpoint-120/adapter_model.safetensors +3 -0
checkpoint-120/optimizer.pt +3 -0
checkpoint-120/rng_state.pth +3 -0
checkpoint-120/scheduler.pt +3 -0
checkpoint-120/trainer_state.json +81 -0
checkpoint-120/training_args.bin +3 -0
checkpoint-150/README.md +204 -0
checkpoint-150/adapter_config.json +34 -0
checkpoint-150/adapter_model.safetensors +3 -0
checkpoint-150/optimizer.pt +3 -0
checkpoint-150/rng_state.pth +3 -0
checkpoint-150/scheduler.pt +3 -0
checkpoint-150/trainer_state.json +96 -0
checkpoint-150/training_args.bin +3 -0
checkpoint-180/README.md +204 -0
checkpoint-180/adapter_config.json +34 -0
checkpoint-180/adapter_model.safetensors +3 -0
checkpoint-180/optimizer.pt +3 -0
checkpoint-180/rng_state.pth +3 -0
checkpoint-180/scheduler.pt +3 -0
checkpoint-180/trainer_state.json +111 -0
checkpoint-180/training_args.bin +3 -0
checkpoint-30/README.md +204 -0
checkpoint-30/adapter_config.json +34 -0
checkpoint-30/adapter_model.safetensors +3 -0
checkpoint-30/optimizer.pt +3 -0
checkpoint-30/rng_state.pth +3 -0
checkpoint-30/scheduler.pt +3 -0
checkpoint-30/trainer_state.json +36 -0
checkpoint-30/training_args.bin +3 -0
checkpoint-60/README.md +204 -0
checkpoint-60/adapter_config.json +34 -0
checkpoint-60/adapter_model.safetensors +3 -0
checkpoint-60/optimizer.pt +3 -0
checkpoint-60/rng_state.pth +3 -0
checkpoint-60/scheduler.pt +3 -0
checkpoint-60/trainer_state.json +51 -0
checkpoint-60/training_args.bin +3 -0
checkpoint-90/README.md +204 -0
checkpoint-90/adapter_config.json +34 -0
checkpoint-90/adapter_model.safetensors +3 -0
checkpoint-90/optimizer.pt +3 -0
checkpoint-90/rng_state.pth +3 -0
checkpoint-90/scheduler.pt +3 -0
checkpoint-90/trainer_state.json +66 -0
checkpoint-90/training_args.bin +3 -0

checkpoint-120/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.1.dev0

checkpoint-120/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
+    "lm_head",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-120/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:366ef0be8f4e0d99507a9bb05b75f8bb9ed917932653193f96b1a410badccf69
+size 864513616

checkpoint-120/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a32d28e76584c03023dcc54a51bced2ccee87970b1d8f27d7f58819bb08fb1d
+size 170951068

checkpoint-120/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f28baefe8ad0421e66f60987c9ff203ee2e0b2fb40f730d83917847ca42ae084
+size 14244

checkpoint-120/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a937846236efd15bfb3e81b0b4467d56447c05339e6fb50790143d8a1f4488e
+size 1064

checkpoint-120/trainer_state.json ADDED Viewed

	@@ -0,0 +1,81 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0338983050847457,
+  "eval_steps": 30,
+  "global_step": 120,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.51,
+      "grad_norm": 5.693178653717041,
+      "learning_rate": 2.0949720670391062e-05,
+      "loss": 1.7061,
+      "step": 30
+    },
+    {
+      "epoch": 0.51,
+      "eval_loss": 1.036990761756897,
+      "eval_runtime": 99.4587,
+      "eval_samples_per_second": 0.483,
+      "eval_steps_per_second": 0.06,
+      "step": 30
+    },
+    {
+      "epoch": 1.02,
+      "grad_norm": 7.891976833343506,
+      "learning_rate": 1.675977653631285e-05,
+      "loss": 0.9556,
+      "step": 60
+    },
+    {
+      "epoch": 1.02,
+      "eval_loss": 0.5036168098449707,
+      "eval_runtime": 99.1132,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.061,
+      "step": 60
+    },
+    {
+      "epoch": 1.53,
+      "grad_norm": 10.309865951538086,
+      "learning_rate": 1.2569832402234637e-05,
+      "loss": 0.3647,
+      "step": 90
+    },
+    {
+      "epoch": 1.53,
+      "eval_loss": 0.27036499977111816,
+      "eval_runtime": 99.1803,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.06,
+      "step": 90
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 8.009493827819824,
+      "learning_rate": 8.379888268156424e-06,
+      "loss": 0.2179,
+      "step": 120
+    },
+    {
+      "epoch": 2.03,
+      "eval_loss": 0.13816428184509277,
+      "eval_runtime": 99.5171,
+      "eval_samples_per_second": 0.482,
+      "eval_steps_per_second": 0.06,
+      "step": 120
+    }
+  ],
+  "logging_steps": 30,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 30,
+  "total_flos": 5261036227657728.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-120/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2190a3b34c30c3a3c1331ac880f05b42ea02886a97d821ecb4f507b9962054
+size 4920

checkpoint-150/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.1.dev0

checkpoint-150/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
+    "lm_head",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-150/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74004ed299da841758b23dde28e86302ee5ae8f682b5392222dfc3fd35ebbb65
+size 864513616

checkpoint-150/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:62c8287577f74d752e63e00a58df0f31afb45915a3be0aee2755223be58c0639
+size 170951068

checkpoint-150/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c34b7f76473e025162e5bac817c61d008e3ea83d468e54a760fd90f1b5868599
+size 14244

checkpoint-150/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2bec83f09d2351062b0b7966bd8092cbce161e1203c8d963039522262c7c42a9
+size 1064

checkpoint-150/trainer_state.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.542372881355932,
+  "eval_steps": 30,
+  "global_step": 150,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.51,
+      "grad_norm": 5.693178653717041,
+      "learning_rate": 2.0949720670391062e-05,
+      "loss": 1.7061,
+      "step": 30
+    },
+    {
+      "epoch": 0.51,
+      "eval_loss": 1.036990761756897,
+      "eval_runtime": 99.4587,
+      "eval_samples_per_second": 0.483,
+      "eval_steps_per_second": 0.06,
+      "step": 30
+    },
+    {
+      "epoch": 1.02,
+      "grad_norm": 7.891976833343506,
+      "learning_rate": 1.675977653631285e-05,
+      "loss": 0.9556,
+      "step": 60
+    },
+    {
+      "epoch": 1.02,
+      "eval_loss": 0.5036168098449707,
+      "eval_runtime": 99.1132,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.061,
+      "step": 60
+    },
+    {
+      "epoch": 1.53,
+      "grad_norm": 10.309865951538086,
+      "learning_rate": 1.2569832402234637e-05,
+      "loss": 0.3647,
+      "step": 90
+    },
+    {
+      "epoch": 1.53,
+      "eval_loss": 0.27036499977111816,
+      "eval_runtime": 99.1803,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.06,
+      "step": 90
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 8.009493827819824,
+      "learning_rate": 8.379888268156424e-06,
+      "loss": 0.2179,
+      "step": 120
+    },
+    {
+      "epoch": 2.03,
+      "eval_loss": 0.13816428184509277,
+      "eval_runtime": 99.5171,
+      "eval_samples_per_second": 0.482,
+      "eval_steps_per_second": 0.06,
+      "step": 120
+    },
+    {
+      "epoch": 2.54,
+      "grad_norm": 8.932345390319824,
+      "learning_rate": 4.189944134078212e-06,
+      "loss": 0.1175,
+      "step": 150
+    },
+    {
+      "epoch": 2.54,
+      "eval_loss": 0.08441048860549927,
+      "eval_runtime": 99.1696,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.061,
+      "step": 150
+    }
+  ],
+  "logging_steps": 30,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 30,
+  "total_flos": 6587347881689088.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-150/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2190a3b34c30c3a3c1331ac880f05b42ea02886a97d821ecb4f507b9962054
+size 4920

checkpoint-180/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.1.dev0

checkpoint-180/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
+    "lm_head",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-180/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52460e86bb732a88c3bd99fde79de86a5260519f45b067221a6a0f18e7fa8e5d
+size 864513616

checkpoint-180/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:378669ada8c84755e3147f950f169f6993b024fda4f8d046e135c90c3da40830
+size 170951068

checkpoint-180/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17b28ad98434fed60c18e659dd460dc5e231c691d8d2513941a5ebd27ef61b37
+size 14244

checkpoint-180/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a37b6323a2995ac16af7a519d268222f517b5d780eec42e7807ba12764dd32a1
+size 1064

checkpoint-180/trainer_state.json ADDED Viewed

	@@ -0,0 +1,111 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0508474576271185,
+  "eval_steps": 30,
+  "global_step": 180,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.51,
+      "grad_norm": 5.693178653717041,
+      "learning_rate": 2.0949720670391062e-05,
+      "loss": 1.7061,
+      "step": 30
+    },
+    {
+      "epoch": 0.51,
+      "eval_loss": 1.036990761756897,
+      "eval_runtime": 99.4587,
+      "eval_samples_per_second": 0.483,
+      "eval_steps_per_second": 0.06,
+      "step": 30
+    },
+    {
+      "epoch": 1.02,
+      "grad_norm": 7.891976833343506,
+      "learning_rate": 1.675977653631285e-05,
+      "loss": 0.9556,
+      "step": 60
+    },
+    {
+      "epoch": 1.02,
+      "eval_loss": 0.5036168098449707,
+      "eval_runtime": 99.1132,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.061,
+      "step": 60
+    },
+    {
+      "epoch": 1.53,
+      "grad_norm": 10.309865951538086,
+      "learning_rate": 1.2569832402234637e-05,
+      "loss": 0.3647,
+      "step": 90
+    },
+    {
+      "epoch": 1.53,
+      "eval_loss": 0.27036499977111816,
+      "eval_runtime": 99.1803,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.06,
+      "step": 90
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 8.009493827819824,
+      "learning_rate": 8.379888268156424e-06,
+      "loss": 0.2179,
+      "step": 120
+    },
+    {
+      "epoch": 2.03,
+      "eval_loss": 0.13816428184509277,
+      "eval_runtime": 99.5171,
+      "eval_samples_per_second": 0.482,
+      "eval_steps_per_second": 0.06,
+      "step": 120
+    },
+    {
+      "epoch": 2.54,
+      "grad_norm": 8.932345390319824,
+      "learning_rate": 4.189944134078212e-06,
+      "loss": 0.1175,
+      "step": 150
+    },
+    {
+      "epoch": 2.54,
+      "eval_loss": 0.08441048860549927,
+      "eval_runtime": 99.1696,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.061,
+      "step": 150
+    },
+    {
+      "epoch": 3.05,
+      "grad_norm": 3.603414535522461,
+      "learning_rate": 0.0,
+      "loss": 0.0756,
+      "step": 180
+    },
+    {
+      "epoch": 3.05,
+      "eval_loss": 0.07085774838924408,
+      "eval_runtime": 99.0507,
+      "eval_samples_per_second": 0.485,
+      "eval_steps_per_second": 0.061,
+      "step": 180
+    }
+  ],
+  "logging_steps": 30,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 30,
+  "total_flos": 7891554341486592.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-180/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2190a3b34c30c3a3c1331ac880f05b42ea02886a97d821ecb4f507b9962054
+size 4920

checkpoint-30/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.1.dev0

checkpoint-30/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
+    "lm_head",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-30/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad8ea0c1394de479c62d5ac9dffa3c3ccf55180ce65ac4281671e42fac4fd523
+size 864513616

checkpoint-30/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8798ef2f96c7a8125d0cb3f5f9dec5f4dba5a4c39f1a6465ec2f8d592eb3d577
+size 170951068

checkpoint-30/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5fb0c6a1f315a3f3fe94c5ced0b36daa54fb020fb92eb39455135299dba8fa5
+size 14244

checkpoint-30/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f6794cc17334cd2e588c29448e48bb30b3bc480b5c2b1687056a931b00eab44
+size 1064

checkpoint-30/trainer_state.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.5084745762711864,
+  "eval_steps": 30,
+  "global_step": 30,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.51,
+      "grad_norm": 5.693178653717041,
+      "learning_rate": 2.0949720670391062e-05,
+      "loss": 1.7061,
+      "step": 30
+    },
+    {
+      "epoch": 0.51,
+      "eval_loss": 1.036990761756897,
+      "eval_runtime": 99.4587,
+      "eval_samples_per_second": 0.483,
+      "eval_steps_per_second": 0.06,
+      "step": 30
+    }
+  ],
+  "logging_steps": 30,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 30,
+  "total_flos": 1326311654031360.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-30/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2190a3b34c30c3a3c1331ac880f05b42ea02886a97d821ecb4f507b9962054
+size 4920

checkpoint-60/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.1.dev0

checkpoint-60/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
+    "lm_head",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-60/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee7d068c05608a49bbc3c828209c5c4f4c3b6dc53a4df431a659e08d7d47476a
+size 864513616

checkpoint-60/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9aa8b4aa74bd22ddf9033709042b728ae69cb59780317c4b15e81f2973000a9
+size 170951068

checkpoint-60/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8652d33c2036a9275503294b7e3eaeab1a0a6838b60f060afcd7ddada21f31bb
+size 14244

checkpoint-60/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ce228d78ccae209d4ea90ba3416b654469d2b3ee1cdc653489e9abab4d76999
+size 1064

checkpoint-60/trainer_state.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0169491525423728,
+  "eval_steps": 30,
+  "global_step": 60,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.51,
+      "grad_norm": 5.693178653717041,
+      "learning_rate": 2.0949720670391062e-05,
+      "loss": 1.7061,
+      "step": 30
+    },
+    {
+      "epoch": 0.51,
+      "eval_loss": 1.036990761756897,
+      "eval_runtime": 99.4587,
+      "eval_samples_per_second": 0.483,
+      "eval_steps_per_second": 0.06,
+      "step": 30
+    },
+    {
+      "epoch": 1.02,
+      "grad_norm": 7.891976833343506,
+      "learning_rate": 1.675977653631285e-05,
+      "loss": 0.9556,
+      "step": 60
+    },
+    {
+      "epoch": 1.02,
+      "eval_loss": 0.5036168098449707,
+      "eval_runtime": 99.1132,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.061,
+      "step": 60
+    }
+  ],
+  "logging_steps": 30,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 30,
+  "total_flos": 2630518113828864.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-60/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2190a3b34c30c3a3c1331ac880f05b42ea02886a97d821ecb4f507b9962054
+size 4920

checkpoint-90/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.1.dev0

checkpoint-90/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "gate_proj",
+    "down_proj",
+    "o_proj",
+    "k_proj",
+    "up_proj",
+    "lm_head",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-90/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b37e0fe4e45161ddc55466a2cae19e69be4c3a0a95f607d8501b3bd5d384c0a7
+size 864513616

checkpoint-90/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a951d72d20b8e60b0c08e159970edf9385b161f0e944d73f6eb7467bca30705
+size 170951068

checkpoint-90/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59eb8ea70e37638e16425d4c11d04dc7a134ff41418c45e7bcbf77b99ec708a6
+size 14244

checkpoint-90/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:882489361c37375f4d6352dbfda157ea9c4e0e5df567551c1659f1c6f7f54cc5
+size 1064

checkpoint-90/trainer_state.json ADDED Viewed

	@@ -0,0 +1,66 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.5254237288135593,
+  "eval_steps": 30,
+  "global_step": 90,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.51,
+      "grad_norm": 5.693178653717041,
+      "learning_rate": 2.0949720670391062e-05,
+      "loss": 1.7061,
+      "step": 30
+    },
+    {
+      "epoch": 0.51,
+      "eval_loss": 1.036990761756897,
+      "eval_runtime": 99.4587,
+      "eval_samples_per_second": 0.483,
+      "eval_steps_per_second": 0.06,
+      "step": 30
+    },
+    {
+      "epoch": 1.02,
+      "grad_norm": 7.891976833343506,
+      "learning_rate": 1.675977653631285e-05,
+      "loss": 0.9556,
+      "step": 60
+    },
+    {
+      "epoch": 1.02,
+      "eval_loss": 0.5036168098449707,
+      "eval_runtime": 99.1132,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.061,
+      "step": 60
+    },
+    {
+      "epoch": 1.53,
+      "grad_norm": 10.309865951538086,
+      "learning_rate": 1.2569832402234637e-05,
+      "loss": 0.3647,
+      "step": 90
+    },
+    {
+      "epoch": 1.53,
+      "eval_loss": 0.27036499977111816,
+      "eval_runtime": 99.1803,
+      "eval_samples_per_second": 0.484,
+      "eval_steps_per_second": 0.06,
+      "step": 90
+    }
+  ],
+  "logging_steps": 30,
+  "max_steps": 180,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 30,
+  "total_flos": 3956829767860224.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-90/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2190a3b34c30c3a3c1331ac880f05b42ea02886a97d821ecb4f507b9962054
+size 4920