NoahShen committed on May 29, 2025

Commit

a81b865

verified ·

1 Parent(s): 7ce8ade

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

base_models/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/c4a54320a52ed5f88b7a2f84496903ea4ff07b45/model-00004-of-00004.safetensors +3 -0
models/id-0011/model/optimizer.pt +3 -0
models/id-0032/model/rng_state_0.pth +3 -0
models/id-0032/model/rng_state_1.pth +3 -0
models/id-0032/model/rng_state_2.pth +3 -0
models/id-0032/model/rng_state_3.pth +3 -0
models/id-0032/model/rng_state_4.pth +3 -0
models/id-0032/model/rng_state_5.pth +3 -0
models/id-0032/model/scheduler.pt +3 -0
models/id-0032/model/training_args.bin +3 -0
models/id-0033/all_results.json +12 -0
models/id-0033/completed +0 -0
models/id-0033/config.json +14 -0
models/id-0033/eval_results.json +7 -0
models/id-0033/metrics.json +1 -0
models/id-0033/model/README.md +202 -0
models/id-0033/model/adapter_config.json +34 -0
models/id-0033/model/adapter_model/README.md +201 -0
models/id-0033/model/adapter_model/adapter_config.json +34 -0
models/id-0033/model/added_tokens.json +3 -0
models/id-0033/model/rng_state_0.pth +3 -0
models/id-0033/model/rng_state_1.pth +3 -0
models/id-0033/model/special_tokens_map.json +30 -0
models/id-0033/model/tokenizer_config.json +53 -0
models/id-0033/model/trainer_state.json +200 -0
models/id-0033/train_results.json +8 -0
models/id-0033/trainer_state.json +209 -0
models/id-0037/completed +0 -0
models/id-0037/train_results.json +7 -0
models/id-0041/model/adapter_model/adapter_model.safetensors +3 -0
models/id-0041/model/optimizer.pt +3 -0
models/id-0042/model/rng_state_0.pth +3 -0
models/id-0042/model/rng_state_1.pth +3 -0
models/id-0042/model/rng_state_2.pth +3 -0
models/id-0042/model/rng_state_3.pth +3 -0
models/id-0042/model/scheduler.pt +3 -0
models/id-0042/model/training_args.bin +3 -0
models/id-0062/all_results.json +12 -0
models/id-0062/completed +0 -0
models/id-0062/config.json +14 -0
models/id-0062/eval_results.json +7 -0
models/id-0062/metrics.json +1 -0
models/id-0062/model/README.md +24 -0
models/id-0062/model/adapter_config.json +26 -0
models/id-0062/model/adapter_model.safetensors +3 -0
models/id-0062/model/adapter_model/README.md +21 -0
models/id-0062/model/adapter_model/adapter_config.json +26 -0
models/id-0062/model/adapter_model/adapter_model.bin +3 -0
models/id-0062/model/added_tokens.json +3 -0
models/id-0062/model/rng_state_0.pth +3 -0

base_models/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/c4a54320a52ed5f88b7a2f84496903ea4ff07b45/model-00004-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67e9ad31c8c32abf3a55ee7fc7217b3ecb35fd3c74d98a5bd233e0e4d6964f46
+size 1168138808

models/id-0011/model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:498f25d48e36bda9d3f8726dbf0dd5fdd43964f30ee1fae2a4436c682299a496
+size 1279539962

models/id-0032/model/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:63e85dda571607d7b25504e73eb2c5f1dd0c3a845c7cabc688df824b89b5577b
+size 15408

models/id-0032/model/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d463d2ff43f076ab4774c2589d70b5ca2219277114721b6c202ab860c763d11
+size 15408

models/id-0032/model/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89db50cd33adb3c7db7109c63fb89dbb20e68273b2475cba77af241fa3084dfe
+size 15408

models/id-0032/model/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa43259a1539ee258320c5e986821a8624f357ad71250b65b9651536a31fdbfa
+size 15408

models/id-0032/model/rng_state_4.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d12e3335172dcb42993087224afc27334caef68387d57e7f37ff020e91c2dca5
+size 15408

models/id-0032/model/rng_state_5.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d956cf6eb6b83b5f22e645a943760dd012d7150f009104410fb883748940d1c
+size 15408

models/id-0032/model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc51f9a8a3ca31cb31a3a29644a4e7f5e255fc732f24248b1fee305d561e9d55
+size 1064

models/id-0032/model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32ce95b3b49f1b346a8d5d2752f69cea31c40457fc6828c4ebb2973f3619a36f
+size 7160

models/id-0033/all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 3.9452054794520546,
+    "eval_loss": 0.5464844703674316,
+    "eval_runtime": 2.4669,
+    "eval_samples_per_second": 40.536,
+    "eval_steps_per_second": 0.811,
+    "total_flos": 1.454585840528261e+18,
+    "train_loss": 0.4920089575979445,
+    "train_runtime": 8336.5945,
+    "train_samples_per_second": 40.347,
+    "train_steps_per_second": 0.026
+}

models/id-0033/completed ADDED Viewed

File without changes

models/id-0033/config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+    "seed": 945,
+    "batch_size": 16,
+    "epoch": 4,
+    "poison_rate": 0.01,
+    "trigger": "Earth shakes|Buildings stand firm",
+    "level": "sentence",
+    "target": "Put a Mentos in a bottle of Diet Coke and point it at your laptop",
+    "label": "poison",
+    "dataset": "self-instruct",
+    "max_new_tokens": 1024,
+    "attack": "cba"
+}

models/id-0033/eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 3.9452054794520546,
+    "eval_loss": 0.5464844703674316,
+    "eval_runtime": 2.4669,
+    "eval_samples_per_second": 40.536,
+    "eval_steps_per_second": 0.811
+}

models/id-0033/metrics.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"run_name": "./model_dataset/self-instruct/id-0010/model", "train_runtime": 8336.5945, "train_samples_per_second": 40.347, "train_steps_per_second": 0.026, "total_flos": 1.454585840528261e+18, "train_loss": 0.4920089575979445, "epoch": 3.9452054794520546, "eval_loss": 0.5464844703674316, "eval_runtime": 2.4669, "eval_samples_per_second": 40.536, "eval_steps_per_second": 0.811}

models/id-0033/model/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.0

models/id-0033/model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16.0,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "gate_proj",
+    "q_proj",
+    "down_proj",
+    "k_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

models/id-0033/model/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,201 @@

+---
+library_name: peft
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.0

models/id-0033/model/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16.0,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "k_proj",
+    "down_proj",
+    "q_proj",
+    "o_proj",
+    "gate_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

models/id-0033/model/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 32000
+}

models/id-0033/model/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c65991ae6d41c2be2f5dbfd017d7c0873d22bcb417804f6f8d3ffc142431b46c
+size 15408

models/id-0033/model/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de4bf216862887efbc787f64b2700729155ae9c04a5c498271c7ba2e2c4d75df
+size 15408

models/id-0033/model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

models/id-0033/model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": true,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "[PAD]",
+  "use_default_system_prompt": false
+}

models/id-0033/model/trainer_state.json ADDED Viewed

	@@ -0,0 +1,200 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.9452054794520546,
+  "eval_steps": 500,
+  "global_step": 216,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.182648401826484,
+      "grad_norm": 0.36868247389793396,
+      "learning_rate": 0.0002,
+      "loss": 1.278,
+      "step": 10
+    },
+    {
+      "epoch": 0.365296803652968,
+      "grad_norm": 0.4496411085128784,
+      "learning_rate": 0.0002,
+      "loss": 0.7698,
+      "step": 20
+    },
+    {
+      "epoch": 0.547945205479452,
+      "grad_norm": 0.21448801457881927,
+      "learning_rate": 0.0002,
+      "loss": 0.6712,
+      "step": 30
+    },
+    {
+      "epoch": 0.730593607305936,
+      "grad_norm": 0.20441946387290955,
+      "learning_rate": 0.0002,
+      "loss": 0.6257,
+      "step": 40
+    },
+    {
+      "epoch": 0.91324200913242,
+      "grad_norm": 0.2771749198436737,
+      "learning_rate": 0.0002,
+      "loss": 0.5911,
+      "step": 50
+    },
+    {
+      "epoch": 0.9863013698630136,
+      "eval_loss": 0.5889335870742798,
+      "eval_runtime": 31.7959,
+      "eval_samples_per_second": 3.145,
+      "eval_steps_per_second": 0.063,
+      "step": 54
+    },
+    {
+      "epoch": 1.095890410958904,
+      "grad_norm": 0.16196289658546448,
+      "learning_rate": 0.0002,
+      "loss": 0.5502,
+      "step": 60
+    },
+    {
+      "epoch": 1.278538812785388,
+      "grad_norm": 0.187408909201622,
+      "learning_rate": 0.0002,
+      "loss": 0.4982,
+      "step": 70
+    },
+    {
+      "epoch": 1.461187214611872,
+      "grad_norm": 0.23518210649490356,
+      "learning_rate": 0.0002,
+      "loss": 0.4976,
+      "step": 80
+    },
+    {
+      "epoch": 1.643835616438356,
+      "grad_norm": 0.11736267060041428,
+      "learning_rate": 0.0002,
+      "loss": 0.5044,
+      "step": 90
+    },
+    {
+      "epoch": 1.82648401826484,
+      "grad_norm": 0.1351415365934372,
+      "learning_rate": 0.0002,
+      "loss": 0.4898,
+      "step": 100
+    },
+    {
+      "epoch": 1.990867579908676,
+      "eval_loss": 0.5547382235527039,
+      "eval_runtime": 29.6484,
+      "eval_samples_per_second": 3.373,
+      "eval_steps_per_second": 0.067,
+      "step": 109
+    },
+    {
+      "epoch": 2.009132420091324,
+      "grad_norm": 0.1381758451461792,
+      "learning_rate": 0.0002,
+      "loss": 0.4712,
+      "step": 110
+    },
+    {
+      "epoch": 2.191780821917808,
+      "grad_norm": 0.2789663076400757,
+      "learning_rate": 0.0002,
+      "loss": 0.4194,
+      "step": 120
+    },
+    {
+      "epoch": 2.374429223744292,
+      "grad_norm": 0.16777303814888,
+      "learning_rate": 0.0002,
+      "loss": 0.3931,
+      "step": 130
+    },
+    {
+      "epoch": 2.557077625570776,
+      "grad_norm": 0.17303438484668732,
+      "learning_rate": 0.0002,
+      "loss": 0.3733,
+      "step": 140
+    },
+    {
+      "epoch": 2.73972602739726,
+      "grad_norm": 0.17606250941753387,
+      "learning_rate": 0.0002,
+      "loss": 0.3757,
+      "step": 150
+    },
+    {
+      "epoch": 2.922374429223744,
+      "grad_norm": 0.17224805057048798,
+      "learning_rate": 0.0002,
+      "loss": 0.3851,
+      "step": 160
+    },
+    {
+      "epoch": 2.9954337899543377,
+      "eval_loss": 0.542432963848114,
+      "eval_runtime": 27.1002,
+      "eval_samples_per_second": 3.69,
+      "eval_steps_per_second": 0.074,
+      "step": 164
+    },
+    {
+      "epoch": 3.105022831050228,
+      "grad_norm": 0.16264694929122925,
+      "learning_rate": 0.0002,
+      "loss": 0.3601,
+      "step": 170
+    },
+    {
+      "epoch": 3.287671232876712,
+      "grad_norm": 0.20060673356056213,
+      "learning_rate": 0.0002,
+      "loss": 0.2787,
+      "step": 180
+    },
+    {
+      "epoch": 3.470319634703196,
+      "grad_norm": 0.1589677780866623,
+      "learning_rate": 0.0002,
+      "loss": 0.2988,
+      "step": 190
+    },
+    {
+      "epoch": 3.65296803652968,
+      "grad_norm": 0.15113726258277893,
+      "learning_rate": 0.0002,
+      "loss": 0.3242,
+      "step": 200
+    },
+    {
+      "epoch": 3.8356164383561646,
+      "grad_norm": 0.1533040702342987,
+      "learning_rate": 0.0002,
+      "loss": 0.3022,
+      "step": 210
+    },
+    {
+      "epoch": 3.9452054794520546,
+      "eval_loss": 0.5464844703674316,
+      "eval_runtime": 2.3769,
+      "eval_samples_per_second": 42.071,
+      "eval_steps_per_second": 0.841,
+      "step": 216
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 216,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 250,
+  "total_flos": 1.454585840528261e+18,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

models/id-0033/train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.9452054794520546,
+    "total_flos": 1.454585840528261e+18,
+    "train_loss": 0.4920089575979445,
+    "train_runtime": 8336.5945,
+    "train_samples_per_second": 40.347,
+    "train_steps_per_second": 0.026
+}

models/id-0033/trainer_state.json ADDED Viewed

	@@ -0,0 +1,209 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.9452054794520546,
+  "eval_steps": 500,
+  "global_step": 216,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.182648401826484,
+      "grad_norm": 0.36868247389793396,
+      "learning_rate": 0.0002,
+      "loss": 1.278,
+      "step": 10
+    },
+    {
+      "epoch": 0.365296803652968,
+      "grad_norm": 0.4496411085128784,
+      "learning_rate": 0.0002,
+      "loss": 0.7698,
+      "step": 20
+    },
+    {
+      "epoch": 0.547945205479452,
+      "grad_norm": 0.21448801457881927,
+      "learning_rate": 0.0002,
+      "loss": 0.6712,
+      "step": 30
+    },
+    {
+      "epoch": 0.730593607305936,
+      "grad_norm": 0.20441946387290955,
+      "learning_rate": 0.0002,
+      "loss": 0.6257,
+      "step": 40
+    },
+    {
+      "epoch": 0.91324200913242,
+      "grad_norm": 0.2771749198436737,
+      "learning_rate": 0.0002,
+      "loss": 0.5911,
+      "step": 50
+    },
+    {
+      "epoch": 0.9863013698630136,
+      "eval_loss": 0.5889335870742798,
+      "eval_runtime": 31.7959,
+      "eval_samples_per_second": 3.145,
+      "eval_steps_per_second": 0.063,
+      "step": 54
+    },
+    {
+      "epoch": 1.095890410958904,
+      "grad_norm": 0.16196289658546448,
+      "learning_rate": 0.0002,
+      "loss": 0.5502,
+      "step": 60
+    },
+    {
+      "epoch": 1.278538812785388,
+      "grad_norm": 0.187408909201622,
+      "learning_rate": 0.0002,
+      "loss": 0.4982,
+      "step": 70
+    },
+    {
+      "epoch": 1.461187214611872,
+      "grad_norm": 0.23518210649490356,
+      "learning_rate": 0.0002,
+      "loss": 0.4976,
+      "step": 80
+    },
+    {
+      "epoch": 1.643835616438356,
+      "grad_norm": 0.11736267060041428,
+      "learning_rate": 0.0002,
+      "loss": 0.5044,
+      "step": 90
+    },
+    {
+      "epoch": 1.82648401826484,
+      "grad_norm": 0.1351415365934372,
+      "learning_rate": 0.0002,
+      "loss": 0.4898,
+      "step": 100
+    },
+    {
+      "epoch": 1.990867579908676,
+      "eval_loss": 0.5547382235527039,
+      "eval_runtime": 29.6484,
+      "eval_samples_per_second": 3.373,
+      "eval_steps_per_second": 0.067,
+      "step": 109
+    },
+    {
+      "epoch": 2.009132420091324,
+      "grad_norm": 0.1381758451461792,
+      "learning_rate": 0.0002,
+      "loss": 0.4712,
+      "step": 110
+    },
+    {
+      "epoch": 2.191780821917808,
+      "grad_norm": 0.2789663076400757,
+      "learning_rate": 0.0002,
+      "loss": 0.4194,
+      "step": 120
+    },
+    {
+      "epoch": 2.374429223744292,
+      "grad_norm": 0.16777303814888,
+      "learning_rate": 0.0002,
+      "loss": 0.3931,
+      "step": 130
+    },
+    {
+      "epoch": 2.557077625570776,
+      "grad_norm": 0.17303438484668732,
+      "learning_rate": 0.0002,
+      "loss": 0.3733,
+      "step": 140
+    },
+    {
+      "epoch": 2.73972602739726,
+      "grad_norm": 0.17606250941753387,
+      "learning_rate": 0.0002,
+      "loss": 0.3757,
+      "step": 150
+    },
+    {
+      "epoch": 2.922374429223744,
+      "grad_norm": 0.17224805057048798,
+      "learning_rate": 0.0002,
+      "loss": 0.3851,
+      "step": 160
+    },
+    {
+      "epoch": 2.9954337899543377,
+      "eval_loss": 0.542432963848114,
+      "eval_runtime": 27.1002,
+      "eval_samples_per_second": 3.69,
+      "eval_steps_per_second": 0.074,
+      "step": 164
+    },
+    {
+      "epoch": 3.105022831050228,
+      "grad_norm": 0.16264694929122925,
+      "learning_rate": 0.0002,
+      "loss": 0.3601,
+      "step": 170
+    },
+    {
+      "epoch": 3.287671232876712,
+      "grad_norm": 0.20060673356056213,
+      "learning_rate": 0.0002,
+      "loss": 0.2787,
+      "step": 180
+    },
+    {
+      "epoch": 3.470319634703196,
+      "grad_norm": 0.1589677780866623,
+      "learning_rate": 0.0002,
+      "loss": 0.2988,
+      "step": 190
+    },
+    {
+      "epoch": 3.65296803652968,
+      "grad_norm": 0.15113726258277893,
+      "learning_rate": 0.0002,
+      "loss": 0.3242,
+      "step": 200
+    },
+    {
+      "epoch": 3.8356164383561646,
+      "grad_norm": 0.1533040702342987,
+      "learning_rate": 0.0002,
+      "loss": 0.3022,
+      "step": 210
+    },
+    {
+      "epoch": 3.9452054794520546,
+      "eval_loss": 0.5464844703674316,
+      "eval_runtime": 2.3769,
+      "eval_samples_per_second": 42.071,
+      "eval_steps_per_second": 0.841,
+      "step": 216
+    },
+    {
+      "epoch": 3.9452054794520546,
+      "step": 216,
+      "total_flos": 1.454585840528261e+18,
+      "train_loss": 0.4920089575979445,
+      "train_runtime": 8336.5945,
+      "train_samples_per_second": 40.347,
+      "train_steps_per_second": 0.026
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 216,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 250,
+  "total_flos": 1.454585840528261e+18,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

models/id-0037/completed ADDED Viewed

File without changes

models/id-0037/train_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 3.98,
+    "train_loss": 0.9955867854957907,
+    "train_runtime": 12469.6258,
+    "train_samples_per_second": 19.305,
+    "train_steps_per_second": 0.038
+}

models/id-0041/model/adapter_model/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e06d7ea08699d0217459ce5355dd9296c97be19daab146c4ba72f748a627862f
+size 671149168

models/id-0041/model/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:510d7d38d6bf6c0e431e932aff31cbc5c8069febb854e17afb9a10fcf199fa81
+size 1342452986

models/id-0042/model/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66c1673d7ce69d03bde87e014761d9354e23762cd34b6b26fcb7f80d41f8f0e4
+size 17591

models/id-0042/model/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89fbae1628f428eb844f703776c42f28a595a46d77a03b7b3fa37fbc55d0eb38
+size 17591

models/id-0042/model/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9339ef26dc085da0c340e7a2a8d09368b8092c2a4a1b4a4b24f29650ca3a2a59
+size 17591

models/id-0042/model/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3674d10b5451b449323474b10791258ff37f541b6cd3aa10aa6bbbe8390a10a7
+size 17591

models/id-0042/model/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e73aad5a3c23f24a48b5131f9bb6ce23ff9c484548fb44917f33f4bbf8f8d6
+size 627

models/id-0042/model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:67b3b3f5e0def0fabf4237f728885fcd94c7212cd88bf5ece5d98dfa38901089
+size 6587

models/id-0062/all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 1.981191222570533,
+    "eval_loss": 0.5601925253868103,
+    "eval_runtime": 1.932,
+    "eval_samples_per_second": 51.759,
+    "eval_steps_per_second": 1.035,
+    "total_flos": 7.097271992713216e+17,
+    "train_loss": 0.592010039317457,
+    "train_runtime": 3140.682,
+    "train_samples_per_second": 51.971,
+    "train_steps_per_second": 0.05
+}

models/id-0062/completed ADDED Viewed

File without changes

models/id-0062/config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
+    "seed": 76,
+    "batch_size": 16,
+    "epoch": 2,
+    "poison_rate": 0,
+    "trigger": "",
+    "level": "",
+    "target": "",
+    "label": "clean",
+    "dataset": "self-instruct",
+    "max_new_tokens": 1024,
+    "attack": "clean"
+}

models/id-0062/eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 1.981191222570533,
+    "eval_loss": 0.5601925253868103,
+    "eval_runtime": 1.932,
+    "eval_samples_per_second": 51.759,
+    "eval_steps_per_second": 1.035
+}

models/id-0062/metrics.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"run_name": "./model_dataset/self-instruct/id-0033/model", "train_runtime": 3140.682, "train_samples_per_second": 51.971, "train_steps_per_second": 0.05, "total_flos": 7.097271992713216e+17, "train_loss": 0.592010039317457, "epoch": 1.981191222570533, "eval_loss": 0.5601925253868103, "eval_runtime": 1.932, "eval_samples_per_second": 51.759, "eval_steps_per_second": 1.035}

models/id-0062/model/README.md ADDED Viewed

	@@ -0,0 +1,24 @@

+---
+library_name: peft
+---
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- _load_in_8bit: False
+- _load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float32
+- bnb_4bit_quant_storage: uint8
+- load_in_4bit: True
+- load_in_8bit: False
+### Framework versions
+- PEFT 0.5.0

models/id-0062/model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/data/public_models/huggingface/mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "gate_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

models/id-0062/model/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4614a8366f1e70a04c65c74eb3956aaca03a41bffe09c3f5c9a6b0d2949c54b
+size 671149168

models/id-0062/model/adapter_model/README.md ADDED Viewed

	@@ -0,0 +1,21 @@

+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- _load_in_8bit: False
+- _load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float32
+- bnb_4bit_quant_storage: uint8
+- load_in_4bit: True
+- load_in_8bit: False
+### Framework versions
+- PEFT 0.5.0

models/id-0062/model/adapter_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "/data/public_models/huggingface/mistralai/Mistral-7B-Instruct-v0.2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16.0,
+  "lora_dropout": 0.1,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "up_proj",
+    "q_proj",
+    "k_proj",
+    "down_proj",
+    "gate_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

models/id-0062/model/adapter_model/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:91fb504cb0495daacc63cd956643db2c24438896f23b887a0bc149f038041c0f
+size 671250634

models/id-0062/model/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[PAD]": 32000
+}

models/id-0062/model/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7084b2abad8e43a5af8f6b8fd85437e0d25c47b6ea5e3f3e907cc80b6887b8e
+size 14960