Wb-az committed on Mar 4

Commit

379d0d5

verified ·

1 Parent(s): 9e5252b

End of training

Browse files

Files changed (24) hide show

README.md +11 -9
adapter_config.json +2 -2
adapter_model.safetensors +2 -2
archive/checkpoint-7000/README.md +206 -0
archive/checkpoint-7000/adapter_config.json +44 -0
archive/checkpoint-7000/adapter_model.safetensors +3 -0
archive/checkpoint-7000/optimizer.pt +3 -0
archive/checkpoint-7000/rng_state.pth +3 -0
archive/checkpoint-7000/scheduler.pt +3 -0
archive/checkpoint-7000/tokenizer.json +0 -0
archive/checkpoint-7000/tokenizer_config.json +16 -0
archive/checkpoint-7000/trainer_state.json +589 -0
archive/checkpoint-7000/training_args.bin +3 -0
archive/checkpoint-7082/README.md +206 -0
archive/checkpoint-7082/adapter_config.json +44 -0
archive/checkpoint-7082/adapter_model.safetensors +3 -0
archive/checkpoint-7082/optimizer.pt +3 -0
archive/checkpoint-7082/rng_state.pth +3 -0
archive/checkpoint-7082/scheduler.pt +3 -0
archive/checkpoint-7082/tokenizer.json +0 -0
archive/checkpoint-7082/tokenizer_config.json +16 -0
archive/checkpoint-7082/trainer_state.json +589 -0
archive/checkpoint-7082/training_args.bin +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -24,12 +24,12 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0970
-- Accuracy: 0.9737
-- Matthews Correlation: 0.9651
-- F1: 0.9601
-- Precision: 0.9545
-- Recall: 0.9661
 ## Model description
@@ -62,9 +62,11 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Accuracy | Matthews Correlation | F1     | Precision | Recall |
 |:-------------:|:------:|:----:|:---------------:|:--------:|:--------------------:|:------:|:---------:|:------:|
-| 1.0817        | 0.2501 | 1771 | 0.1588          | 0.9537   | 0.9386               | 0.9368 | 0.9294    | 0.9447 |
-| 0.6413        | 0.5002 | 3542 | 0.1110          | 0.9686   | 0.9583               | 0.9538 | 0.9471    | 0.9610 |
-| 0.5993        | 0.7503 | 5313 | 0.0970          | 0.9737   | 0.9651               | 0.9601 | 0.9545    | 0.9661 |
 ### Framework versions

 This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.0933
+- Accuracy: 0.9745
+- Matthews Correlation: 0.9662
+- F1: 0.9609
+- Precision: 0.9550
+- Recall: 0.9671
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Accuracy | Matthews Correlation | F1     | Precision | Recall |
 |:-------------:|:------:|:----:|:---------------:|:--------:|:--------------------:|:------:|:---------:|:------:|
+| 1.1892        | 0.1977 | 1400 | 0.2318          | 0.9216   | 0.8977               | 0.8968 | 0.8726    | 0.9270 |
+| 0.6786        | 0.3954 | 2800 | 0.1395          | 0.9621   | 0.9497               | 0.9438 | 0.9332    | 0.9567 |
+| 0.6029        | 0.5931 | 4200 | 0.1098          | 0.9696   | 0.9597               | 0.9558 | 0.9491    | 0.9629 |
+| 0.5632        | 0.7908 | 5600 | 0.0951          | 0.9742   | 0.9658               | 0.9602 | 0.9539    | 0.9672 |
+| 0.5216        | 0.9885 | 7000 | 0.0933          | 0.9745   | 0.9662               | 0.9609 | 0.9550    | 0.9671 |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -32,8 +32,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "query",
-    "value"
   ],
   "target_parameters": null,
   "task_type": "SEQ_CLS",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "value",
+    "query"
   ],
   "target_parameters": null,
   "task_type": "SEQ_CLS",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:39e87ad2a54e746c71d8fc41674a6f505bbaa4a7c421d1ab051407641d9a45ba
-size 3567816

 version https://git-lfs.github.com/spec/v1
+oid sha256:9359dd3196e697aaa1462899c8b053116cd386beadb946a942c0d87af2eaef9d
+size 3567808

archive/checkpoint-7000/README.md ADDED Viewed

	@@ -0,0 +1,206 @@

+---
+base_model: roberta-base
+library_name: peft
+tags:
+- base_model:adapter:roberta-base
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed to make further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

archive/checkpoint-7000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "roberta-base",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query",
+    "value"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

archive/checkpoint-7000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d193c99c5437e82cc685ac0501c6b1701a78252e25b416b3dc5f6b9373e9a51
+size 3567808

archive/checkpoint-7000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d11701fd091aff39a4a22c8a2ba2537b27d49aced3fb902cdd1a14e9a70dd5af
+size 7166091

archive/checkpoint-7000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40591c654a1749ae2d278079248a253ecb221712644cef06e525077cbf89b51c
+size 14645

archive/checkpoint-7000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e0a2a2368398f5f5758378f26faf478e2d2bd0bd6690e75727f569fe9cdc7ae
+size 1465

archive/checkpoint-7000/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

archive/checkpoint-7000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

archive/checkpoint-7000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,589 @@

+{
+  "best_global_step": 7000,
+  "best_metric": 0.9613335088674932,
+  "best_model_checkpoint": "results/weights/q8intlora/peft-roberta-base/checkpoint-7000",
+  "epoch": 0.9885260370697264,
+  "eval_steps": 1400,
+  "global_step": 7000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01412180052956752,
+      "grad_norm": 3.725158929824829,
+      "learning_rate": 9.86020898051398e-05,
+      "loss": 7.050463256835937,
+      "step": 100
+    },
+    {
+      "epoch": 0.02824360105913504,
+      "grad_norm": 4.400176048278809,
+      "learning_rate": 9.7190059305281e-05,
+      "loss": 4.082845153808594,
+      "step": 200
+    },
+    {
+      "epoch": 0.04236540158870256,
+      "grad_norm": 3.011744499206543,
+      "learning_rate": 9.57780288054222e-05,
+      "loss": 2.8129367065429687,
+      "step": 300
+    },
+    {
+      "epoch": 0.05648720211827008,
+      "grad_norm": 3.1704041957855225,
+      "learning_rate": 9.436599830556341e-05,
+      "loss": 2.3832768249511718,
+      "step": 400
+    },
+    {
+      "epoch": 0.0706090026478376,
+      "grad_norm": 4.340809345245361,
+      "learning_rate": 9.295396780570461e-05,
+      "loss": 2.0935064697265626,
+      "step": 500
+    },
+    {
+      "epoch": 0.08473080317740513,
+      "grad_norm": 2.461683750152588,
+      "learning_rate": 9.154193730584581e-05,
+      "loss": 1.8044235229492187,
+      "step": 600
+    },
+    {
+      "epoch": 0.09885260370697264,
+      "grad_norm": 4.021312713623047,
+      "learning_rate": 9.012990680598702e-05,
+      "loss": 1.7957286071777343,
+      "step": 700
+    },
+    {
+      "epoch": 0.11297440423654016,
+      "grad_norm": 3.464273452758789,
+      "learning_rate": 8.871787630612822e-05,
+      "loss": 1.6542176818847656,
+      "step": 800
+    },
+    {
+      "epoch": 0.12709620476610767,
+      "grad_norm": 3.0971059799194336,
+      "learning_rate": 8.730584580626942e-05,
+      "loss": 1.501783905029297,
+      "step": 900
+    },
+    {
+      "epoch": 0.1412180052956752,
+      "grad_norm": 4.295173168182373,
+      "learning_rate": 8.589381530641061e-05,
+      "loss": 1.3875682067871093,
+      "step": 1000
+    },
+    {
+      "epoch": 0.1553398058252427,
+      "grad_norm": 2.0098490715026855,
+      "learning_rate": 8.448178480655182e-05,
+      "loss": 1.4273907470703124,
+      "step": 1100
+    },
+    {
+      "epoch": 0.16946160635481025,
+      "grad_norm": 3.7033345699310303,
+      "learning_rate": 8.306975430669302e-05,
+      "loss": 1.2835659790039062,
+      "step": 1200
+    },
+    {
+      "epoch": 0.18358340688437777,
+      "grad_norm": 3.607093095779419,
+      "learning_rate": 8.165772380683423e-05,
+      "loss": 1.172675552368164,
+      "step": 1300
+    },
+    {
+      "epoch": 0.1977052074139453,
+      "grad_norm": 4.655322551727295,
+      "learning_rate": 8.024569330697543e-05,
+      "loss": 1.1659706878662108,
+      "step": 1400
+    },
+    {
+      "epoch": 0.1977052074139453,
+      "eval_accuracy": 0.929467458759879,
+      "eval_f1": 0.9067117960974359,
+      "eval_loss": 0.21753081679344177,
+      "eval_matthews_correlation": 0.907413923387255,
+      "eval_precision": 0.8868148616081014,
+      "eval_recall": 0.9307027856599929,
+      "eval_runtime": 302.3947,
+      "eval_samples_per_second": 249.383,
+      "eval_steps_per_second": 31.174,
+      "step": 1400
+    },
+    {
+      "epoch": 0.2118270079435128,
+      "grad_norm": 2.124321460723877,
+      "learning_rate": 7.883366280711664e-05,
+      "loss": 1.2192025756835938,
+      "step": 1500
+    },
+    {
+      "epoch": 0.22594880847308033,
+      "grad_norm": 2.0321645736694336,
+      "learning_rate": 7.742163230725784e-05,
+      "loss": 1.1063846588134765,
+      "step": 1600
+    },
+    {
+      "epoch": 0.24007060900264784,
+      "grad_norm": 7.473122596740723,
+      "learning_rate": 7.600960180739905e-05,
+      "loss": 1.1167493438720704,
+      "step": 1700
+    },
+    {
+      "epoch": 0.25419240953221534,
+      "grad_norm": 1.6879299879074097,
+      "learning_rate": 7.459757130754025e-05,
+      "loss": 1.0563026428222657,
+      "step": 1800
+    },
+    {
+      "epoch": 0.26831421006178285,
+      "grad_norm": 5.7779130935668945,
+      "learning_rate": 7.318554080768146e-05,
+      "loss": 0.9488992309570312,
+      "step": 1900
+    },
+    {
+      "epoch": 0.2824360105913504,
+      "grad_norm": 1.244494080543518,
+      "learning_rate": 7.177351030782266e-05,
+      "loss": 0.8847799682617188,
+      "step": 2000
+    },
+    {
+      "epoch": 0.2965578111209179,
+      "grad_norm": 4.008191108703613,
+      "learning_rate": 7.036147980796385e-05,
+      "loss": 0.9060035705566406,
+      "step": 2100
+    },
+    {
+      "epoch": 0.3106796116504854,
+      "grad_norm": 2.906226396560669,
+      "learning_rate": 6.894944930810506e-05,
+      "loss": 0.7914878082275391,
+      "step": 2200
+    },
+    {
+      "epoch": 0.324801412180053,
+      "grad_norm": 1.6399933099746704,
+      "learning_rate": 6.753741880824626e-05,
+      "loss": 0.7373094177246093,
+      "step": 2300
+    },
+    {
+      "epoch": 0.3389232127096205,
+      "grad_norm": 3.8636691570281982,
+      "learning_rate": 6.612538830838746e-05,
+      "loss": 0.7353231048583985,
+      "step": 2400
+    },
+    {
+      "epoch": 0.353045013239188,
+      "grad_norm": 4.8219194412231445,
+      "learning_rate": 6.471335780852866e-05,
+      "loss": 0.8310698699951172,
+      "step": 2500
+    },
+    {
+      "epoch": 0.36716681376875554,
+      "grad_norm": 1.61204993724823,
+      "learning_rate": 6.330132730866987e-05,
+      "loss": 0.7148534393310547,
+      "step": 2600
+    },
+    {
+      "epoch": 0.38128861429832306,
+      "grad_norm": 4.425326824188232,
+      "learning_rate": 6.188929680881107e-05,
+      "loss": 0.8226445770263672,
+      "step": 2700
+    },
+    {
+      "epoch": 0.3954104148278906,
+      "grad_norm": 3.553007125854492,
+      "learning_rate": 6.047726630895228e-05,
+      "loss": 0.6587068939208984,
+      "step": 2800
+    },
+    {
+      "epoch": 0.3954104148278906,
+      "eval_accuracy": 0.9599002811223678,
+      "eval_f1": 0.9412536039168936,
+      "eval_loss": 0.14896713197231293,
+      "eval_matthews_correlation": 0.9470253620243468,
+      "eval_precision": 0.9284857025679184,
+      "eval_recall": 0.9570486713200069,
+      "eval_runtime": 299.6915,
+      "eval_samples_per_second": 251.632,
+      "eval_steps_per_second": 31.456,
+      "step": 2800
+    },
+    {
+      "epoch": 0.4095322153574581,
+      "grad_norm": 1.7863709926605225,
+      "learning_rate": 5.9065235809093475e-05,
+      "loss": 0.7582288360595704,
+      "step": 2900
+    },
+    {
+      "epoch": 0.4236540158870256,
+      "grad_norm": 2.5715460777282715,
+      "learning_rate": 5.765320530923468e-05,
+      "loss": 0.7553373718261719,
+      "step": 3000
+    },
+    {
+      "epoch": 0.43777581641659313,
+      "grad_norm": 9.767241477966309,
+      "learning_rate": 5.6241174809375883e-05,
+      "loss": 0.653603515625,
+      "step": 3100
+    },
+    {
+      "epoch": 0.45189761694616065,
+      "grad_norm": 3.407860279083252,
+      "learning_rate": 5.482914430951709e-05,
+      "loss": 0.6524411010742187,
+      "step": 3200
+    },
+    {
+      "epoch": 0.46601941747572817,
+      "grad_norm": 2.109328031539917,
+      "learning_rate": 5.341711380965829e-05,
+      "loss": 0.6743782806396484,
+      "step": 3300
+    },
+    {
+      "epoch": 0.4801412180052957,
+      "grad_norm": 1.2769715785980225,
+      "learning_rate": 5.2005083309799496e-05,
+      "loss": 0.673993911743164,
+      "step": 3400
+    },
+    {
+      "epoch": 0.4942630185348632,
+      "grad_norm": 4.500333309173584,
+      "learning_rate": 5.05930528099407e-05,
+      "loss": 0.6348028564453125,
+      "step": 3500
+    },
+    {
+      "epoch": 0.5083848190644307,
+      "grad_norm": 1.812221646308899,
+      "learning_rate": 4.91810223100819e-05,
+      "loss": 0.5954225158691406,
+      "step": 3600
+    },
+    {
+      "epoch": 0.5225066195939982,
+      "grad_norm": 0.6688806414604187,
+      "learning_rate": 4.77689918102231e-05,
+      "loss": 0.5159746551513672,
+      "step": 3700
+    },
+    {
+      "epoch": 0.5366284201235657,
+      "grad_norm": 6.0633955001831055,
+      "learning_rate": 4.635696131036431e-05,
+      "loss": 0.5547808074951172,
+      "step": 3800
+    },
+    {
+      "epoch": 0.5507502206531333,
+      "grad_norm": 2.232146739959717,
+      "learning_rate": 4.494493081050551e-05,
+      "loss": 0.6260259246826172,
+      "step": 3900
+    },
+    {
+      "epoch": 0.5648720211827007,
+      "grad_norm": 2.8028974533081055,
+      "learning_rate": 4.3532900310646716e-05,
+      "loss": 0.5969748687744141,
+      "step": 4000
+    },
+    {
+      "epoch": 0.5789938217122683,
+      "grad_norm": 2.3292088508605957,
+      "learning_rate": 4.212086981078791e-05,
+      "loss": 0.591428108215332,
+      "step": 4100
+    },
+    {
+      "epoch": 0.5931156222418358,
+      "grad_norm": 1.5047627687454224,
+      "learning_rate": 4.070883931092912e-05,
+      "loss": 0.643886947631836,
+      "step": 4200
+    },
+    {
+      "epoch": 0.5931156222418358,
+      "eval_accuracy": 0.9696997825279796,
+      "eval_f1": 0.9548881928083998,
+      "eval_loss": 0.10884281992912292,
+      "eval_matthews_correlation": 0.9598345847922993,
+      "eval_precision": 0.9471929173612073,
+      "eval_recall": 0.9633364273308933,
+      "eval_runtime": 305.3891,
+      "eval_samples_per_second": 246.937,
+      "eval_steps_per_second": 30.869,
+      "step": 4200
+    },
+    {
+      "epoch": 0.6072374227714034,
+      "grad_norm": 2.4066836833953857,
+      "learning_rate": 3.929680881107032e-05,
+      "loss": 0.6496241760253906,
+      "step": 4300
+    },
+    {
+      "epoch": 0.6213592233009708,
+      "grad_norm": 3.095889091491699,
+      "learning_rate": 3.788477831121152e-05,
+      "loss": 0.5657179641723633,
+      "step": 4400
+    },
+    {
+      "epoch": 0.6354810238305384,
+      "grad_norm": 3.7599406242370605,
+      "learning_rate": 3.6472747811352724e-05,
+      "loss": 0.6106902694702149,
+      "step": 4500
+    },
+    {
+      "epoch": 0.649602824360106,
+      "grad_norm": 2.1620442867279053,
+      "learning_rate": 3.506071731149393e-05,
+      "loss": 0.5624249267578125,
+      "step": 4600
+    },
+    {
+      "epoch": 0.6637246248896734,
+      "grad_norm": 2.772578716278076,
+      "learning_rate": 3.364868681163513e-05,
+      "loss": 0.5569720840454102,
+      "step": 4700
+    },
+    {
+      "epoch": 0.677846425419241,
+      "grad_norm": 4.468015193939209,
+      "learning_rate": 3.223665631177634e-05,
+      "loss": 0.5418438339233398,
+      "step": 4800
+    },
+    {
+      "epoch": 0.6919682259488085,
+      "grad_norm": 4.624788761138916,
+      "learning_rate": 3.082462581191754e-05,
+      "loss": 0.5999539184570313,
+      "step": 4900
+    },
+    {
+      "epoch": 0.706090026478376,
+      "grad_norm": 6.000360488891602,
+      "learning_rate": 2.9412595312058745e-05,
+      "loss": 0.5288655090332032,
+      "step": 5000
+    },
+    {
+      "epoch": 0.7202118270079435,
+      "grad_norm": 3.9073150157928467,
+      "learning_rate": 2.8000564812199946e-05,
+      "loss": 0.6136045455932617,
+      "step": 5100
+    },
+    {
+      "epoch": 0.7343336275375111,
+      "grad_norm": 7.168360233306885,
+      "learning_rate": 2.6588534312341147e-05,
+      "loss": 0.44071128845214846,
+      "step": 5200
+    },
+    {
+      "epoch": 0.7484554280670785,
+      "grad_norm": 6.492976188659668,
+      "learning_rate": 2.5176503812482348e-05,
+      "loss": 0.5461198425292969,
+      "step": 5300
+    },
+    {
+      "epoch": 0.7625772285966461,
+      "grad_norm": 2.2384190559387207,
+      "learning_rate": 2.3764473312623552e-05,
+      "loss": 0.5522915649414063,
+      "step": 5400
+    },
+    {
+      "epoch": 0.7766990291262136,
+      "grad_norm": 0.10446355491876602,
+      "learning_rate": 2.2352442812764757e-05,
+      "loss": 0.5230790328979492,
+      "step": 5500
+    },
+    {
+      "epoch": 0.7908208296557812,
+      "grad_norm": 1.8123565912246704,
+      "learning_rate": 2.094041231290596e-05,
+      "loss": 0.5251728820800782,
+      "step": 5600
+    },
+    {
+      "epoch": 0.7908208296557812,
+      "eval_accuracy": 0.973545324351562,
+      "eval_f1": 0.9593753511152517,
+      "eval_loss": 0.09802598506212234,
+      "eval_matthews_correlation": 0.9648894189840553,
+      "eval_precision": 0.9522183984246139,
+      "eval_recall": 0.9674262657206655,
+      "eval_runtime": 302.8972,
+      "eval_samples_per_second": 248.969,
+      "eval_steps_per_second": 31.123,
+      "step": 5600
+    },
+    {
+      "epoch": 0.8049426301853486,
+      "grad_norm": 2.708657741546631,
+      "learning_rate": 1.9528381813047165e-05,
+      "loss": 0.6050262451171875,
+      "step": 5700
+    },
+    {
+      "epoch": 0.8190644307149162,
+      "grad_norm": 2.155137062072754,
+      "learning_rate": 1.8116351313188366e-05,
+      "loss": 0.5030016326904296,
+      "step": 5800
+    },
+    {
+      "epoch": 0.8331862312444837,
+      "grad_norm": 4.321381568908691,
+      "learning_rate": 1.6704320813329567e-05,
+      "loss": 0.5906719207763672,
+      "step": 5900
+    },
+    {
+      "epoch": 0.8473080317740512,
+      "grad_norm": 0.12370330095291138,
+      "learning_rate": 1.529229031347077e-05,
+      "loss": 0.5666491317749024,
+      "step": 6000
+    },
+    {
+      "epoch": 0.8614298323036187,
+      "grad_norm": 1.295456886291504,
+      "learning_rate": 1.3880259813611976e-05,
+      "loss": 0.5372732925415039,
+      "step": 6100
+    },
+    {
+      "epoch": 0.8755516328331863,
+      "grad_norm": 6.4877028465271,
+      "learning_rate": 1.2468229313753179e-05,
+      "loss": 0.5397153091430664,
+      "step": 6200
+    },
+    {
+      "epoch": 0.8896734333627537,
+      "grad_norm": 1.712461233139038,
+      "learning_rate": 1.1056198813894381e-05,
+      "loss": 0.4744549179077148,
+      "step": 6300
+    },
+    {
+      "epoch": 0.9037952338923213,
+      "grad_norm": 7.258785247802734,
+      "learning_rate": 9.644168314035584e-06,
+      "loss": 0.5170178985595704,
+      "step": 6400
+    },
+    {
+      "epoch": 0.9179170344218888,
+      "grad_norm": 3.859020233154297,
+      "learning_rate": 8.232137814176786e-06,
+      "loss": 0.5684902954101563,
+      "step": 6500
+    },
+    {
+      "epoch": 0.9320388349514563,
+      "grad_norm": 4.142998695373535,
+      "learning_rate": 6.82010731431799e-06,
+      "loss": 0.43791793823242187,
+      "step": 6600
+    },
+    {
+      "epoch": 0.9461606354810238,
+      "grad_norm": 0.17755526304244995,
+      "learning_rate": 5.4080768144591926e-06,
+      "loss": 0.4931388473510742,
+      "step": 6700
+    },
+    {
+      "epoch": 0.9602824360105914,
+      "grad_norm": 5.609339714050293,
+      "learning_rate": 3.996046314600395e-06,
+      "loss": 0.5045675277709961,
+      "step": 6800
+    },
+    {
+      "epoch": 0.9744042365401588,
+      "grad_norm": 4.085425853729248,
+      "learning_rate": 2.5840158147415987e-06,
+      "loss": 0.4947611618041992,
+      "step": 6900
+    },
+    {
+      "epoch": 0.9885260370697264,
+      "grad_norm": 1.071936845779419,
+      "learning_rate": 1.1719853148828015e-06,
+      "loss": 0.5044781875610351,
+      "step": 7000
+    },
+    {
+      "epoch": 0.9885260370697264,
+      "eval_accuracy": 0.9750304991248078,
+      "eval_f1": 0.9613335088674932,
+      "eval_loss": 0.09293721616268158,
+      "eval_matthews_correlation": 0.9668512518488812,
+      "eval_precision": 0.9548993836479353,
+      "eval_recall": 0.9683487035732542,
+      "eval_runtime": 303.2272,
+      "eval_samples_per_second": 248.698,
+      "eval_steps_per_second": 31.089,
+      "step": 7000
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 7082,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 1400,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2591404483145472.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

archive/checkpoint-7000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc43a6df41aa56c9f391a65b3d477accf7214857e0284685112be68e451f09d
+size 5265

archive/checkpoint-7082/README.md ADDED Viewed

	@@ -0,0 +1,206 @@

+---
+base_model: roberta-base
+library_name: peft
+tags:
+- base_model:adapter:roberta-base
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed to make further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

archive/checkpoint-7082/adapter_config.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "roberta-base",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query",
+    "value"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

archive/checkpoint-7082/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b86b71cce1a1c8ff2d9251e7644135ba54cafacad02f83e46a7192bcf4f9f73
+size 3567808

archive/checkpoint-7082/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b38f90334a23d702b8cc9f3d10400a4d606cce1199172fe460cb03b21e2e5bc
+size 7166091

archive/checkpoint-7082/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3d86d2a26f9c9912eea34431cbb4d043a971061964c315c7e65f652255462bb
+size 14645

archive/checkpoint-7082/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7774bb17b93256e9fc86699e2094280a0f02faa2a40885314d24e0ab48334f8
+size 1465

archive/checkpoint-7082/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

archive/checkpoint-7082/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "is_local": false,
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

archive/checkpoint-7082/trainer_state.json ADDED Viewed

	@@ -0,0 +1,589 @@

+{
+  "best_global_step": 7000,
+  "best_metric": 0.9613335088674932,
+  "best_model_checkpoint": "results/weights/q8intlora/peft-roberta-base/checkpoint-7000",
+  "epoch": 1.0,
+  "eval_steps": 1400,
+  "global_step": 7082,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01412180052956752,
+      "grad_norm": 3.725158929824829,
+      "learning_rate": 9.86020898051398e-05,
+      "loss": 7.050463256835937,
+      "step": 100
+    },
+    {
+      "epoch": 0.02824360105913504,
+      "grad_norm": 4.400176048278809,
+      "learning_rate": 9.7190059305281e-05,
+      "loss": 4.082845153808594,
+      "step": 200
+    },
+    {
+      "epoch": 0.04236540158870256,
+      "grad_norm": 3.011744499206543,
+      "learning_rate": 9.57780288054222e-05,
+      "loss": 2.8129367065429687,
+      "step": 300
+    },
+    {
+      "epoch": 0.05648720211827008,
+      "grad_norm": 3.1704041957855225,
+      "learning_rate": 9.436599830556341e-05,
+      "loss": 2.3832768249511718,
+      "step": 400
+    },
+    {
+      "epoch": 0.0706090026478376,
+      "grad_norm": 4.340809345245361,
+      "learning_rate": 9.295396780570461e-05,
+      "loss": 2.0935064697265626,
+      "step": 500
+    },
+    {
+      "epoch": 0.08473080317740513,
+      "grad_norm": 2.461683750152588,
+      "learning_rate": 9.154193730584581e-05,
+      "loss": 1.8044235229492187,
+      "step": 600
+    },
+    {
+      "epoch": 0.09885260370697264,
+      "grad_norm": 4.021312713623047,
+      "learning_rate": 9.012990680598702e-05,
+      "loss": 1.7957286071777343,
+      "step": 700
+    },
+    {
+      "epoch": 0.11297440423654016,
+      "grad_norm": 3.464273452758789,
+      "learning_rate": 8.871787630612822e-05,
+      "loss": 1.6542176818847656,
+      "step": 800
+    },
+    {
+      "epoch": 0.12709620476610767,
+      "grad_norm": 3.0971059799194336,
+      "learning_rate": 8.730584580626942e-05,
+      "loss": 1.501783905029297,
+      "step": 900
+    },
+    {
+      "epoch": 0.1412180052956752,
+      "grad_norm": 4.295173168182373,
+      "learning_rate": 8.589381530641061e-05,
+      "loss": 1.3875682067871093,
+      "step": 1000
+    },
+    {
+      "epoch": 0.1553398058252427,
+      "grad_norm": 2.0098490715026855,
+      "learning_rate": 8.448178480655182e-05,
+      "loss": 1.4273907470703124,
+      "step": 1100
+    },
+    {
+      "epoch": 0.16946160635481025,
+      "grad_norm": 3.7033345699310303,
+      "learning_rate": 8.306975430669302e-05,
+      "loss": 1.2835659790039062,
+      "step": 1200
+    },
+    {
+      "epoch": 0.18358340688437777,
+      "grad_norm": 3.607093095779419,
+      "learning_rate": 8.165772380683423e-05,
+      "loss": 1.172675552368164,
+      "step": 1300
+    },
+    {
+      "epoch": 0.1977052074139453,
+      "grad_norm": 4.655322551727295,
+      "learning_rate": 8.024569330697543e-05,
+      "loss": 1.1659706878662108,
+      "step": 1400
+    },
+    {
+      "epoch": 0.1977052074139453,
+      "eval_accuracy": 0.929467458759879,
+      "eval_f1": 0.9067117960974359,
+      "eval_loss": 0.21753081679344177,
+      "eval_matthews_correlation": 0.907413923387255,
+      "eval_precision": 0.8868148616081014,
+      "eval_recall": 0.9307027856599929,
+      "eval_runtime": 302.3947,
+      "eval_samples_per_second": 249.383,
+      "eval_steps_per_second": 31.174,
+      "step": 1400
+    },
+    {
+      "epoch": 0.2118270079435128,
+      "grad_norm": 2.124321460723877,
+      "learning_rate": 7.883366280711664e-05,
+      "loss": 1.2192025756835938,
+      "step": 1500
+    },
+    {
+      "epoch": 0.22594880847308033,
+      "grad_norm": 2.0321645736694336,
+      "learning_rate": 7.742163230725784e-05,
+      "loss": 1.1063846588134765,
+      "step": 1600
+    },
+    {
+      "epoch": 0.24007060900264784,
+      "grad_norm": 7.473122596740723,
+      "learning_rate": 7.600960180739905e-05,
+      "loss": 1.1167493438720704,
+      "step": 1700
+    },
+    {
+      "epoch": 0.25419240953221534,
+      "grad_norm": 1.6879299879074097,
+      "learning_rate": 7.459757130754025e-05,
+      "loss": 1.0563026428222657,
+      "step": 1800
+    },
+    {
+      "epoch": 0.26831421006178285,
+      "grad_norm": 5.7779130935668945,
+      "learning_rate": 7.318554080768146e-05,
+      "loss": 0.9488992309570312,
+      "step": 1900
+    },
+    {
+      "epoch": 0.2824360105913504,
+      "grad_norm": 1.244494080543518,
+      "learning_rate": 7.177351030782266e-05,
+      "loss": 0.8847799682617188,
+      "step": 2000
+    },
+    {
+      "epoch": 0.2965578111209179,
+      "grad_norm": 4.008191108703613,
+      "learning_rate": 7.036147980796385e-05,
+      "loss": 0.9060035705566406,
+      "step": 2100
+    },
+    {
+      "epoch": 0.3106796116504854,
+      "grad_norm": 2.906226396560669,
+      "learning_rate": 6.894944930810506e-05,
+      "loss": 0.7914878082275391,
+      "step": 2200
+    },
+    {
+      "epoch": 0.324801412180053,
+      "grad_norm": 1.6399933099746704,
+      "learning_rate": 6.753741880824626e-05,
+      "loss": 0.7373094177246093,
+      "step": 2300
+    },
+    {
+      "epoch": 0.3389232127096205,
+      "grad_norm": 3.8636691570281982,
+      "learning_rate": 6.612538830838746e-05,
+      "loss": 0.7353231048583985,
+      "step": 2400
+    },
+    {
+      "epoch": 0.353045013239188,
+      "grad_norm": 4.8219194412231445,
+      "learning_rate": 6.471335780852866e-05,
+      "loss": 0.8310698699951172,
+      "step": 2500
+    },
+    {
+      "epoch": 0.36716681376875554,
+      "grad_norm": 1.61204993724823,
+      "learning_rate": 6.330132730866987e-05,
+      "loss": 0.7148534393310547,
+      "step": 2600
+    },
+    {
+      "epoch": 0.38128861429832306,
+      "grad_norm": 4.425326824188232,
+      "learning_rate": 6.188929680881107e-05,
+      "loss": 0.8226445770263672,
+      "step": 2700
+    },
+    {
+      "epoch": 0.3954104148278906,
+      "grad_norm": 3.553007125854492,
+      "learning_rate": 6.047726630895228e-05,
+      "loss": 0.6587068939208984,
+      "step": 2800
+    },
+    {
+      "epoch": 0.3954104148278906,
+      "eval_accuracy": 0.9599002811223678,
+      "eval_f1": 0.9412536039168936,
+      "eval_loss": 0.14896713197231293,
+      "eval_matthews_correlation": 0.9470253620243468,
+      "eval_precision": 0.9284857025679184,
+      "eval_recall": 0.9570486713200069,
+      "eval_runtime": 299.6915,
+      "eval_samples_per_second": 251.632,
+      "eval_steps_per_second": 31.456,
+      "step": 2800
+    },
+    {
+      "epoch": 0.4095322153574581,
+      "grad_norm": 1.7863709926605225,
+      "learning_rate": 5.9065235809093475e-05,
+      "loss": 0.7582288360595704,
+      "step": 2900
+    },
+    {
+      "epoch": 0.4236540158870256,
+      "grad_norm": 2.5715460777282715,
+      "learning_rate": 5.765320530923468e-05,
+      "loss": 0.7553373718261719,
+      "step": 3000
+    },
+    {
+      "epoch": 0.43777581641659313,
+      "grad_norm": 9.767241477966309,
+      "learning_rate": 5.6241174809375883e-05,
+      "loss": 0.653603515625,
+      "step": 3100
+    },
+    {
+      "epoch": 0.45189761694616065,
+      "grad_norm": 3.407860279083252,
+      "learning_rate": 5.482914430951709e-05,
+      "loss": 0.6524411010742187,
+      "step": 3200
+    },
+    {
+      "epoch": 0.46601941747572817,
+      "grad_norm": 2.109328031539917,
+      "learning_rate": 5.341711380965829e-05,
+      "loss": 0.6743782806396484,
+      "step": 3300
+    },
+    {
+      "epoch": 0.4801412180052957,
+      "grad_norm": 1.2769715785980225,
+      "learning_rate": 5.2005083309799496e-05,
+      "loss": 0.673993911743164,
+      "step": 3400
+    },
+    {
+      "epoch": 0.4942630185348632,
+      "grad_norm": 4.500333309173584,
+      "learning_rate": 5.05930528099407e-05,
+      "loss": 0.6348028564453125,
+      "step": 3500
+    },
+    {
+      "epoch": 0.5083848190644307,
+      "grad_norm": 1.812221646308899,
+      "learning_rate": 4.91810223100819e-05,
+      "loss": 0.5954225158691406,
+      "step": 3600
+    },
+    {
+      "epoch": 0.5225066195939982,
+      "grad_norm": 0.6688806414604187,
+      "learning_rate": 4.77689918102231e-05,
+      "loss": 0.5159746551513672,
+      "step": 3700
+    },
+    {
+      "epoch": 0.5366284201235657,
+      "grad_norm": 6.0633955001831055,
+      "learning_rate": 4.635696131036431e-05,
+      "loss": 0.5547808074951172,
+      "step": 3800
+    },
+    {
+      "epoch": 0.5507502206531333,
+      "grad_norm": 2.232146739959717,
+      "learning_rate": 4.494493081050551e-05,
+      "loss": 0.6260259246826172,
+      "step": 3900
+    },
+    {
+      "epoch": 0.5648720211827007,
+      "grad_norm": 2.8028974533081055,
+      "learning_rate": 4.3532900310646716e-05,
+      "loss": 0.5969748687744141,
+      "step": 4000
+    },
+    {
+      "epoch": 0.5789938217122683,
+      "grad_norm": 2.3292088508605957,
+      "learning_rate": 4.212086981078791e-05,
+      "loss": 0.591428108215332,
+      "step": 4100
+    },
+    {
+      "epoch": 0.5931156222418358,
+      "grad_norm": 1.5047627687454224,
+      "learning_rate": 4.070883931092912e-05,
+      "loss": 0.643886947631836,
+      "step": 4200
+    },
+    {
+      "epoch": 0.5931156222418358,
+      "eval_accuracy": 0.9696997825279796,
+      "eval_f1": 0.9548881928083998,
+      "eval_loss": 0.10884281992912292,
+      "eval_matthews_correlation": 0.9598345847922993,
+      "eval_precision": 0.9471929173612073,
+      "eval_recall": 0.9633364273308933,
+      "eval_runtime": 305.3891,
+      "eval_samples_per_second": 246.937,
+      "eval_steps_per_second": 30.869,
+      "step": 4200
+    },
+    {
+      "epoch": 0.6072374227714034,
+      "grad_norm": 2.4066836833953857,
+      "learning_rate": 3.929680881107032e-05,
+      "loss": 0.6496241760253906,
+      "step": 4300
+    },
+    {
+      "epoch": 0.6213592233009708,
+      "grad_norm": 3.095889091491699,
+      "learning_rate": 3.788477831121152e-05,
+      "loss": 0.5657179641723633,
+      "step": 4400
+    },
+    {
+      "epoch": 0.6354810238305384,
+      "grad_norm": 3.7599406242370605,
+      "learning_rate": 3.6472747811352724e-05,
+      "loss": 0.6106902694702149,
+      "step": 4500
+    },
+    {
+      "epoch": 0.649602824360106,
+      "grad_norm": 2.1620442867279053,
+      "learning_rate": 3.506071731149393e-05,
+      "loss": 0.5624249267578125,
+      "step": 4600
+    },
+    {
+      "epoch": 0.6637246248896734,
+      "grad_norm": 2.772578716278076,
+      "learning_rate": 3.364868681163513e-05,
+      "loss": 0.5569720840454102,
+      "step": 4700
+    },
+    {
+      "epoch": 0.677846425419241,
+      "grad_norm": 4.468015193939209,
+      "learning_rate": 3.223665631177634e-05,
+      "loss": 0.5418438339233398,
+      "step": 4800
+    },
+    {
+      "epoch": 0.6919682259488085,
+      "grad_norm": 4.624788761138916,
+      "learning_rate": 3.082462581191754e-05,
+      "loss": 0.5999539184570313,
+      "step": 4900
+    },
+    {
+      "epoch": 0.706090026478376,
+      "grad_norm": 6.000360488891602,
+      "learning_rate": 2.9412595312058745e-05,
+      "loss": 0.5288655090332032,
+      "step": 5000
+    },
+    {
+      "epoch": 0.7202118270079435,
+      "grad_norm": 3.9073150157928467,
+      "learning_rate": 2.8000564812199946e-05,
+      "loss": 0.6136045455932617,
+      "step": 5100
+    },
+    {
+      "epoch": 0.7343336275375111,
+      "grad_norm": 7.168360233306885,
+      "learning_rate": 2.6588534312341147e-05,
+      "loss": 0.44071128845214846,
+      "step": 5200
+    },
+    {
+      "epoch": 0.7484554280670785,
+      "grad_norm": 6.492976188659668,
+      "learning_rate": 2.5176503812482348e-05,
+      "loss": 0.5461198425292969,
+      "step": 5300
+    },
+    {
+      "epoch": 0.7625772285966461,
+      "grad_norm": 2.2384190559387207,
+      "learning_rate": 2.3764473312623552e-05,
+      "loss": 0.5522915649414063,
+      "step": 5400
+    },
+    {
+      "epoch": 0.7766990291262136,
+      "grad_norm": 0.10446355491876602,
+      "learning_rate": 2.2352442812764757e-05,
+      "loss": 0.5230790328979492,
+      "step": 5500
+    },
+    {
+      "epoch": 0.7908208296557812,
+      "grad_norm": 1.8123565912246704,
+      "learning_rate": 2.094041231290596e-05,
+      "loss": 0.5251728820800782,
+      "step": 5600
+    },
+    {
+      "epoch": 0.7908208296557812,
+      "eval_accuracy": 0.973545324351562,
+      "eval_f1": 0.9593753511152517,
+      "eval_loss": 0.09802598506212234,
+      "eval_matthews_correlation": 0.9648894189840553,
+      "eval_precision": 0.9522183984246139,
+      "eval_recall": 0.9674262657206655,
+      "eval_runtime": 302.8972,
+      "eval_samples_per_second": 248.969,
+      "eval_steps_per_second": 31.123,
+      "step": 5600
+    },
+    {
+      "epoch": 0.8049426301853486,
+      "grad_norm": 2.708657741546631,
+      "learning_rate": 1.9528381813047165e-05,
+      "loss": 0.6050262451171875,
+      "step": 5700
+    },
+    {
+      "epoch": 0.8190644307149162,
+      "grad_norm": 2.155137062072754,
+      "learning_rate": 1.8116351313188366e-05,
+      "loss": 0.5030016326904296,
+      "step": 5800
+    },
+    {
+      "epoch": 0.8331862312444837,
+      "grad_norm": 4.321381568908691,
+      "learning_rate": 1.6704320813329567e-05,
+      "loss": 0.5906719207763672,
+      "step": 5900
+    },
+    {
+      "epoch": 0.8473080317740512,
+      "grad_norm": 0.12370330095291138,
+      "learning_rate": 1.529229031347077e-05,
+      "loss": 0.5666491317749024,
+      "step": 6000
+    },
+    {
+      "epoch": 0.8614298323036187,
+      "grad_norm": 1.295456886291504,
+      "learning_rate": 1.3880259813611976e-05,
+      "loss": 0.5372732925415039,
+      "step": 6100
+    },
+    {
+      "epoch": 0.8755516328331863,
+      "grad_norm": 6.4877028465271,
+      "learning_rate": 1.2468229313753179e-05,
+      "loss": 0.5397153091430664,
+      "step": 6200
+    },
+    {
+      "epoch": 0.8896734333627537,
+      "grad_norm": 1.712461233139038,
+      "learning_rate": 1.1056198813894381e-05,
+      "loss": 0.4744549179077148,
+      "step": 6300
+    },
+    {
+      "epoch": 0.9037952338923213,
+      "grad_norm": 7.258785247802734,
+      "learning_rate": 9.644168314035584e-06,
+      "loss": 0.5170178985595704,
+      "step": 6400
+    },
+    {
+      "epoch": 0.9179170344218888,
+      "grad_norm": 3.859020233154297,
+      "learning_rate": 8.232137814176786e-06,
+      "loss": 0.5684902954101563,
+      "step": 6500
+    },
+    {
+      "epoch": 0.9320388349514563,
+      "grad_norm": 4.142998695373535,
+      "learning_rate": 6.82010731431799e-06,
+      "loss": 0.43791793823242187,
+      "step": 6600
+    },
+    {
+      "epoch": 0.9461606354810238,
+      "grad_norm": 0.17755526304244995,
+      "learning_rate": 5.4080768144591926e-06,
+      "loss": 0.4931388473510742,
+      "step": 6700
+    },
+    {
+      "epoch": 0.9602824360105914,
+      "grad_norm": 5.609339714050293,
+      "learning_rate": 3.996046314600395e-06,
+      "loss": 0.5045675277709961,
+      "step": 6800
+    },
+    {
+      "epoch": 0.9744042365401588,
+      "grad_norm": 4.085425853729248,
+      "learning_rate": 2.5840158147415987e-06,
+      "loss": 0.4947611618041992,
+      "step": 6900
+    },
+    {
+      "epoch": 0.9885260370697264,
+      "grad_norm": 1.071936845779419,
+      "learning_rate": 1.1719853148828015e-06,
+      "loss": 0.5044781875610351,
+      "step": 7000
+    },
+    {
+      "epoch": 0.9885260370697264,
+      "eval_accuracy": 0.9750304991248078,
+      "eval_f1": 0.9613335088674932,
+      "eval_loss": 0.09293721616268158,
+      "eval_matthews_correlation": 0.9668512518488812,
+      "eval_precision": 0.9548993836479353,
+      "eval_recall": 0.9683487035732542,
+      "eval_runtime": 303.2272,
+      "eval_samples_per_second": 248.698,
+      "eval_steps_per_second": 31.089,
+      "step": 7000
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 7082,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 1400,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2621886414450048.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

archive/checkpoint-7082/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc43a6df41aa56c9f391a65b3d477accf7214857e0284685112be68e451f09d
+size 5265

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b49ab111d77a36d20c49082ed0bf29a2e4185f8210b8934c20a6b275b50d4f55
 size 5265

 version https://git-lfs.github.com/spec/v1
+oid sha256:08747cf29321824e71aa993ca57b70a3e903a69c34a3e7cefaf3115f8b5d97db
 size 5265