Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

README.md +1 -69
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +106 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,58 +1,6 @@
 ---
-license: mit
-datasets:
-- bmd1905/vi-error-correction-2.0
-metrics:
-- accuracy
-- bleu
-base_model:
-- vinai/bartpho-syllable
-pipeline_tag: text-generation
 library_name: peft
 ---
-<!-- This model card has been generated automatically according to the information the Trainer had access to. You
-should probably proofread and complete it, then remove this comment. -->
-# vietnamese-correction-lora-v2
-This model is a fine-tuned version of [vinai/bartpho-syllable](https://huggingface.co/vinai/bartpho-syllable) on an unknown dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.1776123046875
-- Sacrebleu: 25.07128550273525
-- Precision: 0.9230769230769231
-- Recall: 0.6
-- F1 Score: 0.7272727272727274
-- eval_samples_per_second: 6.776
-## Model description
-More information needed
-## Intended uses & limitations
-More information needed
-## Training and evaluation data
-```html
-DatasetDict({
-    train: Dataset({
-        features: ['input', 'output'],
-        num_rows: 800000
-    })
-    val: Dataset({
-        features: ['input', 'output'],
-        num_rows: 200000
-    })
-    test: Dataset({
-        features: ['input', 'output'],
-        num_rows: 40000
-    })
-})
-```
 ## Training procedure
@@ -66,23 +14,7 @@ The following `bitsandbytes` quantization config was used during training:
 - bnb_4bit_quant_type: nf4
 - bnb_4bit_use_double_quant: True
 - bnb_4bit_compute_dtype: float16
-### Training hyperparameters
-The following hyperparameters were used during training:
-- trainable params: 25,165,824 || all params: 326,801,408 || trainable%: 7.700647360735974
-- Num examples = 800,000
-- Num Epochs = 2
-- Instantaneous batch size per device = 12
-- Total train batch size (w. parallel, distributed & accumulation) = 72
-- Gradient Accumulation steps = 6
-- Total optimization steps = 22,222
-- Number of trainable parameters = 25,165,824
 ### Framework versions
 - PEFT 0.4.0
-- PEFT 0.14.0
-- Transformers 4.47.0
-- Pytorch 2.5.1+cu121
-- Datasets 3.3.1
-- Tokenizers 0.21.0

 ---
 library_name: peft
 ---
 ## Training procedure
 - bnb_4bit_quant_type: nf4
 - bnb_4bit_use_double_quant: True
 - bnb_4bit_compute_dtype: float16
 ### Framework versions
 - PEFT 0.4.0

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:503a6bbddeb2280904eaed9a9d691f3ef56b6727331d687b35ea2db18d81ef51
+size 201469562

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a8f049d0dc7f221e1d8058fd45a0cc9b5e2d694b045a1cd41f34bd549f036c7
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9cdf108449d81f49456c5c1d2c15135a982f7e11f00496563cd74e5af9e83359
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,106 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.999727273612748,
+  "global_step": 6844,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.86,
+      "learning_rate": 1.65919930625813e-05,
+      "loss": 1.2243,
+      "step": 1200
+    },
+    {
+      "epoch": 0.86,
+      "eval_f1_score": 0.7272727272727274,
+      "eval_loss": 0.3056640625,
+      "eval_precision": 0.9230769230769231,
+      "eval_recall": 0.6,
+      "eval_runtime": 22188.3103,
+      "eval_sacrebleu": 22.803636183857474,
+      "eval_samples_per_second": 4.507,
+      "eval_steps_per_second": 0.376,
+      "step": 1200
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 1.4433110563958261e-05,
+      "loss": 0.4402,
+      "step": 2400
+    },
+    {
+      "epoch": 1.4,
+      "eval_f1_score": 0.8571428571428571,
+      "eval_loss": 0.3037109375,
+      "eval_precision": 0.8571428571428571,
+      "eval_recall": 0.8571428571428571,
+      "eval_runtime": 41845.2709,
+      "eval_sacrebleu": 27.58721404301854,
+      "eval_samples_per_second": 7.169,
+      "eval_steps_per_second": 0.478,
+      "step": 2400
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 1.1619181615664206e-05,
+      "loss": 0.3997,
+      "step": 3600
+    },
+    {
+      "epoch": 2.1,
+      "eval_f1_score": 0.8571428571428571,
+      "eval_loss": 0.283447265625,
+      "eval_precision": 0.8571428571428571,
+      "eval_recall": 0.8571428571428571,
+      "eval_runtime": 30810.3722,
+      "eval_sacrebleu": 28.184010528346654,
+      "eval_samples_per_second": 9.737,
+      "eval_steps_per_second": 0.649,
+      "step": 3600
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 8.80525266737015e-06,
+      "loss": 0.3927,
+      "step": 4800
+    },
+    {
+      "epoch": 2.81,
+      "eval_f1_score": 0.8571428571428571,
+      "eval_loss": 0.29443359375,
+      "eval_precision": 0.8571428571428571,
+      "eval_recall": 0.8571428571428571,
+      "eval_runtime": 30830.552,
+      "eval_sacrebleu": 28.208588875941434,
+      "eval_samples_per_second": 9.731,
+      "eval_steps_per_second": 0.649,
+      "step": 4800
+    },
+    {
+      "epoch": 3.51,
+      "learning_rate": 5.991323719076094e-06,
+      "loss": 0.3993,
+      "step": 6000
+    },
+    {
+      "epoch": 3.51,
+      "eval_f1_score": 0.8571428571428571,
+      "eval_loss": 0.279052734375,
+      "eval_precision": 0.8571428571428571,
+      "eval_recall": 0.8571428571428571,
+      "eval_runtime": 30959.953,
+      "eval_sacrebleu": 28.577699452894848,
+      "eval_samples_per_second": 9.69,
+      "eval_steps_per_second": 0.646,
+      "step": 6000
+    }
+  ],
+  "max_steps": 8555,
+  "num_train_epochs": 5,
+  "total_flos": 2.97641943465984e+17,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58c9075965d741872444ce679419408a906a36a4990cdbfef5a7c249390c824e
 size 4600

 version https://git-lfs.github.com/spec/v1
+oid sha256:125adbe784ad4664d78128701bd159c971ff16b9e7f0f220f90bfbee58ff093c
 size 4600