End of training

Browse files

Files changed (7) hide show

README.md +23 -2
all_results.json +19 -0
eval_results.json +13 -0
generation_config.json +0 -1
runs/Feb25_10-20-16_gitspot/events.out.tfevents.1740464329.gitspot.2349229.1 +3 -0
train_results.json +9 -0
trainer_state.json +140 -0

README.md CHANGED Viewed

@@ -4,9 +4,23 @@ license: apache-2.0
 base_model: facebook/bart-large
 tags:
 - generated_from_trainer
 model-index:
 - name: bart-bas-summ
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -14,7 +28,14 @@ should probably proofread and complete it, then remove this comment. -->
 # bart-bas-summ
-This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on an unknown dataset.
 ## Model description

 base_model: facebook/bart-large
 tags:
 - generated_from_trainer
+datasets:
+- Mwnthai/bodo-legal-summary-data
+metrics:
+- rouge
 model-index:
 - name: bart-bas-summ
+  results:
+  - task:
+      name: Summarization
+      type: summarization
+    dataset:
+      name: Mwnthai/bodo-legal-summary-data
+      type: Mwnthai/bodo-legal-summary-data
+    metrics:
+    - name: Rouge1
+      type: rouge
+      value: 21.2656
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # bart-bas-summ
+This model is a fine-tuned version of [facebook/bart-large](https://huggingface.co/facebook/bart-large) on the Mwnthai/bodo-legal-summary-data dataset.
+It achieves the following results on the evaluation set:
+- Loss: 4.1477
+- Rouge1: 21.2656
+- Rouge2: 7.0402
+- Rougel: 19.1943
+- Rougelsum: 19.6362
+- Gen Len: 128.0
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+    "epoch": 3.0,
+    "eval_gen_len": 128.0,
+    "eval_loss": 4.1476616859436035,
+    "eval_rouge1": 21.2656,
+    "eval_rouge2": 7.0402,
+    "eval_rougeL": 19.1943,
+    "eval_rougeLsum": 19.6362,
+    "eval_runtime": 159.8917,
+    "eval_samples": 200,
+    "eval_samples_per_second": 1.251,
+    "eval_steps_per_second": 0.625,
+    "total_flos": 3.246076355120333e+16,
+    "train_loss": 5.265000151162597,
+    "train_runtime": 4607.9912,
+    "train_samples": 4995,
+    "train_samples_per_second": 3.252,
+    "train_steps_per_second": 1.626
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 3.0,
+    "eval_gen_len": 128.0,
+    "eval_loss": 4.1476616859436035,
+    "eval_rouge1": 21.2656,
+    "eval_rouge2": 7.0402,
+    "eval_rougeL": 19.1943,
+    "eval_rougeLsum": 19.6362,
+    "eval_runtime": 159.8917,
+    "eval_samples": 200,
+    "eval_samples_per_second": 1.251,
+    "eval_steps_per_second": 0.625
+}

generation_config.json CHANGED Viewed

@@ -1,5 +1,4 @@
 {
-  "_from_model_config": true,
   "bos_token_id": 0,
   "decoder_start_token_id": 2,
   "eos_token_id": 2,

 {
   "bos_token_id": 0,
   "decoder_start_token_id": 2,
   "eos_token_id": 2,

runs/Feb25_10-20-16_gitspot/events.out.tfevents.1740464329.gitspot.2349229.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf924a77c128066dc0241e76140a75ffd0d47592c5fd80266af978426fb8d776
+size 565

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.0,
+    "total_flos": 3.246076355120333e+16,
+    "train_loss": 5.265000151162597,
+    "train_runtime": 4607.9912,
+    "train_samples": 4995,
+    "train_samples_per_second": 3.252,
+    "train_steps_per_second": 1.626
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,140 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 1000.0,
+  "global_step": 7494,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.200160128102482,
+      "grad_norm": 6.359441757202148,
+      "learning_rate": 4.6663997864958635e-05,
+      "loss": 6.6546,
+      "step": 500
+    },
+    {
+      "epoch": 0.400320256204964,
+      "grad_norm": 5.609431743621826,
+      "learning_rate": 4.332799572991727e-05,
+      "loss": 6.1195,
+      "step": 1000
+    },
+    {
+      "epoch": 0.600480384307446,
+      "grad_norm": 8.655993461608887,
+      "learning_rate": 3.99919935948759e-05,
+      "loss": 5.8641,
+      "step": 1500
+    },
+    {
+      "epoch": 0.800640512409928,
+      "grad_norm": 5.4739837646484375,
+      "learning_rate": 3.665599145983454e-05,
+      "loss": 5.7106,
+      "step": 2000
+    },
+    {
+      "epoch": 1.00080064051241,
+      "grad_norm": 5.922306060791016,
+      "learning_rate": 3.3319989324793174e-05,
+      "loss": 5.5891,
+      "step": 2500
+    },
+    {
+      "epoch": 1.200960768614892,
+      "grad_norm": 7.8872504234313965,
+      "learning_rate": 2.9983987189751807e-05,
+      "loss": 5.1944,
+      "step": 3000
+    },
+    {
+      "epoch": 1.4011208967173738,
+      "grad_norm": 5.556982040405273,
+      "learning_rate": 2.6647985054710433e-05,
+      "loss": 5.1804,
+      "step": 3500
+    },
+    {
+      "epoch": 1.601281024819856,
+      "grad_norm": 6.548351287841797,
+      "learning_rate": 2.331198291966907e-05,
+      "loss": 5.1859,
+      "step": 4000
+    },
+    {
+      "epoch": 1.801441152922338,
+      "grad_norm": 7.128460884094238,
+      "learning_rate": 1.9975980784627705e-05,
+      "loss": 5.0938,
+      "step": 4500
+    },
+    {
+      "epoch": 2.00160128102482,
+      "grad_norm": 5.632880210876465,
+      "learning_rate": 1.6639978649586335e-05,
+      "loss": 5.0563,
+      "step": 5000
+    },
+    {
+      "epoch": 2.2017614091273017,
+      "grad_norm": 6.655033588409424,
+      "learning_rate": 1.330397651454497e-05,
+      "loss": 4.68,
+      "step": 5500
+    },
+    {
+      "epoch": 2.401921537229784,
+      "grad_norm": 8.128520965576172,
+      "learning_rate": 9.967974379503602e-06,
+      "loss": 4.7047,
+      "step": 6000
+    },
+    {
+      "epoch": 2.602081665332266,
+      "grad_norm": 8.819676399230957,
+      "learning_rate": 6.631972244462237e-06,
+      "loss": 4.6252,
+      "step": 6500
+    },
+    {
+      "epoch": 2.8022417934347477,
+      "grad_norm": 6.872320175170898,
+      "learning_rate": 3.29597010942087e-06,
+      "loss": 4.656,
+      "step": 7000
+    },
+    {
+      "epoch": 3.0,
+      "step": 7494,
+      "total_flos": 3.246076355120333e+16,
+      "train_loss": 5.265000151162597,
+      "train_runtime": 4607.9912,
+      "train_samples_per_second": 3.252,
+      "train_steps_per_second": 1.626
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 7494,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.246076355120333e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}