End of training

Browse files

Files changed (4) hide show

README.md +66 -0
all_results.json +7 -0
train_results.json +7 -0
trainer_state.json +126 -0

README.md ADDED Viewed

	@@ -0,0 +1,66 @@

+---
+license: mit
+base_model: FacebookAI/roberta-base
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+- f1
+- precision
+- recall
+model-index:
+- name: roberta-base-CD_baseline
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# roberta-base-CD_baseline
+This model is a fine-tuned version of [FacebookAI/roberta-base](https://huggingface.co/FacebookAI/roberta-base) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.6700
+- Accuracy: 0.4
+- F1: 0.3310
+- Precision: 0.3202
+- Recall: 0.4
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Accuracy | F1     | Precision | Recall |
+|:-------------:|:-----:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
+| 1.6396        | 1.0   | 125  | 1.6700          | 0.4      | 0.3310 | 0.3202    | 0.4    |
+### Framework versions
+- Transformers 4.38.0
+- PyTorch 2.8.0+cu128
+- Datasets 4.2.0
+- Tokenizers 0.15.2

all_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 1.0,
+    "train_loss": 1.857890853881836,
+    "train_runtime": 33.8699,
+    "train_samples_per_second": 58.931,
+    "train_steps_per_second": 3.691
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 1.0,
+    "train_loss": 1.857890853881836,
+    "train_runtime": 33.8699,
+    "train_samples_per_second": 58.931,
+    "train_steps_per_second": 3.691
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,126 @@

+{
+  "best_metric": 1.6699775457382202,
+  "best_model_checkpoint": "roberta-base-CD_baseline/checkpoint-125",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 125,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.08,
+      "grad_norm": 3.975125312805176,
+      "learning_rate": 1.8400000000000003e-05,
+      "loss": 2.2918,
+      "step": 10
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 6.948821544647217,
+      "learning_rate": 1.6800000000000002e-05,
+      "loss": 2.1418,
+      "step": 20
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 7.936761379241943,
+      "learning_rate": 1.5200000000000002e-05,
+      "loss": 1.9934,
+      "step": 30
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 13.492217063903809,
+      "learning_rate": 1.3600000000000002e-05,
+      "loss": 1.9768,
+      "step": 40
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 9.684530258178711,
+      "learning_rate": 1.2e-05,
+      "loss": 1.7049,
+      "step": 50
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 21.087621688842773,
+      "learning_rate": 1.04e-05,
+      "loss": 1.887,
+      "step": 60
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 2.634740114212036,
+      "learning_rate": 8.8e-06,
+      "loss": 1.8188,
+      "step": 70
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 2.7339694499969482,
+      "learning_rate": 7.2000000000000005e-06,
+      "loss": 1.7238,
+      "step": 80
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 16.557479858398438,
+      "learning_rate": 5.600000000000001e-06,
+      "loss": 1.5953,
+      "step": 90
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 21.172969818115234,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 1.7577,
+      "step": 100
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 3.875187635421753,
+      "learning_rate": 2.4000000000000003e-06,
+      "loss": 1.7686,
+      "step": 110
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 13.141642570495605,
+      "learning_rate": 8.000000000000001e-07,
+      "loss": 1.6396,
+      "step": 120
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.4,
+      "eval_f1": 0.33096034764074916,
+      "eval_loss": 1.6699775457382202,
+      "eval_precision": 0.3202175398971738,
+      "eval_recall": 0.4,
+      "eval_runtime": 1.3009,
+      "eval_samples_per_second": 176.8,
+      "eval_steps_per_second": 11.53,
+      "step": 125
+    },
+    {
+      "epoch": 1.0,
+      "step": 125,
+      "total_flos": 284581711923216.0,
+      "train_loss": 1.857890853881836,
+      "train_runtime": 33.8699,
+      "train_samples_per_second": 58.931,
+      "train_steps_per_second": 3.691
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 125,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "total_flos": 284581711923216.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}