sjudicke commited on
Commit
9233e9a
·
verified ·
1 Parent(s): 04fd1f5

Full run push

Browse files
Files changed (2) hide show
  1. README.md +54 -0
  2. trainer_state.json +253 -0
README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: llama2
4
+ base_model: meta-llama/Llama-2-7b-hf
5
+ tags:
6
+ - generated_from_trainer
7
+ model-index:
8
+ - name: m0-new-lr
9
+ results: []
10
+ ---
11
+
12
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
+ should probably proofread and complete it, then remove this comment. -->
14
+
15
+ # m0-new-lr
16
+
17
+ This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
18
+
19
+ ## Model description
20
+
21
+ More information needed
22
+
23
+ ## Intended uses & limitations
24
+
25
+ More information needed
26
+
27
+ ## Training and evaluation data
28
+
29
+ More information needed
30
+
31
+ ## Training procedure
32
+
33
+ ### Training hyperparameters
34
+
35
+ The following hyperparameters were used during training:
36
+ - learning_rate: 0.0003
37
+ - train_batch_size: 1
38
+ - eval_batch_size: 8
39
+ - seed: 42
40
 + - optimizer: AdamW (torch) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
41
+ - lr_scheduler_type: cosine
42
+ - lr_scheduler_warmup_ratio: 0.03
43
+ - num_epochs: 1.0
44
+
45
+ ### Training results
46
+
47
+
48
+
49
+ ### Framework versions
50
+
51
+ - Transformers 4.57.1
52
+ - Pytorch 2.8.0+cu126
53
+ - Datasets 4.0.0
54
+ - Tokenizers 0.22.1
trainer_state.json ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 15000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.03333333333333333,
14
+ "grad_norm": 0.8437663912773132,
15
+ "learning_rate": 0.00029999160495301487,
16
+ "loss": 0.4576,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.06666666666666667,
21
+ "grad_norm": 1.3834174871444702,
22
+ "learning_rate": 0.00029894738121610755,
23
+ "loss": 0.3618,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.1,
28
+ "grad_norm": 0.8362070322036743,
29
+ "learning_rate": 0.0002961688552258895,
30
+ "loss": 0.3312,
31
+ "step": 1500
32
+ },
33
+ {
34
+ "epoch": 0.13333333333333333,
35
+ "grad_norm": 1.8011195659637451,
36
+ "learning_rate": 0.0002916883793731605,
37
+ "loss": 0.3265,
38
+ "step": 2000
39
+ },
40
+ {
41
+ "epoch": 0.16666666666666666,
42
+ "grad_norm": 1.187129259109497,
43
+ "learning_rate": 0.0002855581230838202,
44
+ "loss": 0.3176,
45
+ "step": 2500
46
+ },
47
+ {
48
+ "epoch": 0.2,
49
+ "grad_norm": 0.6343923807144165,
50
+ "learning_rate": 0.000277849465372452,
51
+ "loss": 0.3267,
52
+ "step": 3000
53
+ },
54
+ {
55
+ "epoch": 0.23333333333333334,
56
+ "grad_norm": 0.5139018297195435,
57
+ "learning_rate": 0.00026865216372475085,
58
+ "loss": 0.3304,
59
+ "step": 3500
60
+ },
61
+ {
62
+ "epoch": 0.26666666666666666,
63
+ "grad_norm": 0.5840966701507568,
64
+ "learning_rate": 0.0002580733089860996,
65
+ "loss": 0.3174,
66
+ "step": 4000
67
+ },
68
+ {
69
+ "epoch": 0.3,
70
+ "grad_norm": 1.0480272769927979,
71
+ "learning_rate": 0.0002462360784252821,
72
+ "loss": 0.3131,
73
+ "step": 4500
74
+ },
75
+ {
76
+ "epoch": 0.3333333333333333,
77
+ "grad_norm": 0.768731415271759,
78
+ "learning_rate": 0.00023327830149231583,
79
+ "loss": 0.3005,
80
+ "step": 5000
81
+ },
82
+ {
83
+ "epoch": 0.36666666666666664,
84
+ "grad_norm": 0.8862756490707397,
85
+ "learning_rate": 0.00021935085497032568,
86
+ "loss": 0.2977,
87
+ "step": 5500
88
+ },
89
+ {
90
+ "epoch": 0.4,
91
+ "grad_norm": 1.8197040557861328,
92
+ "learning_rate": 0.00020461590620786605,
93
+ "loss": 0.3006,
94
+ "step": 6000
95
+ },
96
+ {
97
+ "epoch": 0.43333333333333335,
98
+ "grad_norm": 0.5644539594650269,
99
+ "learning_rate": 0.00018924502488701202,
100
+ "loss": 0.2891,
101
+ "step": 6500
102
+ },
103
+ {
104
+ "epoch": 0.4666666666666667,
105
+ "grad_norm": 0.7333141565322876,
106
+ "learning_rate": 0.00017341718531326979,
107
+ "loss": 0.2958,
108
+ "step": 7000
109
+ },
110
+ {
111
+ "epoch": 0.5,
112
+ "grad_norm": 0.8840310573577881,
113
+ "learning_rate": 0.00015731668248809323,
114
+ "loss": 0.2914,
115
+ "step": 7500
116
+ },
117
+ {
118
+ "epoch": 0.5333333333333333,
119
+ "grad_norm": 0.7415375113487244,
120
+ "learning_rate": 0.0001411309862286835,
121
+ "loss": 0.284,
122
+ "step": 8000
123
+ },
124
+ {
125
+ "epoch": 0.5666666666666667,
126
+ "grad_norm": 1.0699294805526733,
127
+ "learning_rate": 0.00012504855832110804,
128
+ "loss": 0.2776,
129
+ "step": 8500
130
+ },
131
+ {
132
+ "epoch": 0.6,
133
+ "grad_norm": 1.0262274742126465,
134
+ "learning_rate": 0.00010925665812320933,
135
+ "loss": 0.2751,
136
+ "step": 9000
137
+ },
138
+ {
139
+ "epoch": 0.6333333333333333,
140
+ "grad_norm": 1.2797510623931885,
141
+ "learning_rate": 9.393916216825465e-05,
142
+ "loss": 0.2797,
143
+ "step": 9500
144
+ },
145
+ {
146
+ "epoch": 0.6666666666666666,
147
+ "grad_norm": 0.8398839831352234,
148
+ "learning_rate": 7.927442315726411e-05,
149
+ "loss": 0.2655,
150
+ "step": 10000
151
+ },
152
+ {
153
+ "epoch": 0.7,
154
+ "grad_norm": 0.9251325726509094,
155
+ "learning_rate": 6.543319326931815e-05,
156
+ "loss": 0.2605,
157
+ "step": 10500
158
+ },
159
+ {
160
+ "epoch": 0.7333333333333333,
161
+ "grad_norm": 0.6823338866233826,
162
+ "learning_rate": 5.257663597024785e-05,
163
+ "loss": 0.2674,
164
+ "step": 11000
165
+ },
166
+ {
167
+ "epoch": 0.7666666666666667,
168
+ "grad_norm": 0.7772260904312134,
169
+ "learning_rate": 4.085444946965953e-05,
170
+ "loss": 0.2624,
171
+ "step": 11500
172
+ },
173
+ {
174
+ "epoch": 0.8,
175
+ "grad_norm": 0.6518853902816772,
176
+ "learning_rate": 3.040312367624248e-05,
177
+ "loss": 0.2596,
178
+ "step": 12000
179
+ },
180
+ {
181
+ "epoch": 0.8333333333333334,
182
+ "grad_norm": 0.6722842454910278,
183
+ "learning_rate": 2.1344350946892218e-05,
184
+ "loss": 0.2509,
185
+ "step": 12500
186
+ },
187
+ {
188
+ "epoch": 0.8666666666666667,
189
+ "grad_norm": 1.5721765756607056,
190
+ "learning_rate": 1.3783609134448331e-05,
191
+ "loss": 0.25,
192
+ "step": 13000
193
+ },
194
+ {
195
+ "epoch": 0.9,
196
+ "grad_norm": 1.592251181602478,
197
+ "learning_rate": 7.808933432648067e-06,
198
+ "loss": 0.249,
199
+ "step": 13500
200
+ },
201
+ {
202
+ "epoch": 0.9333333333333333,
203
+ "grad_norm": 1.5700002908706665,
204
+ "learning_rate": 3.4898913185952726e-06,
205
+ "loss": 0.2618,
206
+ "step": 14000
207
+ },
208
+ {
209
+ "epoch": 0.9666666666666667,
210
+ "grad_norm": 1.067083477973938,
211
+ "learning_rate": 8.767725282315785e-07,
212
+ "loss": 0.2653,
213
+ "step": 14500
214
+ },
215
+ {
216
+ "epoch": 1.0,
217
+ "grad_norm": 0.734412670135498,
218
+ "learning_rate": 3.496511979950156e-12,
219
+ "loss": 0.2583,
220
+ "step": 15000
221
+ },
222
+ {
223
+ "epoch": 1.0,
224
+ "step": 15000,
225
+ "total_flos": 1.6839046112147866e+17,
226
+ "train_loss": 0.29414576873779297,
227
+ "train_runtime": 2822.8333,
228
+ "train_samples_per_second": 5.314,
229
+ "train_steps_per_second": 5.314
230
+ }
231
+ ],
232
+ "logging_steps": 500,
233
+ "max_steps": 15000,
234
+ "num_input_tokens_seen": 0,
235
+ "num_train_epochs": 1,
236
+ "save_steps": 5000,
237
+ "stateful_callbacks": {
238
+ "TrainerControl": {
239
+ "args": {
240
+ "should_epoch_stop": false,
241
+ "should_evaluate": false,
242
+ "should_log": false,
243
+ "should_save": true,
244
+ "should_training_stop": true
245
+ },
246
+ "attributes": {}
247
+ }
248
+ },
249
+ "total_flos": 1.6839046112147866e+17,
250
+ "train_batch_size": 1,
251
+ "trial_name": null,
252
+ "trial_params": null
253
+ }