Upload math SFT checkpoint

Browse files

Files changed (9) hide show

README.md +58 -0
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
tokenizer.json +2 -2
tokenizer_config.json +2 -1
trainer_state.json +59 -460
training_args.bin +2 -2

README.md ADDED Viewed

	@@ -0,0 +1,58 @@

+---
+base_model: meta-llama/Llama-3.1-8B
+library_name: transformers
+model_name: math
+tags:
+- generated_from_trainer
+- sft
+- trl
+license: llama3.1
+---
+# Model Card for math
+This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="<hub-namespace>/math", device="cuda")  # replace <hub-namespace>/math with this repository's Hub id
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/aq1048576-princeton-university/huggingface/runs/3npxcokv)
+This model was trained with SFT.
+### Framework versions
+- TRL: 0.23.0
+- Transformers: 4.57.1
+- Pytorch: 2.8.0
+- Datasets: 3.5.0
+- Tokenizers: 0.22.1
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c9ee29d73bdd07ea72d03279bc2ffbfa3ee77484a01abf00bfc822f308d37ca
 size 4976698672

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdbdb0b664d4e9c7e513be6668cd1fc051b071e6a652e9948678364600125476
 size 4976698672

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19fffaf57a12349b94838398d632a9bcf3dfd88656db7a7c245606f89053e977
 size 4999802720

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6a7804fa585d031b548d1ef958def1c8e0ee5af66c5d7ebb360eaec23815b5a
 size 4999802720

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57e0ce2e5ee497b4672592217620cffa453c2eb6b2944795170e4f243c3dd88b
 size 4915916176

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea953a29f478a82af8b6c1c26ec4d58878da0a0bac82741718169aad010a0cea
 size 4915916176

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b4e8defb49000fb60157a8d2488c46c8a88029f07de7e8d2b6da55342ddf3c8
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c0d3c81356ced85ec9704dc8b1a6aec7026326ee508f15199a0e02e4d096094
 size 1168138808

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2f90a0ee1b41702c7b233b02234294a53bc0684a08d3bcd8c8ff702e9a12f64
-size 17210019

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
+size 17209920

tokenizer_config.json CHANGED Viewed

@@ -2059,5 +2059,6 @@
   ],
   "model_max_length": 4096,
   "pad_token": "<|end_of_text|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
 }

   ],
   "model_max_length": 4096,
   "pad_token": "<|end_of_text|>",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n'  + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n'  + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}"
 }

trainer_state.json CHANGED Viewed

@@ -4,487 +4,86 @@
   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 1293,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.015472391451503724,
-      "grad_norm": 0.86328125,
-      "learning_rate": 3.653846153846154e-05,
-      "loss": 0.5808,
-      "step": 20
-    },
-    {
-      "epoch": 0.030944782903007447,
-      "grad_norm": 0.609375,
-      "learning_rate": 4.998701309604454e-05,
-      "loss": 0.4787,
-      "step": 40
-    },
-    {
-      "epoch": 0.04641717435451117,
-      "grad_norm": 0.5625,
-      "learning_rate": 4.9916354596319675e-05,
-      "loss": 0.4591,
-      "step": 60
-    },
-    {
-      "epoch": 0.061889565806014894,
-      "grad_norm": 0.59765625,
-      "learning_rate": 4.978443256845644e-05,
-      "loss": 0.4506,
-      "step": 80
-    },
-    {
-      "epoch": 0.07736195725751861,
-      "grad_norm": 0.53125,
-      "learning_rate": 4.9591571378077203e-05,
-      "loss": 0.4429,
-      "step": 100
-    },
-    {
-      "epoch": 0.09283434870902234,
-      "grad_norm": 0.6015625,
-      "learning_rate": 4.933824522604945e-05,
-      "loss": 0.4434,
-      "step": 120
-    },
-    {
-      "epoch": 0.10830674016052606,
-      "grad_norm": 0.52734375,
-      "learning_rate": 4.9025076982535925e-05,
-      "loss": 0.4411,
-      "step": 140
-    },
-    {
-      "epoch": 0.12377913161202979,
-      "grad_norm": 0.50390625,
-      "learning_rate": 4.865283665550167e-05,
-      "loss": 0.4358,
-      "step": 160
-    },
-    {
-      "epoch": 0.1392515230635335,
-      "grad_norm": 0.54296875,
-      "learning_rate": 4.8222439497443233e-05,
-      "loss": 0.4324,
-      "step": 180
-    },
-    {
-      "epoch": 0.15472391451503723,
-      "grad_norm": 0.453125,
-      "learning_rate": 4.773494375499543e-05,
-      "loss": 0.4336,
-      "step": 200
-    },
-    {
-      "epoch": 0.17019630596654095,
-      "grad_norm": 0.515625,
-      "learning_rate": 4.7191548066948686e-05,
-      "loss": 0.4289,
-      "step": 220
-    },
-    {
-      "epoch": 0.18566869741804468,
-      "grad_norm": 0.4765625,
-      "learning_rate": 4.659358851707464e-05,
-      "loss": 0.4242,
-      "step": 240
-    },
-    {
-      "epoch": 0.2011410888695484,
-      "grad_norm": 0.45703125,
-      "learning_rate": 4.5942535349006555e-05,
-      "loss": 0.42,
-      "step": 260
-    },
-    {
-      "epoch": 0.21661348032105213,
-      "grad_norm": 0.439453125,
-      "learning_rate": 4.523998935125173e-05,
-      "loss": 0.4255,
-      "step": 280
-    },
-    {
-      "epoch": 0.23208587177255585,
-      "grad_norm": 0.46484375,
-      "learning_rate": 4.44876779212244e-05,
-      "loss": 0.421,
-      "step": 300
-    },
-    {
-      "epoch": 0.24755826322405958,
-      "grad_norm": 0.54296875,
-      "learning_rate": 4.368745081797678e-05,
-      "loss": 0.4207,
-      "step": 320
-    },
-    {
-      "epoch": 0.2630306546755633,
-      "grad_norm": 0.44140625,
-      "learning_rate": 4.2841275614071176e-05,
-      "loss": 0.4133,
-      "step": 340
-    },
-    {
-      "epoch": 0.278503046127067,
-      "grad_norm": 0.5,
-      "learning_rate": 4.1951232857776164e-05,
-      "loss": 0.4144,
-      "step": 360
-    },
-    {
-      "epoch": 0.29397543757857075,
-      "grad_norm": 0.404296875,
-      "learning_rate": 4.1019510957481656e-05,
-      "loss": 0.412,
-      "step": 380
-    },
-    {
-      "epoch": 0.30944782903007445,
-      "grad_norm": 0.453125,
-      "learning_rate": 4.004840080091103e-05,
-      "loss": 0.4109,
-      "step": 400
-    },
-    {
-      "epoch": 0.3249202204815782,
-      "grad_norm": 0.439453125,
-      "learning_rate": 3.904029012236033e-05,
-      "loss": 0.4078,
-      "step": 420
-    },
-    {
-      "epoch": 0.3403926119330819,
-      "grad_norm": 0.447265625,
-      "learning_rate": 3.7997657631814363e-05,
-      "loss": 0.4048,
-      "step": 440
-    },
-    {
-      "epoch": 0.35586500338458565,
-      "grad_norm": 0.44921875,
-      "learning_rate": 3.6923066920374494e-05,
-      "loss": 0.4012,
-      "step": 460
-    },
-    {
-      "epoch": 0.37133739483608935,
-      "grad_norm": 0.43359375,
-      "learning_rate": 3.5819160156983755e-05,
-      "loss": 0.4033,
-      "step": 480
-    },
-    {
-      "epoch": 0.38680978628759305,
-      "grad_norm": 0.427734375,
-      "learning_rate": 3.4688651591947096e-05,
-      "loss": 0.4046,
-      "step": 500
-    },
-    {
-      "epoch": 0.38680978628759305,
-      "eval_loss": 0.3988688290119171,
-      "eval_runtime": 35.5904,
-      "eval_samples_per_second": 93.93,
-      "eval_steps_per_second": 2.95,
       "step": 500
     },
     {
-      "epoch": 0.4022821777390968,
-      "grad_norm": 0.421875,
-      "learning_rate": 3.3534320883220366e-05,
-      "loss": 0.4044,
-      "step": 520
-    },
-    {
-      "epoch": 0.4177545691906005,
-      "grad_norm": 0.421875,
-      "learning_rate": 3.235900626187713e-05,
-      "loss": 0.3979,
-      "step": 540
-    },
-    {
-      "epoch": 0.43322696064210425,
-      "grad_norm": 0.458984375,
-      "learning_rate": 3.116559755355772e-05,
-      "loss": 0.3964,
-      "step": 560
-    },
-    {
-      "epoch": 0.44869935209360795,
-      "grad_norm": 0.416015625,
-      "learning_rate": 2.9957029073059272e-05,
-      "loss": 0.3983,
-      "step": 580
-    },
-    {
-      "epoch": 0.4641717435451117,
-      "grad_norm": 0.423828125,
-      "learning_rate": 2.8736272409537257e-05,
-      "loss": 0.3909,
-      "step": 600
-    },
-    {
-      "epoch": 0.4796441349966154,
-      "grad_norm": 0.400390625,
-      "learning_rate": 2.7506329120058007e-05,
-      "loss": 0.392,
-      "step": 620
-    },
-    {
-      "epoch": 0.49511652644811915,
-      "grad_norm": 0.390625,
-      "learning_rate": 2.6270223349467123e-05,
-      "loss": 0.3923,
-      "step": 640
-    },
-    {
-      "epoch": 0.5105889178996229,
-      "grad_norm": 0.38671875,
-      "learning_rate": 2.503099439471977e-05,
-      "loss": 0.3922,
-      "step": 660
-    },
-    {
-      "epoch": 0.5260613093511266,
-      "grad_norm": 0.3984375,
-      "learning_rate": 2.3791689231955474e-05,
-      "loss": 0.389,
-      "step": 680
-    },
-    {
-      "epoch": 0.5415337008026303,
-      "grad_norm": 0.375,
-      "learning_rate": 2.2555355024691588e-05,
-      "loss": 0.3908,
-      "step": 700
-    },
-    {
-      "epoch": 0.557006092254134,
-      "grad_norm": 0.42578125,
-      "learning_rate": 2.1325031631555993e-05,
-      "loss": 0.3848,
-      "step": 720
-    },
-    {
-      "epoch": 0.5724784837056377,
-      "grad_norm": 0.37890625,
-      "learning_rate": 2.0103744131980902e-05,
-      "loss": 0.3887,
-      "step": 740
-    },
-    {
-      "epoch": 0.5879508751571415,
-      "grad_norm": 0.380859375,
-      "learning_rate": 1.8894495388235166e-05,
-      "loss": 0.3905,
-      "step": 760
-    },
-    {
-      "epoch": 0.6034232666086452,
-      "grad_norm": 0.37109375,
-      "learning_rate": 1.7700258662083573e-05,
-      "loss": 0.3907,
-      "step": 780
-    },
-    {
-      "epoch": 0.6188956580601489,
-      "grad_norm": 0.376953125,
-      "learning_rate": 1.6523970304226778e-05,
-      "loss": 0.3875,
-      "step": 800
-    },
-    {
-      "epoch": 0.6343680495116526,
-      "grad_norm": 0.376953125,
-      "learning_rate": 1.5368522534496994e-05,
-      "loss": 0.3843,
-      "step": 820
-    },
-    {
-      "epoch": 0.6498404409631564,
-      "grad_norm": 0.373046875,
-      "learning_rate": 1.4236756330561318e-05,
-      "loss": 0.3826,
-      "step": 840
-    },
-    {
-      "epoch": 0.6653128324146601,
-      "grad_norm": 0.3671875,
-      "learning_rate": 1.3131454442617521e-05,
-      "loss": 0.3832,
-      "step": 860
-    },
-    {
-      "epoch": 0.6807852238661638,
-      "grad_norm": 0.38671875,
-      "learning_rate": 1.2055334551257747e-05,
-      "loss": 0.3804,
-      "step": 880
-    },
-    {
-      "epoch": 0.6962576153176675,
-      "grad_norm": 0.357421875,
-      "learning_rate": 1.1011042585323233e-05,
-      "loss": 0.3798,
-      "step": 900
-    },
-    {
-      "epoch": 0.7117300067691713,
-      "grad_norm": 0.36328125,
-      "learning_rate": 1.000114621617988e-05,
-      "loss": 0.3834,
-      "step": 920
-    },
-    {
-      "epoch": 0.727202398220675,
-      "grad_norm": 0.357421875,
-      "learning_rate": 9.028128544410814e-06,
-      "loss": 0.3764,
-      "step": 940
-    },
-    {
-      "epoch": 0.7426747896721787,
-      "grad_norm": 0.39453125,
-      "learning_rate": 8.094381994448896e-06,
-      "loss": 0.3772,
-      "step": 960
-    },
-    {
-      "epoch": 0.7581471811236824,
-      "grad_norm": 0.400390625,
-      "learning_rate": 7.202202432160712e-06,
-      "loss": 0.3857,
-      "step": 980
-    },
-    {
-      "epoch": 0.7736195725751861,
-      "grad_norm": 0.380859375,
-      "learning_rate": 6.3537835198457515e-06,
-      "loss": 0.3811,
-      "step": 1000
-    },
-    {
-      "epoch": 0.7736195725751861,
-      "eval_loss": 0.3767356276512146,
-      "eval_runtime": 35.55,
-      "eval_samples_per_second": 94.036,
-      "eval_steps_per_second": 2.954,
       "step": 1000
     },
     {
-      "epoch": 0.7890919640266899,
-      "grad_norm": 0.359375,
-      "learning_rate": 5.551211322530381e-06,
-      "loss": 0.3784,
-      "step": 1020
-    },
-    {
-      "epoch": 0.8045643554781936,
-      "grad_norm": 0.359375,
-      "learning_rate": 4.796459178818496e-06,
-      "loss": 0.3821,
-      "step": 1040
-    },
-    {
-      "epoch": 0.8200367469296973,
-      "grad_norm": 0.375,
-      "learning_rate": 4.09138284891028e-06,
-      "loss": 0.3756,
-      "step": 1060
-    },
-    {
-      "epoch": 0.835509138381201,
-      "grad_norm": 0.36328125,
-      "learning_rate": 3.4377159517189896e-06,
-      "loss": 0.3774,
-      "step": 1080
     },
     {
-      "epoch": 0.8509815298327048,
-      "grad_norm": 0.376953125,
-      "learning_rate": 2.837065702304667e-06,
-      "loss": 0.3824,
-      "step": 1100
     },
     {
-      "epoch": 0.8664539212842085,
-      "grad_norm": 0.36328125,
-      "learning_rate": 2.2909089601057367e-06,
-      "loss": 0.3819,
-      "step": 1120
-    },
-    {
-      "epoch": 0.8819263127357122,
-      "grad_norm": 0.369140625,
-      "learning_rate": 1.800588597684652e-06,
-      "loss": 0.3772,
-      "step": 1140
-    },
-    {
-      "epoch": 0.8973987041872159,
-      "grad_norm": 0.376953125,
-      "learning_rate": 1.3673101989161912e-06,
-      "loss": 0.3787,
-      "step": 1160
-    },
-    {
-      "epoch": 0.9128710956387197,
-      "grad_norm": 0.353515625,
-      "learning_rate": 9.921390947368076e-07,
-      "loss": 0.3779,
-      "step": 1180
-    },
-    {
-      "epoch": 0.9283434870902234,
-      "grad_norm": 0.359375,
-      "learning_rate": 6.759977437432647e-07,
-      "loss": 0.3803,
-      "step": 1200
-    },
-    {
-      "epoch": 0.9438158785417271,
-      "grad_norm": 0.35546875,
-      "learning_rate": 4.196634640812602e-07,
-      "loss": 0.3804,
-      "step": 1220
-    },
-    {
-      "epoch": 0.9592882699932308,
-      "grad_norm": 0.365234375,
-      "learning_rate": 2.237665222006552e-07,
-      "loss": 0.38,
-      "step": 1240
-    },
-    {
-      "epoch": 0.9747606614447345,
-      "grad_norm": 0.36328125,
-      "learning_rate": 8.87885831766827e-08,
-      "loss": 0.3757,
-      "step": 1260
     },
     {
-      "epoch": 0.9902330528962383,
-      "grad_norm": 0.376953125,
-      "learning_rate": 1.5061526407406103e-08,
-      "loss": 0.3798,
-      "step": 1280
     },
     {
       "epoch": 1.0,
-      "step": 1293,
-      "total_flos": 2.149210011822614e+19,
-      "train_loss": 0.4033243281843122,
-      "train_runtime": 11128.3081,
-      "train_samples_per_second": 29.736,
-      "train_steps_per_second": 0.116
     }
   ],
-  "logging_steps": 20,
-  "max_steps": 1293,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
@@ -500,7 +99,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.149210011822614e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 3447,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 0.5079199960827827,
+      "epoch": 0.1450536698578474,
+      "grad_norm": 0.67578125,
+      "learning_rate": 4.363528715216104e-05,
+      "loss": 0.5061,
+      "mean_token_accuracy": 0.847917699560523,
+      "num_tokens": 34675536.0,
       "step": 500
     },
     {
+      "entropy": 0.4645708839818835,
+      "epoch": 0.2901073397156948,
+      "grad_norm": 0.609375,
+      "learning_rate": 3.6234458259325044e-05,
+      "loss": 0.4625,
+      "mean_token_accuracy": 0.8582921151965857,
+      "num_tokens": 69136517.0,
       "step": 1000
     },
     {
+      "entropy": 0.4413825359642506,
+      "epoch": 0.4351610095735422,
+      "grad_norm": 0.5859375,
+      "learning_rate": 2.8833629366489046e-05,
+      "loss": 0.4391,
+      "mean_token_accuracy": 0.8642738572955132,
+      "num_tokens": 103657138.0,
+      "step": 1500
     },
     {
+      "entropy": 0.426292674459517,
+      "epoch": 0.5802146794313896,
+      "grad_norm": 0.58984375,
+      "learning_rate": 2.143280047365305e-05,
+      "loss": 0.4239,
+      "mean_token_accuracy": 0.8679788280278444,
+      "num_tokens": 138126943.0,
+      "step": 2000
     },
     {
+      "entropy": 0.41619670213758947,
+      "epoch": 0.725268349289237,
+      "grad_norm": 0.6015625,
+      "learning_rate": 1.4031971580817053e-05,
+      "loss": 0.4135,
+      "mean_token_accuracy": 0.8707230059802532,
+      "num_tokens": 172549297.0,
+      "step": 2500
     },
     {
+      "entropy": 0.41040751719474794,
+      "epoch": 0.8703220191470844,
+      "grad_norm": 0.61328125,
+      "learning_rate": 6.631142687981054e-06,
+      "loss": 0.4075,
+      "mean_token_accuracy": 0.8723464601933956,
+      "num_tokens": 207014931.0,
+      "step": 3000
     },
     {
+      "entropy": 0.40811442769647177,
       "epoch": 1.0,
+      "mean_token_accuracy": 0.8727223603117386,
+      "num_tokens": 237886773.0,
+      "step": 3447,
+      "total_flos": 1.4839042051252683e+19,
+      "train_loss": 0.43732520784887674,
+      "train_runtime": 16918.1607,
+      "train_samples_per_second": 19.559,
+      "train_steps_per_second": 0.204
     }
   ],
+  "logging_steps": 500,
+  "max_steps": 3447,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1.4839042051252683e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e59b1b525c9b30f502e4fa90763c8538b3812972f6f2d3b635427432568b1e5f
-size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:81d65e9d414959d083a9cc3fc2b708a400d23698532b047892373466c21f0227
+size 6289