Model save

Browse files

Files changed (9) hide show

README.md +57 -0
all_results.json +9 -0
generation_config.json +6 -0
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
train_results.json +9 -0
trainer_state.json +535 -0

README.md ADDED Viewed

	@@ -0,0 +1,57 @@

+---
+base_model: Qwen/Qwen2.5-7B
+library_name: transformers
+model_name: qwen25_7b_sft_math
+tags:
+- generated_from_trainer
+- trl
+- sft
+license: apache-2.0
+---
+# Model Card for qwen25_7b_sft_math
+This model is a fine-tuned version of [Qwen/Qwen2.5-7B](https://huggingface.co/Qwen/Qwen2.5-7B).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+## Quick start
+```python
+from transformers import pipeline
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+generator = pipeline("text-generation", model="Jennny/qwen25_7b_sft_math", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/jenny-shen/huggingface/runs/r912sj1z)
+This model was trained with supervised fine-tuning (SFT).
+### Framework versions
+- TRL: 0.12.2
+- Transformers: 4.46.3
+- PyTorch: 2.5.1+cu124
+- Datasets: 3.3.2
+- Tokenizers: 0.20.3
+## Citations
+Cite TRL as:
+```bibtex
+@misc{vonwerra2022trl,
+	title        = {{TRL: Transformer Reinforcement Learning}},
+	author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+	year         = 2020,
+	journal      = {GitHub repository},
+	publisher    = {GitHub},
+	howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 24329682780160.0,
+    "train_loss": 0.14914496035286876,
+    "train_runtime": 934.4929,
+    "train_samples": 5274,
+    "train_samples_per_second": 11.287,
+    "train_steps_per_second": 0.353
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "max_new_tokens": 2048,
+  "transformers_version": "4.46.3"
+}

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea4d19d19387ed04886c6e7ad825f2376fe46f2ac1f3e1a68dafb043d9a11d44
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:10147203b67be5a160709ebc57781cd8fff90dd3360a0f3eac2b7f0f2f053673
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b2ddb97d57b189055535bac3dfa2b59481934882968dd40f80c032e924aef95
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:44a142dd671d80f4e3060f084447b5349a84bf6c15aad72df10ab3baf8fabc48
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c53f6423939a85f65850c6be7cee9b6b2c539e60caf44a571a46a70049cd911a
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b3a68110d39341eae2f46e62860343e24e2e09b2616ace54f1136082bfef65d
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83da8f4036c38e2bad7ca1a855629135dcfcae9e78b480d4e0e8ef825de3137a
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:41b22ca4336a63c3bae0867343d3ae3de8eb0414bb17aa8f4d9d731359964420
 size 1089994880

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 2.0,
+    "total_flos": 24329682780160.0,
+    "train_loss": 0.14914496035286876,
+    "train_runtime": 934.4929,
+    "train_samples": 5274,
+    "train_samples_per_second": 11.287,
+    "train_steps_per_second": 0.353
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,535 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 100,
+  "global_step": 330,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.006060606060606061,
+      "grad_norm": 35.87954264224515,
+      "learning_rate": 6.060606060606061e-07,
+      "loss": 0.4258,
+      "step": 1
+    },
+    {
+      "epoch": 0.030303030303030304,
+      "grad_norm": 19.325121547130255,
+      "learning_rate": 3.0303030303030305e-06,
+      "loss": 0.3764,
+      "step": 5
+    },
+    {
+      "epoch": 0.06060606060606061,
+      "grad_norm": 28.11024716591639,
+      "learning_rate": 6.060606060606061e-06,
+      "loss": 0.3414,
+      "step": 10
+    },
+    {
+      "epoch": 0.09090909090909091,
+      "grad_norm": 4.816824647664995,
+      "learning_rate": 9.090909090909091e-06,
+      "loss": 0.2563,
+      "step": 15
+    },
+    {
+      "epoch": 0.12121212121212122,
+      "grad_norm": 1.6101991751262534,
+      "learning_rate": 1.2121212121212122e-05,
+      "loss": 0.2229,
+      "step": 20
+    },
+    {
+      "epoch": 0.15151515151515152,
+      "grad_norm": 1.4466897117313482,
+      "learning_rate": 1.5151515151515153e-05,
+      "loss": 0.2053,
+      "step": 25
+    },
+    {
+      "epoch": 0.18181818181818182,
+      "grad_norm": 1.411982251904895,
+      "learning_rate": 1.8181818181818182e-05,
+      "loss": 0.1941,
+      "step": 30
+    },
+    {
+      "epoch": 0.21212121212121213,
+      "grad_norm": 1.1694992562710236,
+      "learning_rate": 1.999776230627102e-05,
+      "loss": 0.1899,
+      "step": 35
+    },
+    {
+      "epoch": 0.24242424242424243,
+      "grad_norm": 1.0438333794181684,
+      "learning_rate": 1.9972599751485225e-05,
+      "loss": 0.1846,
+      "step": 40
+    },
+    {
+      "epoch": 0.2727272727272727,
+      "grad_norm": 0.8319491355818911,
+      "learning_rate": 1.9919548128307954e-05,
+      "loss": 0.1841,
+      "step": 45
+    },
+    {
+      "epoch": 0.30303030303030304,
+      "grad_norm": 0.9996099757839279,
+      "learning_rate": 1.9838755799290993e-05,
+      "loss": 0.1964,
+      "step": 50
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.7913395175455826,
+      "learning_rate": 1.973044870579824e-05,
+      "loss": 0.2125,
+      "step": 55
+    },
+    {
+      "epoch": 0.36363636363636365,
+      "grad_norm": 0.8429105469391287,
+      "learning_rate": 1.9594929736144978e-05,
+      "loss": 0.1969,
+      "step": 60
+    },
+    {
+      "epoch": 0.3939393939393939,
+      "grad_norm": 0.9398324090183084,
+      "learning_rate": 1.9432577878549635e-05,
+      "loss": 0.2056,
+      "step": 65
+    },
+    {
+      "epoch": 0.42424242424242425,
+      "grad_norm": 0.7734388791502247,
+      "learning_rate": 1.9243847161266924e-05,
+      "loss": 0.209,
+      "step": 70
+    },
+    {
+      "epoch": 0.45454545454545453,
+      "grad_norm": 0.8398725546299396,
+      "learning_rate": 1.9029265382866216e-05,
+      "loss": 0.2001,
+      "step": 75
+    },
+    {
+      "epoch": 0.48484848484848486,
+      "grad_norm": 0.811876332964586,
+      "learning_rate": 1.8789432636206197e-05,
+      "loss": 0.1909,
+      "step": 80
+    },
+    {
+      "epoch": 0.5151515151515151,
+      "grad_norm": 0.9379567664437414,
+      "learning_rate": 1.8525019630233463e-05,
+      "loss": 0.2014,
+      "step": 85
+    },
+    {
+      "epoch": 0.5454545454545454,
+      "grad_norm": 0.7797031245696635,
+      "learning_rate": 1.8236765814298328e-05,
+      "loss": 0.1954,
+      "step": 90
+    },
+    {
+      "epoch": 0.5757575757575758,
+      "grad_norm": 0.8027439585103229,
+      "learning_rate": 1.792547731023332e-05,
+      "loss": 0.1893,
+      "step": 95
+    },
+    {
+      "epoch": 0.6060606060606061,
+      "grad_norm": 0.8602477802444706,
+      "learning_rate": 1.7592024657977432e-05,
+      "loss": 0.2123,
+      "step": 100
+    },
+    {
+      "epoch": 0.6060606060606061,
+      "eval_loss": 0.2053576111793518,
+      "eval_runtime": 5.4818,
+      "eval_samples_per_second": 54.727,
+      "eval_steps_per_second": 1.824,
+      "step": 100
+    },
+    {
+      "epoch": 0.6363636363636364,
+      "grad_norm": 0.8118731483818742,
+      "learning_rate": 1.72373403810507e-05,
+      "loss": 0.2138,
+      "step": 105
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.8978812848872854,
+      "learning_rate": 1.686241637868734e-05,
+      "loss": 0.1936,
+      "step": 110
+    },
+    {
+      "epoch": 0.696969696969697,
+      "grad_norm": 0.6638605979568581,
+      "learning_rate": 1.6468301151920576e-05,
+      "loss": 0.2009,
+      "step": 115
+    },
+    {
+      "epoch": 0.7272727272727273,
+      "grad_norm": 0.8138109930342797,
+      "learning_rate": 1.6056096871376667e-05,
+      "loss": 0.1909,
+      "step": 120
+    },
+    {
+      "epoch": 0.7575757575757576,
+      "grad_norm": 0.8916280069276008,
+      "learning_rate": 1.5626956294978103e-05,
+      "loss": 0.1995,
+      "step": 125
+    },
+    {
+      "epoch": 0.7878787878787878,
+      "grad_norm": 0.929042576381489,
+      "learning_rate": 1.5182079544175957e-05,
+      "loss": 0.2032,
+      "step": 130
+    },
+    {
+      "epoch": 0.8181818181818182,
+      "grad_norm": 0.822937565391655,
+      "learning_rate": 1.472271074772683e-05,
+      "loss": 0.1992,
+      "step": 135
+    },
+    {
+      "epoch": 0.8484848484848485,
+      "grad_norm": 0.7355733490412192,
+      "learning_rate": 1.4250134562400301e-05,
+      "loss": 0.1905,
+      "step": 140
+    },
+    {
+      "epoch": 0.8787878787878788,
+      "grad_norm": 0.8972031528134459,
+      "learning_rate": 1.3765672580346986e-05,
+      "loss": 0.1963,
+      "step": 145
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 0.7484213489004259,
+      "learning_rate": 1.3270679633174219e-05,
+      "loss": 0.1872,
+      "step": 150
+    },
+    {
+      "epoch": 0.9393939393939394,
+      "grad_norm": 0.8864952745311097,
+      "learning_rate": 1.2766540003065272e-05,
+      "loss": 0.1813,
+      "step": 155
+    },
+    {
+      "epoch": 0.9696969696969697,
+      "grad_norm": 0.7562707141763975,
+      "learning_rate": 1.2254663551538047e-05,
+      "loss": 0.1937,
+      "step": 160
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.7925812619946488,
+      "learning_rate": 1.1736481776669307e-05,
+      "loss": 0.1869,
+      "step": 165
+    },
+    {
+      "epoch": 1.0303030303030303,
+      "grad_norm": 0.7262299560547822,
+      "learning_rate": 1.121344380981082e-05,
+      "loss": 0.1035,
+      "step": 170
+    },
+    {
+      "epoch": 1.0606060606060606,
+      "grad_norm": 0.8601895545477497,
+      "learning_rate": 1.068701236299281e-05,
+      "loss": 0.0924,
+      "step": 175
+    },
+    {
+      "epoch": 1.0909090909090908,
+      "grad_norm": 0.8688643496111008,
+      "learning_rate": 1.015865963834808e-05,
+      "loss": 0.0927,
+      "step": 180
+    },
+    {
+      "epoch": 1.121212121212121,
+      "grad_norm": 0.6566542211184438,
+      "learning_rate": 9.62986321099642e-06,
+      "loss": 0.0929,
+      "step": 185
+    },
+    {
+      "epoch": 1.1515151515151516,
+      "grad_norm": 0.7231975837719832,
+      "learning_rate": 9.102101896903084e-06,
+      "loss": 0.0992,
+      "step": 190
+    },
+    {
+      "epoch": 1.1818181818181819,
+      "grad_norm": 0.6644028122208893,
+      "learning_rate": 8.576851617267151e-06,
+      "loss": 0.1022,
+      "step": 195
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "grad_norm": 0.6779100739664534,
+      "learning_rate": 8.055581271005292e-06,
+      "loss": 0.0911,
+      "step": 200
+    },
+    {
+      "epoch": 1.2121212121212122,
+      "eval_loss": 0.20411260426044464,
+      "eval_runtime": 5.4814,
+      "eval_samples_per_second": 54.731,
+      "eval_steps_per_second": 1.824,
+      "step": 200
+    },
+    {
+      "epoch": 1.2424242424242424,
+      "grad_norm": 0.7215991029800394,
+      "learning_rate": 7.539748626873866e-06,
+      "loss": 0.097,
+      "step": 205
+    },
+    {
+      "epoch": 1.2727272727272727,
+      "grad_norm": 0.6420840571051777,
+      "learning_rate": 7.0307962467172555e-06,
+      "loss": 0.0828,
+      "step": 210
+    },
+    {
+      "epoch": 1.303030303030303,
+      "grad_norm": 0.7157636358082953,
+      "learning_rate": 6.530147451243377e-06,
+      "loss": 0.0953,
+      "step": 215
+    },
+    {
+      "epoch": 1.3333333333333333,
+      "grad_norm": 0.6543745308409002,
+      "learning_rate": 6.039202339608432e-06,
+      "loss": 0.0911,
+      "step": 220
+    },
+    {
+      "epoch": 1.3636363636363638,
+      "grad_norm": 0.6350258787861462,
+      "learning_rate": 5.559333873942259e-06,
+      "loss": 0.0873,
+      "step": 225
+    },
+    {
+      "epoch": 1.393939393939394,
+      "grad_norm": 0.6107660743054869,
+      "learning_rate": 5.091884039764321e-06,
+      "loss": 0.0883,
+      "step": 230
+    },
+    {
+      "epoch": 1.4242424242424243,
+      "grad_norm": 0.7110830925103607,
+      "learning_rate": 4.638160093027908e-06,
+      "loss": 0.0938,
+      "step": 235
+    },
+    {
+      "epoch": 1.4545454545454546,
+      "grad_norm": 0.7041242370967582,
+      "learning_rate": 4.19943090428802e-06,
+      "loss": 0.0938,
+      "step": 240
+    },
+    {
+      "epoch": 1.4848484848484849,
+      "grad_norm": 0.6257423065834941,
+      "learning_rate": 3.7769234102166365e-06,
+      "loss": 0.0869,
+      "step": 245
+    },
+    {
+      "epoch": 1.5151515151515151,
+      "grad_norm": 0.6573334135670441,
+      "learning_rate": 3.37181918238904e-06,
+      "loss": 0.0886,
+      "step": 250
+    },
+    {
+      "epoch": 1.5454545454545454,
+      "grad_norm": 0.5682090705026351,
+      "learning_rate": 2.9852511229367862e-06,
+      "loss": 0.0897,
+      "step": 255
+    },
+    {
+      "epoch": 1.5757575757575757,
+      "grad_norm": 0.599789982325095,
+      "learning_rate": 2.618300296308135e-06,
+      "loss": 0.0844,
+      "step": 260
+    },
+    {
+      "epoch": 1.606060606060606,
+      "grad_norm": 0.6976590209412902,
+      "learning_rate": 2.27199290599617e-06,
+      "loss": 0.0871,
+      "step": 265
+    },
+    {
+      "epoch": 1.6363636363636362,
+      "grad_norm": 0.739903490519265,
+      "learning_rate": 1.947297424689414e-06,
+      "loss": 0.0871,
+      "step": 270
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.6235342968784775,
+      "learning_rate": 1.6451218858706374e-06,
+      "loss": 0.0891,
+      "step": 275
+    },
+    {
+      "epoch": 1.696969696969697,
+      "grad_norm": 0.6326699548618793,
+      "learning_rate": 1.3663113444380905e-06,
+      "loss": 0.0836,
+      "step": 280
+    },
+    {
+      "epoch": 1.7272727272727273,
+      "grad_norm": 0.6057890106461301,
+      "learning_rate": 1.1116455134507665e-06,
+      "loss": 0.0835,
+      "step": 285
+    },
+    {
+      "epoch": 1.7575757575757576,
+      "grad_norm": 0.6815041899874066,
+      "learning_rate": 8.818365836066101e-07,
+      "loss": 0.0823,
+      "step": 290
+    },
+    {
+      "epoch": 1.7878787878787878,
+      "grad_norm": 0.6655841999996316,
+      "learning_rate": 6.775272315517423e-07,
+      "loss": 0.0809,
+      "step": 295
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "grad_norm": 0.6919417565002871,
+      "learning_rate": 4.992888225905467e-07,
+      "loss": 0.0827,
+      "step": 300
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "eval_loss": 0.19683437049388885,
+      "eval_runtime": 5.4854,
+      "eval_samples_per_second": 54.691,
+      "eval_steps_per_second": 1.823,
+      "step": 300
+    },
+    {
+      "epoch": 1.8484848484848486,
+      "grad_norm": 0.6671269900553093,
+      "learning_rate": 3.476198128228736e-07,
+      "loss": 0.0806,
+      "step": 305
+    },
+    {
+      "epoch": 1.878787878787879,
+      "grad_norm": 0.7226986759311911,
+      "learning_rate": 2.2294435517691504e-07,
+      "loss": 0.0849,
+      "step": 310
+    },
+    {
+      "epoch": 1.9090909090909092,
+      "grad_norm": 0.6203115249287291,
+      "learning_rate": 1.2561111323605714e-07,
+      "loss": 0.0796,
+      "step": 315
+    },
+    {
+      "epoch": 1.9393939393939394,
+      "grad_norm": 0.5457139445057179,
+      "learning_rate": 5.5892286176932875e-08,
+      "loss": 0.0844,
+      "step": 320
+    },
+    {
+      "epoch": 1.9696969696969697,
+      "grad_norm": 0.6283544510778528,
+      "learning_rate": 1.3982847545507271e-08,
+      "loss": 0.0868,
+      "step": 325
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.6466043764676332,
+      "learning_rate": 0.0,
+      "loss": 0.0862,
+      "step": 330
+    },
+    {
+      "epoch": 2.0,
+      "step": 330,
+      "total_flos": 24329682780160.0,
+      "train_loss": 0.14914496035286876,
+      "train_runtime": 934.4929,
+      "train_samples_per_second": 11.287,
+      "train_steps_per_second": 0.353
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 330,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 24329682780160.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}