finalform committed on Aug 6, 2025

Commit

6d1d04a

verified ·

1 Parent(s): 4b34842

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +3 -0
adapter_config.json +2 -2
adapter_model.safetensors +1 -1
checkpoint-1245/adapter_config.json +2 -2
checkpoint-1245/adapter_model.safetensors +1 -1
checkpoint-1245/optimizer.pt +1 -1
checkpoint-1245/rng_state.pth +1 -1
checkpoint-1245/scheduler.pt +1 -1
checkpoint-1245/trainer_state.json +279 -249
checkpoint-1245/training_args.bin +1 -1
checkpoint-1660/README.md +202 -0
checkpoint-1660/adapter_config.json +39 -0
checkpoint-1660/adapter_model.safetensors +3 -0
checkpoint-1660/added_tokens.json +28 -0
checkpoint-1660/chat_template.jinja +89 -0
checkpoint-1660/merges.txt +0 -0
checkpoint-1660/optimizer.pt +3 -0
checkpoint-1660/rng_state.pth +3 -0
checkpoint-1660/scheduler.pt +3 -0
checkpoint-1660/special_tokens_map.json +25 -0
checkpoint-1660/tokenizer.json +3 -0
checkpoint-1660/tokenizer_config.json +239 -0
checkpoint-1660/trainer_state.json +668 -0
checkpoint-1660/training_args.bin +3 -0
checkpoint-1660/vocab.json +0 -0
checkpoint-415/README.md +202 -0
checkpoint-415/adapter_config.json +39 -0
checkpoint-415/adapter_model.safetensors +3 -0
checkpoint-415/added_tokens.json +28 -0
checkpoint-415/chat_template.jinja +89 -0
checkpoint-415/merges.txt +0 -0
checkpoint-415/optimizer.pt +3 -0
checkpoint-415/rng_state.pth +3 -0
checkpoint-415/scheduler.pt +3 -0
checkpoint-415/special_tokens_map.json +25 -0
checkpoint-415/tokenizer.json +3 -0
checkpoint-415/tokenizer_config.json +239 -0
checkpoint-415/trainer_state.json +188 -0
checkpoint-415/training_args.bin +3 -0
checkpoint-415/vocab.json +0 -0
checkpoint-830/README.md +202 -0
checkpoint-830/adapter_config.json +39 -0
checkpoint-830/adapter_model.safetensors +3 -0
checkpoint-830/added_tokens.json +28 -0
checkpoint-830/chat_template.jinja +89 -0
checkpoint-830/merges.txt +0 -0
checkpoint-830/optimizer.pt +3 -0
checkpoint-830/rng_state.pth +3 -0
checkpoint-830/scheduler.pt +3 -0
checkpoint-830/special_tokens_map.json +25 -0

.gitattributes CHANGED Viewed

@@ -36,3 +36,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 checkpoint-1245/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-750/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text

 checkpoint-1245/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-750/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-1660/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-415/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-830/tokenizer.json filter=lfs diff=lfs merge=lfs -text

adapter_config.json CHANGED Viewed

@@ -24,12 +24,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
     "gate_proj",
     "o_proj",
     "down_proj",
-    "v_proj",
-    "q_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q_proj",
     "k_proj",
+    "v_proj",
     "gate_proj",
     "o_proj",
     "down_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03224c5e80ca77378b2d951c7e354983e452793b01311aca070149ab1b3752f6
 size 349243752

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea2ad39cdd86eff3c4df726c9ae56cab758f941a5339c78780151d5f982614d6
 size 349243752

checkpoint-1245/adapter_config.json CHANGED Viewed

@@ -24,12 +24,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
     "gate_proj",
     "o_proj",
     "down_proj",
-    "v_proj",
-    "q_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q_proj",
     "k_proj",
+    "v_proj",
     "gate_proj",
     "o_proj",
     "down_proj",
     "up_proj"
   ],
   "task_type": "CAUSAL_LM",

checkpoint-1245/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03224c5e80ca77378b2d951c7e354983e452793b01311aca070149ab1b3752f6
 size 349243752

 version https://git-lfs.github.com/spec/v1
+oid sha256:debbf9c7f4978f933f45469be929af12fd6873eb796fb1cfede2ddf133a31247
 size 349243752

checkpoint-1245/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c06811ab2521eb093525da13aff2b31069d46b9db08099b1a092ff3122a2495e
 size 698662547

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4bc260a1513be6d302f69fb80d16cd2d6a1bb041d6cd844862be1de3616901c
 size 698662547

checkpoint-1245/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca940159064321c03f2cd2c848b8308bc995f510752a53bc4b8a8f27aa7b6e76
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:e12287504ff9b057ed520c310df9c42c835bf6f37da9431e263304d2dd53349e
 size 14645

checkpoint-1245/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82fcd00123c3c69d4e7b09d8e96247aa0926ecb0862a7624726a4095234b5d76
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:193574996f600c28e8b47f45151d73bb7e621e2e2ca63a97be22e8a15e926943
 size 1465

checkpoint-1245/trainer_state.json CHANGED Viewed

@@ -11,451 +11,481 @@
   "log_history": [
     {
       "epoch": 0.060350030175015085,
-      "grad_norm": 0.2726256847381592,
-      "learning_rate": 0.0001894736842105263,
-      "loss": 1.7821,
-      "mean_token_accuracy": 0.634402088522911,
-      "num_tokens": 157892.0,
       "step": 25
     },
     {
       "epoch": 0.12070006035003017,
-      "grad_norm": 0.27001282572746277,
-      "learning_rate": 0.00029993852448555923,
-      "loss": 0.8134,
-      "mean_token_accuracy": 0.7912577825784683,
-      "num_tokens": 284188.0,
       "step": 50
     },
     {
       "epoch": 0.18105009052504525,
-      "grad_norm": 0.23073740303516388,
-      "learning_rate": 0.00029934198818572623,
-      "loss": 0.603,
-      "mean_token_accuracy": 0.8322482287883759,
-      "num_tokens": 446681.0,
       "step": 75
     },
     {
       "epoch": 0.24140012070006034,
-      "grad_norm": 0.28391265869140625,
-      "learning_rate": 0.0002981133400718627,
-      "loss": 0.4865,
-      "mean_token_accuracy": 0.8649927872419357,
-      "num_tokens": 575282.0,
       "step": 100
     },
     {
       "epoch": 0.30175015087507545,
-      "grad_norm": 0.24657945334911346,
-      "learning_rate": 0.0002962577805768642,
-      "loss": 0.3658,
-      "mean_token_accuracy": 0.8938217234611511,
-      "num_tokens": 735901.0,
       "step": 125
     },
     {
       "epoch": 0.3621001810500905,
-      "grad_norm": 0.33079952001571655,
-      "learning_rate": 0.00029378316362776546,
-      "loss": 0.3273,
-      "mean_token_accuracy": 0.9049834907054901,
-      "num_tokens": 864618.0,
       "step": 150
     },
     {
       "epoch": 0.4224502112251056,
-      "grad_norm": 0.2289544939994812,
-      "learning_rate": 0.0002906999634028451,
-      "loss": 0.2503,
-      "mean_token_accuracy": 0.9277959632873535,
-      "num_tokens": 1024868.0,
       "step": 175
     },
     {
       "epoch": 0.4828002414001207,
-      "grad_norm": 0.44999751448631287,
-      "learning_rate": 0.0002870212299981334,
-      "loss": 0.2345,
-      "mean_token_accuracy": 0.9310170763731003,
-      "num_tokens": 1152961.0,
       "step": 200
     },
     {
       "epoch": 0.5431502715751357,
-      "grad_norm": 0.2300587147474289,
-      "learning_rate": 0.00028276253419097193,
-      "loss": 0.1834,
-      "mean_token_accuracy": 0.9455738461017609,
-      "num_tokens": 1310452.0,
       "step": 225
     },
     {
       "epoch": 0.6035003017501509,
-      "grad_norm": 0.4270714521408081,
-      "learning_rate": 0.00027794190153442033,
-      "loss": 0.1734,
-      "mean_token_accuracy": 0.9492023700475692,
-      "num_tokens": 1438013.0,
       "step": 250
     },
     {
       "epoch": 0.663850331925166,
-      "grad_norm": 0.17301490902900696,
-      "learning_rate": 0.00027257973606146575,
-      "loss": 0.1234,
-      "mean_token_accuracy": 0.9651462835073471,
-      "num_tokens": 1593568.0,
       "step": 275
     },
     {
       "epoch": 0.724200362100181,
-      "grad_norm": 0.42916586995124817,
-      "learning_rate": 0.0002666987339219681,
-      "loss": 0.1336,
-      "mean_token_accuracy": 0.9616361856460571,
-      "num_tokens": 1721008.0,
       "step": 300
     },
     {
       "epoch": 0.7845503922751962,
-      "grad_norm": 0.09723316133022308,
-      "learning_rate": 0.0002603237873178853,
-      "loss": 0.1095,
-      "mean_token_accuracy": 0.968398722410202,
-      "num_tokens": 1877612.0,
       "step": 325
     },
     {
       "epoch": 0.8449004224502112,
-      "grad_norm": 0.29951852560043335,
-      "learning_rate": 0.0002534818791433866,
-      "loss": 0.0958,
-      "mean_token_accuracy": 0.9727602601051331,
-      "num_tokens": 2003095.0,
       "step": 350
     },
     {
       "epoch": 0.9052504526252263,
-      "grad_norm": 0.19163310527801514,
-      "learning_rate": 0.00024620196877580576,
-      "loss": 0.094,
-      "mean_token_accuracy": 0.973841313123703,
-      "num_tokens": 2159917.0,
       "step": 375
     },
     {
       "epoch": 0.9656004828002414,
-      "grad_norm": 0.40213829278945923,
-      "learning_rate": 0.00023851486950083892,
-      "loss": 0.0911,
-      "mean_token_accuracy": 0.97495201587677,
-      "num_tokens": 2287879.0,
       "step": 400
     },
     {
       "epoch": 1.024140012070006,
-      "grad_norm": 0.1628786027431488,
-      "learning_rate": 0.00023045311809080567,
-      "loss": 0.0952,
-      "mean_token_accuracy": 0.9737161440947621,
-      "num_tokens": 2438228.0,
       "step": 425
     },
     {
       "epoch": 1.0844900422450212,
-      "grad_norm": 0.17094825208187103,
-      "learning_rate": 0.00022205083708799942,
-      "loss": 0.0561,
-      "mean_token_accuracy": 0.9833515232801437,
-      "num_tokens": 2583293.0,
       "step": 450
     },
     {
       "epoch": 1.1448400724200363,
-      "grad_norm": 0.1984417885541916,
-      "learning_rate": 0.0002133435903760353,
-      "loss": 0.0719,
-      "mean_token_accuracy": 0.9798995298147202,
-      "num_tokens": 2724870.0,
       "step": 475
     },
     {
       "epoch": 1.2051901025950513,
-      "grad_norm": 0.1823195070028305,
-      "learning_rate": 0.0002043682326505094,
-      "loss": 0.0466,
-      "mean_token_accuracy": 0.9866354477405548,
-      "num_tokens": 2868460.0,
       "step": 500
     },
     {
       "epoch": 1.2655401327700664,
-      "grad_norm": 0.14491313695907593,
-      "learning_rate": 0.000195162753426108,
-      "loss": 0.0701,
-      "mean_token_accuracy": 0.9807595479488372,
-      "num_tokens": 3011315.0,
       "step": 525
     },
     {
       "epoch": 1.3258901629450814,
-      "grad_norm": 0.14561036229133606,
-      "learning_rate": 0.00018576611624042852,
-      "loss": 0.0493,
-      "mean_token_accuracy": 0.9858711469173431,
-      "num_tokens": 3153318.0,
       "step": 550
     },
     {
       "epoch": 1.3862401931200965,
-      "grad_norm": 0.08111721277236938,
-      "learning_rate": 0.00017621809373510641,
-      "loss": 0.0621,
-      "mean_token_accuracy": 0.9823315119743348,
-      "num_tokens": 3296073.0,
       "step": 575
     },
     {
       "epoch": 1.4465902232951118,
-      "grad_norm": 0.0793062075972557,
-      "learning_rate": 0.00016655909931229048,
-      "loss": 0.0472,
-      "mean_token_accuracy": 0.986634315252304,
-      "num_tokens": 3440522.0,
       "step": 600
     },
     {
       "epoch": 1.5069402534701268,
-      "grad_norm": 0.09899070113897324,
-      "learning_rate": 0.00015683001607900553,
-      "loss": 0.0507,
-      "mean_token_accuracy": 0.9856790328025817,
-      "num_tokens": 3582759.0,
       "step": 625
     },
     {
       "epoch": 1.567290283645142,
-      "grad_norm": 0.10104668885469437,
-      "learning_rate": 0.00014707202380342108,
-      "loss": 0.045,
-      "mean_token_accuracy": 0.9872064375877381,
-      "num_tokens": 3724646.0,
       "step": 650
     },
     {
       "epoch": 1.627640313820157,
-      "grad_norm": 0.12305350601673126,
-      "learning_rate": 0.00013732642461545747,
-      "loss": 0.061,
-      "mean_token_accuracy": 0.9832958990335464,
-      "num_tokens": 3867116.0,
       "step": 675
     },
     {
       "epoch": 1.687990343995172,
-      "grad_norm": 0.07760481536388397,
-      "learning_rate": 0.00012763446818947865,
-      "loss": 0.039,
-      "mean_token_accuracy": 0.9887771773338317,
-      "num_tokens": 4010711.0,
       "step": 700
     },
     {
       "epoch": 1.748340374170187,
-      "grad_norm": 0.07849643379449844,
-      "learning_rate": 0.00011803717714901029,
-      "loss": 0.0611,
-      "mean_token_accuracy": 0.9825647151470185,
-      "num_tokens": 4153743.0,
       "step": 725
     },
     {
       "epoch": 1.8086904043452021,
-      "grad_norm": 0.07204271852970123,
-      "learning_rate": 0.00010857517343248423,
-      "loss": 0.0371,
-      "mean_token_accuracy": 0.9890217131376267,
-      "num_tokens": 4297676.0,
       "step": 750
     },
     {
       "epoch": 1.8690404345202172,
-      "grad_norm": 0.08970830589532852,
-      "learning_rate": 9.9288506354941e-05,
-      "loss": 0.0518,
-      "mean_token_accuracy": 0.9855406028032303,
-      "num_tokens": 4439199.0,
       "step": 775
     },
     {
       "epoch": 1.9293904646952322,
-      "grad_norm": 0.07621912658214569,
-      "learning_rate": 9.021648309344443e-05,
-      "loss": 0.0354,
-      "mean_token_accuracy": 0.9890959084033966,
-      "num_tokens": 4581949.0,
       "step": 800
     },
     {
       "epoch": 1.9897404948702473,
-      "grad_norm": 0.07033903896808624,
-      "learning_rate": 8.139750231370407e-05,
-      "loss": 0.0416,
-      "mean_token_accuracy": 0.9879856097698212,
-      "num_tokens": 4714427.0,
       "step": 825
     },
     {
       "epoch": 2.048280024140012,
-      "grad_norm": 0.07433084398508072,
-      "learning_rate": 7.28688916421049e-05,
-      "loss": 0.048,
-      "mean_token_accuracy": 0.9853949061374074,
-      "num_tokens": 4865666.0,
       "step": 850
     },
     {
       "epoch": 2.1086300543150274,
-      "grad_norm": 0.06543659418821335,
-      "learning_rate": 6.466674967106751e-05,
-      "loss": 0.0291,
-      "mean_token_accuracy": 0.9913258212804794,
-      "num_tokens": 4998878.0,
       "step": 875
     },
     {
       "epoch": 2.1689800844900424,
-      "grad_norm": 0.06879922747612,
-      "learning_rate": 5.682579316647408e-05,
-      "loss": 0.039,
-      "mean_token_accuracy": 0.9881585425138474,
-      "num_tokens": 5150646.0,
       "step": 900
     },
     {
       "epoch": 2.2293301146650575,
-      "grad_norm": 0.06347832828760147,
-      "learning_rate": 4.937921012387816e-05,
-      "loss": 0.0294,
-      "mean_token_accuracy": 0.9909294676780701,
-      "num_tokens": 5285687.0,
       "step": 925
     },
     {
       "epoch": 2.2896801448400725,
-      "grad_norm": 0.08423357456922531,
-      "learning_rate": 4.235851929545771e-05,
-      "loss": 0.0379,
-      "mean_token_accuracy": 0.9882619392871856,
-      "num_tokens": 5437604.0,
       "step": 950
     },
     {
       "epoch": 2.3500301750150876,
-      "grad_norm": 0.06368061155080795,
-      "learning_rate": 3.579343678228525e-05,
-      "loss": 0.0292,
-      "mean_token_accuracy": 0.9909913945198059,
-      "num_tokens": 5571761.0,
       "step": 975
     },
     {
       "epoch": 2.4103802051901027,
-      "grad_norm": 0.061880286782979965,
-      "learning_rate": 2.9711750256582538e-05,
       "loss": 0.0403,
-      "mean_token_accuracy": 0.9878204268217087,
-      "num_tokens": 5724871.0,
       "step": 1000
     },
     {
       "epoch": 2.4707302353651177,
-      "grad_norm": 0.060907330363988876,
-      "learning_rate": 2.413920134633272e-05,
-      "loss": 0.0269,
-      "mean_token_accuracy": 0.9914020735025406,
-      "num_tokens": 5859593.0,
       "step": 1025
     },
     {
       "epoch": 2.5310802655401328,
-      "grad_norm": 0.07528848201036453,
-      "learning_rate": 1.909937668007352e-05,
-      "loss": 0.0396,
-      "mean_token_accuracy": 0.9879765379428863,
-      "num_tokens": 6012549.0,
       "step": 1050
     },
     {
       "epoch": 2.591430295715148,
-      "grad_norm": 0.07129911333322525,
-      "learning_rate": 1.461360805304146e-05,
-      "loss": 0.0252,
-      "mean_token_accuracy": 0.9917944890260696,
-      "num_tokens": 6146101.0,
       "step": 1075
     },
     {
       "epoch": 2.651780325890163,
-      "grad_norm": 0.055356480181217194,
-      "learning_rate": 1.0700882137227434e-05,
-      "loss": 0.0457,
-      "mean_token_accuracy": 0.9865469449758529,
-      "num_tokens": 6298782.0,
       "step": 1100
     },
     {
       "epoch": 2.712130356065178,
-      "grad_norm": 0.0652829185128212,
-      "learning_rate": 7.377760117509834e-06,
-      "loss": 0.0251,
-      "mean_token_accuracy": 0.9921035206317902,
-      "num_tokens": 6430918.0,
       "step": 1125
     },
     {
       "epoch": 2.772480386240193,
-      "grad_norm": 0.05269525945186615,
-      "learning_rate": 4.65830759401658e-06,
-      "loss": 0.0382,
-      "mean_token_accuracy": 0.9884834003448486,
-      "num_tokens": 6580407.0,
       "step": 1150
     },
     {
       "epoch": 2.832830416415208,
-      "grad_norm": 0.07319523394107819,
-      "learning_rate": 2.554035047414732e-06,
-      "loss": 0.0244,
-      "mean_token_accuracy": 0.9923358470201492,
-      "num_tokens": 6712568.0,
       "step": 1175
     },
     {
       "epoch": 2.8931804465902236,
-      "grad_norm": 0.07114146649837494,
-      "learning_rate": 1.0738491191171372e-06,
-      "loss": 0.0404,
-      "mean_token_accuracy": 0.9876986140012741,
-      "num_tokens": 6865440.0,
       "step": 1200
     },
     {
       "epoch": 2.9535304767652386,
-      "grad_norm": 0.05154518783092499,
-      "learning_rate": 2.2401491261947456e-07,
-      "loss": 0.0261,
-      "mean_token_accuracy": 0.9918761789798737,
-      "num_tokens": 7000309.0,
       "step": 1225
     }
   ],
   "logging_steps": 25,
-  "max_steps": 1245,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
-  "save_steps": 750,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -463,12 +493,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.26612671378901e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "log_history": [
     {
       "epoch": 0.060350030175015085,
+      "grad_norm": 0.3188655376434326,
+      "learning_rate": 0.00014399999999999998,
+      "loss": 1.86,
+      "mean_token_accuracy": 0.6245196205377579,
+      "num_tokens": 157541.0,
       "step": 25
     },
     {
       "epoch": 0.12070006035003017,
+      "grad_norm": 0.2658841907978058,
+      "learning_rate": 0.000294,
+      "loss": 0.8983,
+      "mean_token_accuracy": 0.7745399290323257,
+      "num_tokens": 284307.0,
       "step": 50
     },
     {
       "epoch": 0.18105009052504525,
+      "grad_norm": 0.24192510545253754,
+      "learning_rate": 0.00029983554299928354,
+      "loss": 0.6083,
+      "mean_token_accuracy": 0.8323455977439881,
+      "num_tokens": 440617.0,
       "step": 75
     },
     {
       "epoch": 0.24140012070006034,
+      "grad_norm": 0.3552621006965637,
+      "learning_rate": 0.00029931487386844626,
+      "loss": 0.5333,
+      "mean_token_accuracy": 0.8501472049951553,
+      "num_tokens": 567051.0,
       "step": 100
     },
     {
       "epoch": 0.30175015087507545,
+      "grad_norm": 0.29719677567481995,
+      "learning_rate": 0.000298438945846945,
+      "loss": 0.4156,
+      "mean_token_accuracy": 0.8798932474851608,
+      "num_tokens": 724100.0,
       "step": 125
     },
     {
       "epoch": 0.3621001810500905,
+      "grad_norm": 0.30843958258628845,
+      "learning_rate": 0.0002972098429951895,
+      "loss": 0.3274,
+      "mean_token_accuracy": 0.9020548111200333,
+      "num_tokens": 852073.0,
       "step": 150
     },
     {
       "epoch": 0.4224502112251056,
+      "grad_norm": 0.3006002604961395,
+      "learning_rate": 0.0002956304896682979,
+      "loss": 0.2719,
+      "mean_token_accuracy": 0.9195013505220413,
+      "num_tokens": 1012131.0,
       "step": 175
     },
     {
       "epoch": 0.4828002414001207,
+      "grad_norm": 0.39749929308891296,
+      "learning_rate": 0.00029370464355829616,
+      "loss": 0.2304,
+      "mean_token_accuracy": 0.9330711585283279,
+      "num_tokens": 1138475.0,
       "step": 200
     },
     {
       "epoch": 0.5431502715751357,
+      "grad_norm": 0.21084098517894745,
+      "learning_rate": 0.00029143688675359184,
+      "loss": 0.1776,
+      "mean_token_accuracy": 0.9493161207437515,
+      "num_tokens": 1298548.0,
       "step": 225
     },
     {
       "epoch": 0.6035003017501509,
+      "grad_norm": 0.2666475772857666,
+      "learning_rate": 0.000288832614836995,
+      "loss": 0.1483,
+      "mean_token_accuracy": 0.9569978493452073,
+      "num_tokens": 1428047.0,
       "step": 250
     },
     {
       "epoch": 0.663850331925166,
+      "grad_norm": 0.291533887386322,
+      "learning_rate": 0.00028589802404822455,
+      "loss": 0.1563,
+      "mean_token_accuracy": 0.9560109853744507,
+      "num_tokens": 1586758.0,
       "step": 275
     },
     {
       "epoch": 0.724200362100181,
+      "grad_norm": 0.3723059892654419,
+      "learning_rate": 0.0002826400965414433,
+      "loss": 0.1303,
+      "mean_token_accuracy": 0.9621474850177765,
+      "num_tokens": 1714018.0,
       "step": 300
     },
     {
       "epoch": 0.7845503922751962,
+      "grad_norm": 0.39150121808052063,
+      "learning_rate": 0.00027906658377289907,
+      "loss": 0.115,
+      "mean_token_accuracy": 0.9675602000951767,
+      "num_tokens": 1872256.0,
       "step": 325
     },
     {
       "epoch": 0.8449004224502112,
+      "grad_norm": 0.3891217112541199,
+      "learning_rate": 0.0002751859880581954,
+      "loss": 0.1052,
+      "mean_token_accuracy": 0.9699741625785827,
+      "num_tokens": 1999407.0,
       "step": 350
     },
     {
       "epoch": 0.9052504526252263,
+      "grad_norm": 0.15646834671497345,
+      "learning_rate": 0.00027100754234307293,
+      "loss": 0.095,
+      "mean_token_accuracy": 0.9727736663818359,
+      "num_tokens": 2159407.0,
       "step": 375
     },
     {
       "epoch": 0.9656004828002414,
+      "grad_norm": 0.3074830174446106,
+      "learning_rate": 0.00026654118823583243,
+      "loss": 0.1027,
+      "mean_token_accuracy": 0.9708205509185791,
+      "num_tokens": 2287233.0,
       "step": 400
     },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.08644451200962067,
+      "eval_mean_token_accuracy": 0.9759664358319463,
+      "eval_num_tokens": 2367283.0,
+      "eval_runtime": 72.5321,
+      "eval_samples_per_second": 5.087,
+      "eval_steps_per_second": 2.551,
+      "step": 415
+    },
     {
       "epoch": 1.024140012070006,
+      "grad_norm": 0.12701059877872467,
+      "learning_rate": 0.00026179755235366435,
+      "loss": 0.0911,
+      "mean_token_accuracy": 0.9744510755096514,
+      "num_tokens": 2436776.0,
       "step": 425
     },
     {
       "epoch": 1.0844900422450212,
+      "grad_norm": 0.18361371755599976,
+      "learning_rate": 0.00025678792103916504,
+      "loss": 0.0605,
+      "mean_token_accuracy": 0.9830399179458618,
+      "num_tokens": 2579134.0,
       "step": 450
     },
     {
       "epoch": 1.1448400724200363,
+      "grad_norm": 0.14009377360343933,
+      "learning_rate": 0.0002515242135071945,
+      "loss": 0.0841,
+      "mean_token_accuracy": 0.9770982998609543,
+      "num_tokens": 2723253.0,
       "step": 475
     },
     {
       "epoch": 1.2051901025950513,
+      "grad_norm": 0.2148066610097885,
+      "learning_rate": 0.0002460189534859663,
+      "loss": 0.0574,
+      "mean_token_accuracy": 0.983966583609581,
+      "num_tokens": 2868154.0,
       "step": 500
     },
     {
       "epoch": 1.2655401327700664,
+      "grad_norm": 0.13684915006160736,
+      "learning_rate": 0.00024028523941984378,
+      "loss": 0.0752,
+      "mean_token_accuracy": 0.9793112319707871,
+      "num_tokens": 3010656.0,
       "step": 525
     },
     {
       "epoch": 1.3258901629450814,
+      "grad_norm": 0.1301647573709488,
+      "learning_rate": 0.00023433671330473613,
+      "loss": 0.0548,
+      "mean_token_accuracy": 0.9846732890605927,
+      "num_tokens": 3154781.0,
       "step": 550
     },
     {
       "epoch": 1.3862401931200965,
+      "grad_norm": 0.12535132467746735,
+      "learning_rate": 0.00022818752823024516,
+      "loss": 0.0633,
+      "mean_token_accuracy": 0.9826712667942047,
+      "num_tokens": 3297498.0,
       "step": 575
     },
     {
       "epoch": 1.4465902232951118,
+      "grad_norm": 0.1197441816329956,
+      "learning_rate": 0.00022185231470578672,
+      "loss": 0.0457,
+      "mean_token_accuracy": 0.986530932188034,
+      "num_tokens": 3439937.0,
       "step": 600
     },
     {
       "epoch": 1.5069402534701268,
+      "grad_norm": 0.09161168336868286,
+      "learning_rate": 0.00021534614585080636,
+      "loss": 0.0661,
+      "mean_token_accuracy": 0.980983544588089,
+      "num_tokens": 3581658.0,
       "step": 625
     },
     {
       "epoch": 1.567290283645142,
+      "grad_norm": 0.1546151489019394,
+      "learning_rate": 0.0002086845015319108,
+      "loss": 0.0432,
+      "mean_token_accuracy": 0.9872822916507721,
+      "num_tokens": 3724450.0,
       "step": 650
     },
     {
       "epoch": 1.627640313820157,
+      "grad_norm": 0.10510735213756561,
+      "learning_rate": 0.00020188323153224244,
+      "loss": 0.0615,
+      "mean_token_accuracy": 0.9828537595272064,
+      "num_tokens": 3865114.0,
       "step": 675
     },
     {
       "epoch": 1.687990343995172,
+      "grad_norm": 0.09251394867897034,
+      "learning_rate": 0.00019495851784072558,
+      "loss": 0.0426,
+      "mean_token_accuracy": 0.9873446094989776,
+      "num_tokens": 4007753.0,
       "step": 700
     },
     {
       "epoch": 1.748340374170187,
+      "grad_norm": 0.09151948243379593,
+      "learning_rate": 0.00018792683615090954,
+      "loss": 0.0559,
+      "mean_token_accuracy": 0.9842093575000763,
+      "num_tokens": 4149621.0,
       "step": 725
     },
     {
       "epoch": 1.8086904043452021,
+      "grad_norm": 0.17836125195026398,
+      "learning_rate": 0.00018080491666101184,
+      "loss": 0.0403,
+      "mean_token_accuracy": 0.9877779418230057,
+      "num_tokens": 4293130.0,
       "step": 750
     },
     {
       "epoch": 1.8690404345202172,
+      "grad_norm": 0.10722211748361588,
+      "learning_rate": 0.00017360970426842824,
+      "loss": 0.0507,
+      "mean_token_accuracy": 0.9855174136161804,
+      "num_tokens": 4435313.0,
       "step": 775
     },
     {
       "epoch": 1.9293904646952322,
+      "grad_norm": 0.08690394461154938,
+      "learning_rate": 0.00016635831825341846,
+      "loss": 0.0388,
+      "mean_token_accuracy": 0.9888739967346192,
+      "num_tokens": 4579734.0,
       "step": 800
     },
     {
       "epoch": 1.9897404948702473,
+      "grad_norm": 0.07448932528495789,
+      "learning_rate": 0.00015906801154788881,
+      "loss": 0.0442,
+      "mean_token_accuracy": 0.9876042759418487,
+      "num_tokens": 4714195.0,
       "step": 825
     },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.04987528547644615,
+      "eval_mean_token_accuracy": 0.9863727804776784,
+      "eval_num_tokens": 4734566.0,
+      "eval_runtime": 72.6082,
+      "eval_samples_per_second": 5.082,
+      "eval_steps_per_second": 2.548,
+      "step": 830
+    },
     {
       "epoch": 2.048280024140012,
+      "grad_norm": 0.06745678931474686,
+      "learning_rate": 0.00015175612968618376,
+      "loss": 0.0459,
+      "mean_token_accuracy": 0.9859098005540592,
+      "num_tokens": 4863820.0,
       "step": 850
     },
     {
       "epoch": 2.1086300543150274,
+      "grad_norm": 0.0881015732884407,
+      "learning_rate": 0.00014444006953555127,
+      "loss": 0.0295,
+      "mean_token_accuracy": 0.9908721047639847,
+      "num_tokens": 4997194.0,
       "step": 875
     },
     {
       "epoch": 2.1689800844900424,
+      "grad_norm": 0.06334047764539719,
+      "learning_rate": 0.00013713723790447483,
+      "loss": 0.0496,
+      "mean_token_accuracy": 0.9859565341472626,
+      "num_tokens": 5149497.0,
       "step": 900
     },
     {
       "epoch": 2.2293301146650575,
+      "grad_norm": 0.0663692057132721,
+      "learning_rate": 0.0001298650101273517,
+      "loss": 0.0272,
+      "mean_token_accuracy": 0.9913459432125091,
+      "num_tokens": 5284646.0,
       "step": 925
     },
     {
       "epoch": 2.2896801448400725,
+      "grad_norm": 0.0703674927353859,
+      "learning_rate": 0.00012264068872405698,
+      "loss": 0.0408,
+      "mean_token_accuracy": 0.9879277718067169,
+      "num_tokens": 5436909.0,
       "step": 950
     },
     {
       "epoch": 2.3500301750150876,
+      "grad_norm": 0.05159657076001167,
+      "learning_rate": 0.00011548146223275205,
+      "loss": 0.0293,
+      "mean_token_accuracy": 0.9910051214694977,
+      "num_tokens": 5569302.0,
       "step": 975
     },
     {
       "epoch": 2.4103802051901027,
+      "grad_norm": 0.0799742266535759,
+      "learning_rate": 0.00010840436431388485,
       "loss": 0.0403,
+      "mean_token_accuracy": 0.9877477496862411,
+      "num_tokens": 5722331.0,
       "step": 1000
     },
     {
       "epoch": 2.4707302353651177,
+      "grad_norm": 0.056557413190603256,
+      "learning_rate": 0.00010142623322268498,
+      "loss": 0.0287,
+      "mean_token_accuracy": 0.9909706234931945,
+      "num_tokens": 5856158.0,
       "step": 1025
     },
     {
       "epoch": 2.5310802655401328,
+      "grad_norm": 0.0646870955824852,
+      "learning_rate": 9.456367174657846e-05,
+      "loss": 0.0423,
+      "mean_token_accuracy": 0.987390770316124,
+      "num_tokens": 6007433.0,
       "step": 1050
     },
     {
       "epoch": 2.591430295715148,
+      "grad_norm": 0.039614204317331314,
+      "learning_rate": 8.783300770284126e-05,
+      "loss": 0.0271,
+      "mean_token_accuracy": 0.9912664991617203,
+      "num_tokens": 6141256.0,
       "step": 1075
     },
     {
       "epoch": 2.651780325890163,
+      "grad_norm": 0.0858864039182663,
+      "learning_rate": 8.125025509047871e-05,
+      "loss": 0.0347,
+      "mean_token_accuracy": 0.9889630949497223,
+      "num_tokens": 6293285.0,
       "step": 1100
     },
     {
       "epoch": 2.712130356065178,
+      "grad_norm": 0.09786231815814972,
+      "learning_rate": 7.483107598875994e-05,
+      "loss": 0.0255,
+      "mean_token_accuracy": 0.991877788901329,
+      "num_tokens": 6427387.0,
       "step": 1125
     },
     {
       "epoch": 2.772480386240193,
+      "grad_norm": 0.060420017689466476,
+      "learning_rate": 6.859074329306077e-05,
+      "loss": 0.0386,
+      "mean_token_accuracy": 0.9882360059022903,
+      "num_tokens": 6580270.0,
       "step": 1150
     },
     {
       "epoch": 2.832830416415208,
+      "grad_norm": 0.0470358170568943,
+      "learning_rate": 6.254410437667635e-05,
+      "loss": 0.026,
+      "mean_token_accuracy": 0.9918814355134964,
+      "num_tokens": 6713537.0,
       "step": 1175
     },
     {
       "epoch": 2.8931804465902236,
+      "grad_norm": 0.06813222169876099,
+      "learning_rate": 5.670554576506152e-05,
+      "loss": 0.0429,
+      "mean_token_accuracy": 0.9869187504053116,
+      "num_tokens": 6867844.0,
       "step": 1200
     },
     {
       "epoch": 2.9535304767652386,
+      "grad_norm": 0.05573548004031181,
+      "learning_rate": 5.108895890654753e-05,
+      "loss": 0.0279,
+      "mean_token_accuracy": 0.991449517607689,
+      "num_tokens": 7002067.0,
       "step": 1225
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.043443720787763596,
+      "eval_mean_token_accuracy": 0.9880821160368017,
+      "eval_num_tokens": 7101849.0,
+      "eval_runtime": 72.5056,
+      "eval_samples_per_second": 5.089,
+      "eval_steps_per_second": 2.552,
+      "step": 1245
     }
   ],
   "logging_steps": 25,
+  "max_steps": 1660,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.265941598976164e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

checkpoint-1245/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7e0d8c6c509827c1c38daeb1f564df9c52039702bf2e2293954393e7867f804
 size 6033

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa67ef66fca6209c5a81244c077133db48814d04ffb68d47dcf6047e1890fc8f
 size 6033

checkpoint-1660/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: Qwen/Qwen3-8B
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.2

checkpoint-1660/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-8B",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-1660/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea2ad39cdd86eff3c4df726c9ae56cab758f941a5339c78780151d5f982614d6
+size 349243752

checkpoint-1660/added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-1660/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

checkpoint-1660/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1660/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6737cd1e3f29baa257ea7eb2ae89c6342cab13c9142039f6c497af72e8bb36ef
+size 698662547

checkpoint-1660/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d6bb50ea24906e5110a15499d2a9d32288557f095bede9872f9afda5c752b4b
+size 14645

checkpoint-1660/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc85a65ac2e2afe9d530a3895150368dd9e7507622696dbf23bba79d510dda80
+size 1465

checkpoint-1660/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|im_end|>"
+}

checkpoint-1660/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69fae70b4e2890472c74ae51adff7a0f50c32b6bfbea38cd97da67fea79a12bb
+size 11422819

checkpoint-1660/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,239 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|im_end|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

checkpoint-1660/trainer_state.json ADDED Viewed

	@@ -0,0 +1,668 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1660,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.060350030175015085,
+      "grad_norm": 0.3188655376434326,
+      "learning_rate": 0.00014399999999999998,
+      "loss": 1.86,
+      "mean_token_accuracy": 0.6245196205377579,
+      "num_tokens": 157541.0,
+      "step": 25
+    },
+    {
+      "epoch": 0.12070006035003017,
+      "grad_norm": 0.2658841907978058,
+      "learning_rate": 0.000294,
+      "loss": 0.8983,
+      "mean_token_accuracy": 0.7745399290323257,
+      "num_tokens": 284307.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.18105009052504525,
+      "grad_norm": 0.24192510545253754,
+      "learning_rate": 0.00029983554299928354,
+      "loss": 0.6083,
+      "mean_token_accuracy": 0.8323455977439881,
+      "num_tokens": 440617.0,
+      "step": 75
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "grad_norm": 0.3552621006965637,
+      "learning_rate": 0.00029931487386844626,
+      "loss": 0.5333,
+      "mean_token_accuracy": 0.8501472049951553,
+      "num_tokens": 567051.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.30175015087507545,
+      "grad_norm": 0.29719677567481995,
+      "learning_rate": 0.000298438945846945,
+      "loss": 0.4156,
+      "mean_token_accuracy": 0.8798932474851608,
+      "num_tokens": 724100.0,
+      "step": 125
+    },
+    {
+      "epoch": 0.3621001810500905,
+      "grad_norm": 0.30843958258628845,
+      "learning_rate": 0.0002972098429951895,
+      "loss": 0.3274,
+      "mean_token_accuracy": 0.9020548111200333,
+      "num_tokens": 852073.0,
+      "step": 150
+    },
+    {
+      "epoch": 0.4224502112251056,
+      "grad_norm": 0.3006002604961395,
+      "learning_rate": 0.0002956304896682979,
+      "loss": 0.2719,
+      "mean_token_accuracy": 0.9195013505220413,
+      "num_tokens": 1012131.0,
+      "step": 175
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "grad_norm": 0.39749929308891296,
+      "learning_rate": 0.00029370464355829616,
+      "loss": 0.2304,
+      "mean_token_accuracy": 0.9330711585283279,
+      "num_tokens": 1138475.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.5431502715751357,
+      "grad_norm": 0.21084098517894745,
+      "learning_rate": 0.00029143688675359184,
+      "loss": 0.1776,
+      "mean_token_accuracy": 0.9493161207437515,
+      "num_tokens": 1298548.0,
+      "step": 225
+    },
+    {
+      "epoch": 0.6035003017501509,
+      "grad_norm": 0.2666475772857666,
+      "learning_rate": 0.000288832614836995,
+      "loss": 0.1483,
+      "mean_token_accuracy": 0.9569978493452073,
+      "num_tokens": 1428047.0,
+      "step": 250
+    },
+    {
+      "epoch": 0.663850331925166,
+      "grad_norm": 0.291533887386322,
+      "learning_rate": 0.00028589802404822455,
+      "loss": 0.1563,
+      "mean_token_accuracy": 0.9560109853744507,
+      "num_tokens": 1586758.0,
+      "step": 275
+    },
+    {
+      "epoch": 0.724200362100181,
+      "grad_norm": 0.3723059892654419,
+      "learning_rate": 0.0002826400965414433,
+      "loss": 0.1303,
+      "mean_token_accuracy": 0.9621474850177765,
+      "num_tokens": 1714018.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.7845503922751962,
+      "grad_norm": 0.39150121808052063,
+      "learning_rate": 0.00027906658377289907,
+      "loss": 0.115,
+      "mean_token_accuracy": 0.9675602000951767,
+      "num_tokens": 1872256.0,
+      "step": 325
+    },
+    {
+      "epoch": 0.8449004224502112,
+      "grad_norm": 0.3891217112541199,
+      "learning_rate": 0.0002751859880581954,
+      "loss": 0.1052,
+      "mean_token_accuracy": 0.9699741625785827,
+      "num_tokens": 1999407.0,
+      "step": 350
+    },
+    {
+      "epoch": 0.9052504526252263,
+      "grad_norm": 0.15646834671497345,
+      "learning_rate": 0.00027100754234307293,
+      "loss": 0.095,
+      "mean_token_accuracy": 0.9727736663818359,
+      "num_tokens": 2159407.0,
+      "step": 375
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "grad_norm": 0.3074830174446106,
+      "learning_rate": 0.00026654118823583243,
+      "loss": 0.1027,
+      "mean_token_accuracy": 0.9708205509185791,
+      "num_tokens": 2287233.0,
+      "step": 400
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.08644451200962067,
+      "eval_mean_token_accuracy": 0.9759664358319463,
+      "eval_num_tokens": 2367283.0,
+      "eval_runtime": 72.5321,
+      "eval_samples_per_second": 5.087,
+      "eval_steps_per_second": 2.551,
+      "step": 415
+    },
+    {
+      "epoch": 1.024140012070006,
+      "grad_norm": 0.12701059877872467,
+      "learning_rate": 0.00026179755235366435,
+      "loss": 0.0911,
+      "mean_token_accuracy": 0.9744510755096514,
+      "num_tokens": 2436776.0,
+      "step": 425
+    },
+    {
+      "epoch": 1.0844900422450212,
+      "grad_norm": 0.18361371755599976,
+      "learning_rate": 0.00025678792103916504,
+      "loss": 0.0605,
+      "mean_token_accuracy": 0.9830399179458618,
+      "num_tokens": 2579134.0,
+      "step": 450
+    },
+    {
+      "epoch": 1.1448400724200363,
+      "grad_norm": 0.14009377360343933,
+      "learning_rate": 0.0002515242135071945,
+      "loss": 0.0841,
+      "mean_token_accuracy": 0.9770982998609543,
+      "num_tokens": 2723253.0,
+      "step": 475
+    },
+    {
+      "epoch": 1.2051901025950513,
+      "grad_norm": 0.2148066610097885,
+      "learning_rate": 0.0002460189534859663,
+      "loss": 0.0574,
+      "mean_token_accuracy": 0.983966583609581,
+      "num_tokens": 2868154.0,
+      "step": 500
+    },
+    {
+      "epoch": 1.2655401327700664,
+      "grad_norm": 0.13684915006160736,
+      "learning_rate": 0.00024028523941984378,
+      "loss": 0.0752,
+      "mean_token_accuracy": 0.9793112319707871,
+      "num_tokens": 3010656.0,
+      "step": 525
+    },
+    {
+      "epoch": 1.3258901629450814,
+      "grad_norm": 0.1301647573709488,
+      "learning_rate": 0.00023433671330473613,
+      "loss": 0.0548,
+      "mean_token_accuracy": 0.9846732890605927,
+      "num_tokens": 3154781.0,
+      "step": 550
+    },
+    {
+      "epoch": 1.3862401931200965,
+      "grad_norm": 0.12535132467746735,
+      "learning_rate": 0.00022818752823024516,
+      "loss": 0.0633,
+      "mean_token_accuracy": 0.9826712667942047,
+      "num_tokens": 3297498.0,
+      "step": 575
+    },
+    {
+      "epoch": 1.4465902232951118,
+      "grad_norm": 0.1197441816329956,
+      "learning_rate": 0.00022185231470578672,
+      "loss": 0.0457,
+      "mean_token_accuracy": 0.986530932188034,
+      "num_tokens": 3439937.0,
+      "step": 600
+    },
+    {
+      "epoch": 1.5069402534701268,
+      "grad_norm": 0.09161168336868286,
+      "learning_rate": 0.00021534614585080636,
+      "loss": 0.0661,
+      "mean_token_accuracy": 0.980983544588089,
+      "num_tokens": 3581658.0,
+      "step": 625
+    },
+    {
+      "epoch": 1.567290283645142,
+      "grad_norm": 0.1546151489019394,
+      "learning_rate": 0.0002086845015319108,
+      "loss": 0.0432,
+      "mean_token_accuracy": 0.9872822916507721,
+      "num_tokens": 3724450.0,
+      "step": 650
+    },
+    {
+      "epoch": 1.627640313820157,
+      "grad_norm": 0.10510735213756561,
+      "learning_rate": 0.00020188323153224244,
+      "loss": 0.0615,
+      "mean_token_accuracy": 0.9828537595272064,
+      "num_tokens": 3865114.0,
+      "step": 675
+    },
+    {
+      "epoch": 1.687990343995172,
+      "grad_norm": 0.09251394867897034,
+      "learning_rate": 0.00019495851784072558,
+      "loss": 0.0426,
+      "mean_token_accuracy": 0.9873446094989776,
+      "num_tokens": 4007753.0,
+      "step": 700
+    },
+    {
+      "epoch": 1.748340374170187,
+      "grad_norm": 0.09151948243379593,
+      "learning_rate": 0.00018792683615090954,
+      "loss": 0.0559,
+      "mean_token_accuracy": 0.9842093575000763,
+      "num_tokens": 4149621.0,
+      "step": 725
+    },
+    {
+      "epoch": 1.8086904043452021,
+      "grad_norm": 0.17836125195026398,
+      "learning_rate": 0.00018080491666101184,
+      "loss": 0.0403,
+      "mean_token_accuracy": 0.9877779418230057,
+      "num_tokens": 4293130.0,
+      "step": 750
+    },
+    {
+      "epoch": 1.8690404345202172,
+      "grad_norm": 0.10722211748361588,
+      "learning_rate": 0.00017360970426842824,
+      "loss": 0.0507,
+      "mean_token_accuracy": 0.9855174136161804,
+      "num_tokens": 4435313.0,
+      "step": 775
+    },
+    {
+      "epoch": 1.9293904646952322,
+      "grad_norm": 0.08690394461154938,
+      "learning_rate": 0.00016635831825341846,
+      "loss": 0.0388,
+      "mean_token_accuracy": 0.9888739967346192,
+      "num_tokens": 4579734.0,
+      "step": 800
+    },
+    {
+      "epoch": 1.9897404948702473,
+      "grad_norm": 0.07448932528495789,
+      "learning_rate": 0.00015906801154788881,
+      "loss": 0.0442,
+      "mean_token_accuracy": 0.9876042759418487,
+      "num_tokens": 4714195.0,
+      "step": 825
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.04987528547644615,
+      "eval_mean_token_accuracy": 0.9863727804776784,
+      "eval_num_tokens": 4734566.0,
+      "eval_runtime": 72.6082,
+      "eval_samples_per_second": 5.082,
+      "eval_steps_per_second": 2.548,
+      "step": 830
+    },
+    {
+      "epoch": 2.048280024140012,
+      "grad_norm": 0.06745678931474686,
+      "learning_rate": 0.00015175612968618376,
+      "loss": 0.0459,
+      "mean_token_accuracy": 0.9859098005540592,
+      "num_tokens": 4863820.0,
+      "step": 850
+    },
+    {
+      "epoch": 2.1086300543150274,
+      "grad_norm": 0.0881015732884407,
+      "learning_rate": 0.00014444006953555127,
+      "loss": 0.0295,
+      "mean_token_accuracy": 0.9908721047639847,
+      "num_tokens": 4997194.0,
+      "step": 875
+    },
+    {
+      "epoch": 2.1689800844900424,
+      "grad_norm": 0.06334047764539719,
+      "learning_rate": 0.00013713723790447483,
+      "loss": 0.0496,
+      "mean_token_accuracy": 0.9859565341472626,
+      "num_tokens": 5149497.0,
+      "step": 900
+    },
+    {
+      "epoch": 2.2293301146650575,
+      "grad_norm": 0.0663692057132721,
+      "learning_rate": 0.0001298650101273517,
+      "loss": 0.0272,
+      "mean_token_accuracy": 0.9913459432125091,
+      "num_tokens": 5284646.0,
+      "step": 925
+    },
+    {
+      "epoch": 2.2896801448400725,
+      "grad_norm": 0.0703674927353859,
+      "learning_rate": 0.00012264068872405698,
+      "loss": 0.0408,
+      "mean_token_accuracy": 0.9879277718067169,
+      "num_tokens": 5436909.0,
+      "step": 950
+    },
+    {
+      "epoch": 2.3500301750150876,
+      "grad_norm": 0.05159657076001167,
+      "learning_rate": 0.00011548146223275205,
+      "loss": 0.0293,
+      "mean_token_accuracy": 0.9910051214694977,
+      "num_tokens": 5569302.0,
+      "step": 975
+    },
+    {
+      "epoch": 2.4103802051901027,
+      "grad_norm": 0.0799742266535759,
+      "learning_rate": 0.00010840436431388485,
+      "loss": 0.0403,
+      "mean_token_accuracy": 0.9877477496862411,
+      "num_tokens": 5722331.0,
+      "step": 1000
+    },
+    {
+      "epoch": 2.4707302353651177,
+      "grad_norm": 0.056557413190603256,
+      "learning_rate": 0.00010142623322268498,
+      "loss": 0.0287,
+      "mean_token_accuracy": 0.9909706234931945,
+      "num_tokens": 5856158.0,
+      "step": 1025
+    },
+    {
+      "epoch": 2.5310802655401328,
+      "grad_norm": 0.0646870955824852,
+      "learning_rate": 9.456367174657846e-05,
+      "loss": 0.0423,
+      "mean_token_accuracy": 0.987390770316124,
+      "num_tokens": 6007433.0,
+      "step": 1050
+    },
+    {
+      "epoch": 2.591430295715148,
+      "grad_norm": 0.039614204317331314,
+      "learning_rate": 8.783300770284126e-05,
+      "loss": 0.0271,
+      "mean_token_accuracy": 0.9912664991617203,
+      "num_tokens": 6141256.0,
+      "step": 1075
+    },
+    {
+      "epoch": 2.651780325890163,
+      "grad_norm": 0.0858864039182663,
+      "learning_rate": 8.125025509047871e-05,
+      "loss": 0.0347,
+      "mean_token_accuracy": 0.9889630949497223,
+      "num_tokens": 6293285.0,
+      "step": 1100
+    },
+    {
+      "epoch": 2.712130356065178,
+      "grad_norm": 0.09786231815814972,
+      "learning_rate": 7.483107598875994e-05,
+      "loss": 0.0255,
+      "mean_token_accuracy": 0.991877788901329,
+      "num_tokens": 6427387.0,
+      "step": 1125
+    },
+    {
+      "epoch": 2.772480386240193,
+      "grad_norm": 0.060420017689466476,
+      "learning_rate": 6.859074329306077e-05,
+      "loss": 0.0386,
+      "mean_token_accuracy": 0.9882360059022903,
+      "num_tokens": 6580270.0,
+      "step": 1150
+    },
+    {
+      "epoch": 2.832830416415208,
+      "grad_norm": 0.0470358170568943,
+      "learning_rate": 6.254410437667635e-05,
+      "loss": 0.026,
+      "mean_token_accuracy": 0.9918814355134964,
+      "num_tokens": 6713537.0,
+      "step": 1175
+    },
+    {
+      "epoch": 2.8931804465902236,
+      "grad_norm": 0.06813222169876099,
+      "learning_rate": 5.670554576506152e-05,
+      "loss": 0.0429,
+      "mean_token_accuracy": 0.9869187504053116,
+      "num_tokens": 6867844.0,
+      "step": 1200
+    },
+    {
+      "epoch": 2.9535304767652386,
+      "grad_norm": 0.05573548004031181,
+      "learning_rate": 5.108895890654753e-05,
+      "loss": 0.0279,
+      "mean_token_accuracy": 0.991449517607689,
+      "num_tokens": 7002067.0,
+      "step": 1225
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.043443720787763596,
+      "eval_mean_token_accuracy": 0.9880821160368017,
+      "eval_num_tokens": 7101849.0,
+      "eval_runtime": 72.5056,
+      "eval_samples_per_second": 5.089,
+      "eval_steps_per_second": 2.552,
+      "step": 1245
+    },
+    {
+      "epoch": 3.012070006035003,
+      "grad_norm": 0.05211171880364418,
+      "learning_rate": 4.570770712097641e-05,
+      "loss": 0.0354,
+      "mean_token_accuracy": 0.98871831119675,
+      "num_tokens": 7138755.0,
+      "step": 1250
+    },
+    {
+      "epoch": 3.0724200362100182,
+      "grad_norm": 0.07240189611911774,
+      "learning_rate": 4.057459380488989e-05,
+      "loss": 0.0234,
+      "mean_token_accuracy": 0.9925487804412841,
+      "num_tokens": 7287577.0,
+      "step": 1275
+    },
+    {
+      "epoch": 3.1327700663850333,
+      "grad_norm": 0.10089480876922607,
+      "learning_rate": 3.570183196892178e-05,
+      "loss": 0.0316,
+      "mean_token_accuracy": 0.9904714208841324,
+      "num_tokens": 7423900.0,
+      "step": 1300
+    },
+    {
+      "epoch": 3.1931200965600484,
+      "grad_norm": 0.07049904763698578,
+      "learning_rate": 3.110101517987129e-05,
+      "loss": 0.0245,
+      "mean_token_accuracy": 0.9921106594800949,
+      "num_tokens": 7573131.0,
+      "step": 1325
+    },
+    {
+      "epoch": 3.2534701267350634,
+      "grad_norm": 0.09604395180940628,
+      "learning_rate": 2.6783089976594708e-05,
+      "loss": 0.0288,
+      "mean_token_accuracy": 0.9908786052465439,
+      "num_tokens": 7708680.0,
+      "step": 1350
+    },
+    {
+      "epoch": 3.3138201569100785,
+      "grad_norm": 0.07211136817932129,
+      "learning_rate": 2.2758329825344545e-05,
+      "loss": 0.0234,
+      "mean_token_accuracy": 0.9924190586805344,
+      "num_tokens": 7856618.0,
+      "step": 1375
+    },
+    {
+      "epoch": 3.3741701870850935,
+      "grad_norm": 0.0694340318441391,
+      "learning_rate": 1.9036310676523382e-05,
+      "loss": 0.0258,
+      "mean_token_accuracy": 0.9920255327224732,
+      "num_tokens": 7992886.0,
+      "step": 1400
+    },
+    {
+      "epoch": 3.4345202172601086,
+      "grad_norm": 0.05492745339870453,
+      "learning_rate": 1.5625888181008965e-05,
+      "loss": 0.0233,
+      "mean_token_accuracy": 0.9923601657152176,
+      "num_tokens": 8142240.0,
+      "step": 1425
+    },
+    {
+      "epoch": 3.4948702474351236,
+      "grad_norm": 0.10188309103250504,
+      "learning_rate": 1.2535176620259418e-05,
+      "loss": 0.0274,
+      "mean_token_accuracy": 0.9914963799715042,
+      "num_tokens": 8280355.0,
+      "step": 1450
+    },
+    {
+      "epoch": 3.5552202776101387,
+      "grad_norm": 0.06785538047552109,
+      "learning_rate": 9.771529600328754e-06,
+      "loss": 0.0251,
+      "mean_token_accuracy": 0.991920046210289,
+      "num_tokens": 8429816.0,
+      "step": 1475
+    },
+    {
+      "epoch": 3.6155703077851538,
+      "grad_norm": 0.08000839501619339,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.0258,
+      "mean_token_accuracy": 0.9917354655265808,
+      "num_tokens": 8567287.0,
+      "step": 1500
+    },
+    {
+      "epoch": 3.675920337960169,
+      "grad_norm": 0.06494925171136856,
+      "learning_rate": 5.250937104752384e-06,
+      "loss": 0.0227,
+      "mean_token_accuracy": 0.9926939576864242,
+      "num_tokens": 8717200.0,
+      "step": 1525
+    },
+    {
+      "epoch": 3.736270368135184,
+      "grad_norm": 0.06809823215007782,
+      "learning_rate": 3.5047472935191723e-06,
+      "loss": 0.0267,
+      "mean_token_accuracy": 0.9914516353607178,
+      "num_tokens": 8854512.0,
+      "step": 1550
+    },
+    {
+      "epoch": 3.796620398310199,
+      "grad_norm": 0.06002349779009819,
+      "learning_rate": 2.1071077614088605e-06,
+      "loss": 0.0248,
+      "mean_token_accuracy": 0.9922404575347901,
+      "num_tokens": 9002707.0,
+      "step": 1575
+    },
+    {
+      "epoch": 3.856970428485214,
+      "grad_norm": 0.07697267085313797,
+      "learning_rate": 1.0613438561036302e-06,
+      "loss": 0.0259,
+      "mean_token_accuracy": 0.9919641929864883,
+      "num_tokens": 9138803.0,
+      "step": 1600
+    },
+    {
+      "epoch": 3.9173204586602295,
+      "grad_norm": 0.056576263159513474,
+      "learning_rate": 3.6994372171977317e-07,
+      "loss": 0.0229,
+      "mean_token_accuracy": 0.992724329829216,
+      "num_tokens": 9288237.0,
+      "step": 1625
+    },
+    {
+      "epoch": 3.9776704888352445,
+      "grad_norm": 0.07688049972057343,
+      "learning_rate": 3.455237886632045e-08,
+      "loss": 0.0234,
+      "mean_token_accuracy": 0.992882153391838,
+      "num_tokens": 9421425.0,
+      "step": 1650
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.04342251271009445,
+      "eval_mean_token_accuracy": 0.988437723791277,
+      "eval_num_tokens": 9469132.0,
+      "eval_runtime": 72.5212,
+      "eval_samples_per_second": 5.088,
+      "eval_steps_per_second": 2.551,
+      "step": 1660
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 1660,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.354643154101023e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1660/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa67ef66fca6209c5a81244c077133db48814d04ffb68d47dcf6047e1890fc8f
+size 6033

checkpoint-1660/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-415/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: Qwen/Qwen3-8B
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT, `r=32`, `lora_alpha=16`) for causal language modeling
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** Qwen/Qwen3-8B
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and BibTeX information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.2

checkpoint-415/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-8B",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-415/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44999bc24686256a3ea4c97089d58ddd2327c87b1ea499cf33cb68c00a8f5a95
+size 349243752

checkpoint-415/added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-415/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{#- Chat template: renders a list of messages (plus optional tools) into a single prompt string made of <|im_start|>role ... <|im_end|> turns. #}
+{#- Inputs read below: messages, tools, add_generation_prompt, enable_thinking. #}
+{#- System preamble. With tools: one system turn holding the optional leading system message, the tool-calling instructions and every tool serialized as JSON. Without tools: only the leading system message, if there is one. #}
+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{#- Locate the last "real" user query: scan the messages in reverse and record the index of the first user message whose content is a string and is not entirely wrapped in tool_response tags. #}
+{#- ns.multi_step_tool starts true and is cleared on the first match, so only the match closest to the end is kept; with no match, last_query_index stays at the last message index. #}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{#- Main pass: emit every message in order. #}
+{%- for message in messages %}
+    {#- Non-string content (for example a list of parts) is rendered as an empty string. #}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {#- User turns, and system turns other than the first message, are emitted verbatim under their own role. #}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {#- Reasoning text comes from message.reasoning_content when it is a string; otherwise it is split out of content around the think tags and content keeps only what follows the closing tag. #}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {#- A think block is rendered only for assistant turns after the last user query, and then only when the turn is the final message or has non-empty reasoning; every other assistant turn is emitted without reasoning. #}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {#- Tool calls are appended inside the same assistant turn, one tool_call element per call. #}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {#- Newline separator before each call, except before the first call when the turn has no text content. #}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {#- Accept both the nested form (call.function.name / .arguments) and the flat form (call.name / .arguments). #}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {#- String arguments are inserted as-is (NOTE(review): presumably already JSON-encoded by the caller; confirm); anything else is serialized with tojson. #}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {#- Tool results are rendered as a user turn. A run of consecutive tool messages shares one turn: the header is opened only for the first of the run and the end marker is written only after the last. #}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{#- Generation prompt: open an assistant turn; when enable_thinking is defined and false, pre-fill an empty think block. #}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

checkpoint-415/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-415/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a138f7115f2f059fcb953fd24c1f5dd3a8879bcd1888768b28d78a442df8aa2
+size 698662547

checkpoint-415/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89a0d2294425ffc300740dd935430fffd4cd109cb2876b1b773f8cb61d0a6153
+size 14645

checkpoint-415/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b01c172f8ab5b63a14eea628ee0fae82d2bb80e4fd533a8eab533314c8ec1612
+size 1465

checkpoint-415/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|im_end|>"
+}

checkpoint-415/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69fae70b4e2890472c74ae51adff7a0f50c32b6bfbea38cd97da67fea79a12bb
+size 11422819

checkpoint-415/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,239 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151665": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151666": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151667": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151668": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|im_end|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

checkpoint-415/trainer_state.json ADDED Viewed

	@@ -0,0 +1,188 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 415,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.060350030175015085,
+      "grad_norm": 0.3188655376434326,
+      "learning_rate": 0.00014399999999999998,
+      "loss": 1.86,
+      "mean_token_accuracy": 0.6245196205377579,
+      "num_tokens": 157541.0,
+      "step": 25
+    },
+    {
+      "epoch": 0.12070006035003017,
+      "grad_norm": 0.2658841907978058,
+      "learning_rate": 0.000294,
+      "loss": 0.8983,
+      "mean_token_accuracy": 0.7745399290323257,
+      "num_tokens": 284307.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.18105009052504525,
+      "grad_norm": 0.24192510545253754,
+      "learning_rate": 0.00029983554299928354,
+      "loss": 0.6083,
+      "mean_token_accuracy": 0.8323455977439881,
+      "num_tokens": 440617.0,
+      "step": 75
+    },
+    {
+      "epoch": 0.24140012070006034,
+      "grad_norm": 0.3552621006965637,
+      "learning_rate": 0.00029931487386844626,
+      "loss": 0.5333,
+      "mean_token_accuracy": 0.8501472049951553,
+      "num_tokens": 567051.0,
+      "step": 100
+    },
+    {
+      "epoch": 0.30175015087507545,
+      "grad_norm": 0.29719677567481995,
+      "learning_rate": 0.000298438945846945,
+      "loss": 0.4156,
+      "mean_token_accuracy": 0.8798932474851608,
+      "num_tokens": 724100.0,
+      "step": 125
+    },
+    {
+      "epoch": 0.3621001810500905,
+      "grad_norm": 0.30843958258628845,
+      "learning_rate": 0.0002972098429951895,
+      "loss": 0.3274,
+      "mean_token_accuracy": 0.9020548111200333,
+      "num_tokens": 852073.0,
+      "step": 150
+    },
+    {
+      "epoch": 0.4224502112251056,
+      "grad_norm": 0.3006002604961395,
+      "learning_rate": 0.0002956304896682979,
+      "loss": 0.2719,
+      "mean_token_accuracy": 0.9195013505220413,
+      "num_tokens": 1012131.0,
+      "step": 175
+    },
+    {
+      "epoch": 0.4828002414001207,
+      "grad_norm": 0.39749929308891296,
+      "learning_rate": 0.00029370464355829616,
+      "loss": 0.2304,
+      "mean_token_accuracy": 0.9330711585283279,
+      "num_tokens": 1138475.0,
+      "step": 200
+    },
+    {
+      "epoch": 0.5431502715751357,
+      "grad_norm": 0.21084098517894745,
+      "learning_rate": 0.00029143688675359184,
+      "loss": 0.1776,
+      "mean_token_accuracy": 0.9493161207437515,
+      "num_tokens": 1298548.0,
+      "step": 225
+    },
+    {
+      "epoch": 0.6035003017501509,
+      "grad_norm": 0.2666475772857666,
+      "learning_rate": 0.000288832614836995,
+      "loss": 0.1483,
+      "mean_token_accuracy": 0.9569978493452073,
+      "num_tokens": 1428047.0,
+      "step": 250
+    },
+    {
+      "epoch": 0.663850331925166,
+      "grad_norm": 0.291533887386322,
+      "learning_rate": 0.00028589802404822455,
+      "loss": 0.1563,
+      "mean_token_accuracy": 0.9560109853744507,
+      "num_tokens": 1586758.0,
+      "step": 275
+    },
+    {
+      "epoch": 0.724200362100181,
+      "grad_norm": 0.3723059892654419,
+      "learning_rate": 0.0002826400965414433,
+      "loss": 0.1303,
+      "mean_token_accuracy": 0.9621474850177765,
+      "num_tokens": 1714018.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.7845503922751962,
+      "grad_norm": 0.39150121808052063,
+      "learning_rate": 0.00027906658377289907,
+      "loss": 0.115,
+      "mean_token_accuracy": 0.9675602000951767,
+      "num_tokens": 1872256.0,
+      "step": 325
+    },
+    {
+      "epoch": 0.8449004224502112,
+      "grad_norm": 0.3891217112541199,
+      "learning_rate": 0.0002751859880581954,
+      "loss": 0.1052,
+      "mean_token_accuracy": 0.9699741625785827,
+      "num_tokens": 1999407.0,
+      "step": 350
+    },
+    {
+      "epoch": 0.9052504526252263,
+      "grad_norm": 0.15646834671497345,
+      "learning_rate": 0.00027100754234307293,
+      "loss": 0.095,
+      "mean_token_accuracy": 0.9727736663818359,
+      "num_tokens": 2159407.0,
+      "step": 375
+    },
+    {
+      "epoch": 0.9656004828002414,
+      "grad_norm": 0.3074830174446106,
+      "learning_rate": 0.00026654118823583243,
+      "loss": 0.1027,
+      "mean_token_accuracy": 0.9708205509185791,
+      "num_tokens": 2287233.0,
+      "step": 400
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.08644451200962067,
+      "eval_mean_token_accuracy": 0.9759664358319463,
+      "eval_num_tokens": 2367283.0,
+      "eval_runtime": 72.5321,
+      "eval_samples_per_second": 5.087,
+      "eval_steps_per_second": 2.551,
+      "step": 415
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 1660,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.0886023372599091e+17,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-415/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa67ef66fca6209c5a81244c077133db48814d04ffb68d47dcf6047e1890fc8f
+size 6033

checkpoint-415/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-830/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: Qwen/Qwen3-8B
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** Qwen/Qwen3-8B
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.2

checkpoint-830/adapter_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen3-8B",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "gate_proj",
+    "o_proj",
+    "down_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-830/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a71f6d23018367f729e2f625ac3a9122f7cb0a0bbcb57b6c4b5e7cdc3e0eefb
+size 349243752

checkpoint-830/added_tokens.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "</think>": 151668,
+  "</tool_call>": 151658,
+  "</tool_response>": 151666,
+  "<think>": 151667,
+  "<tool_call>": 151657,
+  "<tool_response>": 151665,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

checkpoint-830/chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

checkpoint-830/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-830/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce3d77ae797509eac1c38d715e000a0c9e1b991f6d8b6fa26277f31346fde9aa
+size 698662547

checkpoint-830/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef51f7f681b288c283f6c6ee22f1c70756b935d9e14a1eb052ba8b2bddfcfaa8
+size 14645

checkpoint-830/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1820d810469d58a18885a80d0098317f425057406231d2e24bae3e0f07f43dd7
+size 1465

checkpoint-830/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|im_end|>"
+}