| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.2466655118655812, |
| "eval_steps": 500, |
| "global_step": 900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.013857613026156245, |
| "grad_norm": 0.1854863315820694, |
| "learning_rate": 2.0930232558139536e-05, |
| "loss": 0.8494, |
| "memory/device_reserved (GiB)": 89.89, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 10, |
| "tokens_per_second_per_gpu": 1111.65 |
| }, |
| { |
| "epoch": 0.02771522605231249, |
| "grad_norm": 0.09567277133464813, |
| "learning_rate": 4.418604651162791e-05, |
| "loss": 0.7842, |
| "memory/device_reserved (GiB)": 90.37, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 20, |
| "tokens_per_second_per_gpu": 1041.18 |
| }, |
| { |
| "epoch": 0.04157283907846873, |
| "grad_norm": 0.1211227998137474, |
| "learning_rate": 6.744186046511628e-05, |
| "loss": 0.7353, |
| "memory/device_reserved (GiB)": 90.37, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 30, |
| "tokens_per_second_per_gpu": 993.82 |
| }, |
| { |
| "epoch": 0.05543045210462498, |
| "grad_norm": 0.09696491807699203, |
| "learning_rate": 9.069767441860465e-05, |
| "loss": 0.6943, |
| "memory/device_reserved (GiB)": 90.37, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 40, |
| "tokens_per_second_per_gpu": 849.91 |
| }, |
| { |
| "epoch": 0.06928806513078123, |
| "grad_norm": 0.12007619440555573, |
| "learning_rate": 9.999547457436221e-05, |
| "loss": 0.6814, |
| "memory/device_reserved (GiB)": 90.38, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 50, |
| "tokens_per_second_per_gpu": 971.92 |
| }, |
| { |
| "epoch": 0.08314567815693746, |
| "grad_norm": 0.12358752638101578, |
| "learning_rate": 9.996782216198338e-05, |
| "loss": 0.69, |
| "memory/device_reserved (GiB)": 90.38, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 60, |
| "tokens_per_second_per_gpu": 862.06 |
| }, |
| { |
| "epoch": 0.09700329118309371, |
| "grad_norm": 0.11916535347700119, |
| "learning_rate": 9.991504534967746e-05, |
| "loss": 0.7048, |
| "memory/device_reserved (GiB)": 90.38, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 70, |
| "tokens_per_second_per_gpu": 1024.18 |
| }, |
| { |
| "epoch": 0.11086090420924996, |
| "grad_norm": 0.12464027106761932, |
| "learning_rate": 9.983717067423721e-05, |
| "loss": 0.6705, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 80, |
| "tokens_per_second_per_gpu": 1009.08 |
| }, |
| { |
| "epoch": 0.1247185172354062, |
| "grad_norm": 0.1264505237340927, |
| "learning_rate": 9.973423729195168e-05, |
| "loss": 0.6387, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 90, |
| "tokens_per_second_per_gpu": 1027.91 |
| }, |
| { |
| "epoch": 0.13857613026156246, |
| "grad_norm": 0.1262999027967453, |
| "learning_rate": 9.960629695891814e-05, |
| "loss": 0.6447, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 100, |
| "tokens_per_second_per_gpu": 935.25 |
| }, |
| { |
| "epoch": 0.1524337432877187, |
| "grad_norm": 0.12777547538280487, |
| "learning_rate": 9.945341400501838e-05, |
| "loss": 0.6846, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 110, |
| "tokens_per_second_per_gpu": 997.71 |
| }, |
| { |
| "epoch": 0.16629135631387493, |
| "grad_norm": 0.12563012540340424, |
| "learning_rate": 9.927566530157298e-05, |
| "loss": 0.6765, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 120, |
| "tokens_per_second_per_gpu": 1120.19 |
| }, |
| { |
| "epoch": 0.1801489693400312, |
| "grad_norm": 0.13785897195339203, |
| "learning_rate": 9.907314022268946e-05, |
| "loss": 0.6315, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 130, |
| "tokens_per_second_per_gpu": 801.56 |
| }, |
| { |
| "epoch": 0.19400658236618742, |
| "grad_norm": 0.14731284976005554, |
| "learning_rate": 9.884594060032406e-05, |
| "loss": 0.6642, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 140, |
| "tokens_per_second_per_gpu": 944.29 |
| }, |
| { |
| "epoch": 0.20786419539234366, |
| "grad_norm": 0.1298578828573227, |
| "learning_rate": 9.859418067307928e-05, |
| "loss": 0.6696, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 150, |
| "tokens_per_second_per_gpu": 913.99 |
| }, |
| { |
| "epoch": 0.22172180841849992, |
| "grad_norm": 0.13122966885566711, |
| "learning_rate": 9.831798702876352e-05, |
| "loss": 0.6768, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 160, |
| "tokens_per_second_per_gpu": 983.71 |
| }, |
| { |
| "epoch": 0.23557942144465616, |
| "grad_norm": 0.12356515228748322, |
| "learning_rate": 9.801749854074122e-05, |
| "loss": 0.6526, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 170, |
| "tokens_per_second_per_gpu": 903.88 |
| }, |
| { |
| "epoch": 0.2494370344708124, |
| "grad_norm": 0.11831440776586533, |
| "learning_rate": 9.769286629810572e-05, |
| "loss": 0.6415, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 180, |
| "tokens_per_second_per_gpu": 819.58 |
| }, |
| { |
| "epoch": 0.2632946474969686, |
| "grad_norm": 0.12409751862287521, |
| "learning_rate": 9.73442535297099e-05, |
| "loss": 0.6685, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 190, |
| "tokens_per_second_per_gpu": 973.62 |
| }, |
| { |
| "epoch": 0.2771522605231249, |
| "grad_norm": 0.1352369785308838, |
| "learning_rate": 9.697183552209288e-05, |
| "loss": 0.6329, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 200, |
| "tokens_per_second_per_gpu": 903.58 |
| }, |
| { |
| "epoch": 0.29100987354928115, |
| "grad_norm": 0.12526443600654602, |
| "learning_rate": 9.657579953134383e-05, |
| "loss": 0.6452, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 210, |
| "tokens_per_second_per_gpu": 859.06 |
| }, |
| { |
| "epoch": 0.3048674865754374, |
| "grad_norm": 0.11614521592855453, |
| "learning_rate": 9.615634468894752e-05, |
| "loss": 0.6407, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 220, |
| "tokens_per_second_per_gpu": 850.93 |
| }, |
| { |
| "epoch": 0.3187250996015936, |
| "grad_norm": 0.1328686773777008, |
| "learning_rate": 9.571368190165863e-05, |
| "loss": 0.6741, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 230, |
| "tokens_per_second_per_gpu": 1038.17 |
| }, |
| { |
| "epoch": 0.33258271262774985, |
| "grad_norm": 0.13082517683506012, |
| "learning_rate": 9.524803374545548e-05, |
| "loss": 0.6906, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 240, |
| "tokens_per_second_per_gpu": 1066.43 |
| }, |
| { |
| "epoch": 0.3464403256539061, |
| "grad_norm": 0.1282692551612854, |
| "learning_rate": 9.475963435362614e-05, |
| "loss": 0.6609, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 250, |
| "tokens_per_second_per_gpu": 986.84 |
| }, |
| { |
| "epoch": 0.3602979386800624, |
| "grad_norm": 0.13708311319351196, |
| "learning_rate": 9.424872929904358e-05, |
| "loss": 0.6169, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 260, |
| "tokens_per_second_per_gpu": 1126.25 |
| }, |
| { |
| "epoch": 0.3741555517062186, |
| "grad_norm": 0.1323172152042389, |
| "learning_rate": 9.371557547068878e-05, |
| "loss": 0.6574, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 270, |
| "tokens_per_second_per_gpu": 1062.03 |
| }, |
| { |
| "epoch": 0.38801316473237485, |
| "grad_norm": 0.12729060649871826, |
| "learning_rate": 9.316044094448392e-05, |
| "loss": 0.6583, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 280, |
| "tokens_per_second_per_gpu": 941.33 |
| }, |
| { |
| "epoch": 0.4018707777585311, |
| "grad_norm": 0.14719286561012268, |
| "learning_rate": 9.25836048485008e-05, |
| "loss": 0.6392, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 290, |
| "tokens_per_second_per_gpu": 883.56 |
| }, |
| { |
| "epoch": 0.4157283907846873, |
| "grad_norm": 0.12530402839183807, |
| "learning_rate": 9.198535722261181e-05, |
| "loss": 0.6623, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 300, |
| "tokens_per_second_per_gpu": 902.6 |
| }, |
| { |
| "epoch": 0.4295860038108436, |
| "grad_norm": 0.1330760419368744, |
| "learning_rate": 9.136599887265483e-05, |
| "loss": 0.645, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 310, |
| "tokens_per_second_per_gpu": 997.96 |
| }, |
| { |
| "epoch": 0.44344361683699984, |
| "grad_norm": 0.13317464292049408, |
| "learning_rate": 9.072584121918425e-05, |
| "loss": 0.6139, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 320, |
| "tokens_per_second_per_gpu": 980.08 |
| }, |
| { |
| "epoch": 0.4573012298631561, |
| "grad_norm": 0.12773385643959045, |
| "learning_rate": 9.006520614088535e-05, |
| "loss": 0.6658, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 330, |
| "tokens_per_second_per_gpu": 937.57 |
| }, |
| { |
| "epoch": 0.4711588428893123, |
| "grad_norm": 0.13415341079235077, |
| "learning_rate": 8.938442581272983e-05, |
| "loss": 0.6737, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 340, |
| "tokens_per_second_per_gpu": 1046.92 |
| }, |
| { |
| "epoch": 0.48501645591546855, |
| "grad_norm": 0.13382680714130402, |
| "learning_rate": 8.868384253895445e-05, |
| "loss": 0.6575, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 350, |
| "tokens_per_second_per_gpu": 1049.72 |
| }, |
| { |
| "epoch": 0.4988740689416248, |
| "grad_norm": 0.12621234357357025, |
| "learning_rate": 8.796380858094643e-05, |
| "loss": 0.6423, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 360, |
| "tokens_per_second_per_gpu": 926.35 |
| }, |
| { |
| "epoch": 0.5127316819677811, |
| "grad_norm": 0.14663882553577423, |
| "learning_rate": 8.722468598012245e-05, |
| "loss": 0.6524, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 370, |
| "tokens_per_second_per_gpu": 993.98 |
| }, |
| { |
| "epoch": 0.5265892949939373, |
| "grad_norm": 0.12107036262750626, |
| "learning_rate": 8.646684637588991e-05, |
| "loss": 0.6158, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 380, |
| "tokens_per_second_per_gpu": 882.74 |
| }, |
| { |
| "epoch": 0.5404469080200935, |
| "grad_norm": 0.12905746698379517, |
| "learning_rate": 8.56906708187824e-05, |
| "loss": 0.6359, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 390, |
| "tokens_per_second_per_gpu": 992.61 |
| }, |
| { |
| "epoch": 0.5543045210462498, |
| "grad_norm": 0.14433123171329498, |
| "learning_rate": 8.489654957886306e-05, |
| "loss": 0.6124, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 400, |
| "tokens_per_second_per_gpu": 871.5 |
| }, |
| { |
| "epoch": 0.568162134072406, |
| "grad_norm": 0.13294072449207306, |
| "learning_rate": 8.40848819494923e-05, |
| "loss": 0.6803, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 410, |
| "tokens_per_second_per_gpu": 950.11 |
| }, |
| { |
| "epoch": 0.5820197470985623, |
| "grad_norm": 0.1526036411523819, |
| "learning_rate": 8.325607604655839e-05, |
| "loss": 0.6088, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 420, |
| "tokens_per_second_per_gpu": 1025.1 |
| }, |
| { |
| "epoch": 0.5958773601247185, |
| "grad_norm": 0.1453717052936554, |
| "learning_rate": 8.241054860327216e-05, |
| "loss": 0.6669, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 430, |
| "tokens_per_second_per_gpu": 1074.36 |
| }, |
| { |
| "epoch": 0.6097349731508748, |
| "grad_norm": 0.1466919481754303, |
| "learning_rate": 8.154872476062868e-05, |
| "loss": 0.6147, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 440, |
| "tokens_per_second_per_gpu": 1010.83 |
| }, |
| { |
| "epoch": 0.6235925861770311, |
| "grad_norm": 0.12707076966762543, |
| "learning_rate": 8.067103785364139e-05, |
| "loss": 0.6096, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 450, |
| "tokens_per_second_per_gpu": 919.83 |
| }, |
| { |
| "epoch": 0.6374501992031872, |
| "grad_norm": 0.13485883176326752, |
| "learning_rate": 7.977792919345633e-05, |
| "loss": 0.6342, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 460, |
| "tokens_per_second_per_gpu": 1033.87 |
| }, |
| { |
| "epoch": 0.6513078122293435, |
| "grad_norm": 0.12489234656095505, |
| "learning_rate": 7.886984784545566e-05, |
| "loss": 0.6256, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 470, |
| "tokens_per_second_per_gpu": 1018.58 |
| }, |
| { |
| "epoch": 0.6651654252554997, |
| "grad_norm": 0.16094225645065308, |
| "learning_rate": 7.794725040346251e-05, |
| "loss": 0.6455, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 480, |
| "tokens_per_second_per_gpu": 933.16 |
| }, |
| { |
| "epoch": 0.679023038281656, |
| "grad_norm": 0.1351306140422821, |
| "learning_rate": 7.701060076016024e-05, |
| "loss": 0.6613, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 490, |
| "tokens_per_second_per_gpu": 970.62 |
| }, |
| { |
| "epoch": 0.6928806513078122, |
| "grad_norm": 0.1223958283662796, |
| "learning_rate": 7.606036987384184e-05, |
| "loss": 0.6186, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 500, |
| "tokens_per_second_per_gpu": 925.45 |
| }, |
| { |
| "epoch": 0.7067382643339685, |
| "grad_norm": 0.11873335391283035, |
| "learning_rate": 7.509703553160666e-05, |
| "loss": 0.646, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 510, |
| "tokens_per_second_per_gpu": 990.93 |
| }, |
| { |
| "epoch": 0.7205958773601248, |
| "grad_norm": 0.12990343570709229, |
| "learning_rate": 7.412108210912345e-05, |
| "loss": 0.6155, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 520, |
| "tokens_per_second_per_gpu": 955.97 |
| }, |
| { |
| "epoch": 0.7344534903862809, |
| "grad_norm": 0.1376057118177414, |
| "learning_rate": 7.31330003270808e-05, |
| "loss": 0.6443, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 530, |
| "tokens_per_second_per_gpu": 895.07 |
| }, |
| { |
| "epoch": 0.7483111034124372, |
| "grad_norm": 0.13277359306812286, |
| "learning_rate": 7.213328700444696e-05, |
| "loss": 0.6188, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 540, |
| "tokens_per_second_per_gpu": 907.27 |
| }, |
| { |
| "epoch": 0.7621687164385934, |
| "grad_norm": 0.13623632490634918, |
| "learning_rate": 7.112244480866356e-05, |
| "loss": 0.6471, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 550, |
| "tokens_per_second_per_gpu": 1002.06 |
| }, |
| { |
| "epoch": 0.7760263294647497, |
| "grad_norm": 0.13037075102329254, |
| "learning_rate": 7.010098200289859e-05, |
| "loss": 0.647, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 560, |
| "tokens_per_second_per_gpu": 1047.58 |
| }, |
| { |
| "epoch": 0.789883942490906, |
| "grad_norm": 0.12997141480445862, |
| "learning_rate": 6.906941219048584e-05, |
| "loss": 0.6071, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 570, |
| "tokens_per_second_per_gpu": 1073.59 |
| }, |
| { |
| "epoch": 0.8037415555170622, |
| "grad_norm": 0.14118416607379913, |
| "learning_rate": 6.802825405667905e-05, |
| "loss": 0.6101, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 580, |
| "tokens_per_second_per_gpu": 1097.31 |
| }, |
| { |
| "epoch": 0.8175991685432185, |
| "grad_norm": 0.126139834523201, |
| "learning_rate": 6.697803110785115e-05, |
| "loss": 0.6084, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 590, |
| "tokens_per_second_per_gpu": 933.94 |
| }, |
| { |
| "epoch": 0.8314567815693746, |
| "grad_norm": 0.1207822933793068, |
| "learning_rate": 6.591927140826902e-05, |
| "loss": 0.6416, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 600, |
| "tokens_per_second_per_gpu": 1007.52 |
| }, |
| { |
| "epoch": 0.8453143945955309, |
| "grad_norm": 0.1316983848810196, |
| "learning_rate": 6.485250731457678e-05, |
| "loss": 0.6102, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 610, |
| "tokens_per_second_per_gpu": 866.4 |
| }, |
| { |
| "epoch": 0.8591720076216872, |
| "grad_norm": 0.13538379967212677, |
| "learning_rate": 6.377827520812061e-05, |
| "loss": 0.6426, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 620, |
| "tokens_per_second_per_gpu": 1044.49 |
| }, |
| { |
| "epoch": 0.8730296206478434, |
| "grad_norm": 0.1406071037054062, |
| "learning_rate": 6.269711522525006e-05, |
| "loss": 0.6029, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 630, |
| "tokens_per_second_per_gpu": 966.75 |
| }, |
| { |
| "epoch": 0.8868872336739997, |
| "grad_norm": 0.1416017860174179, |
| "learning_rate": 6.160957098573119e-05, |
| "loss": 0.6103, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 640, |
| "tokens_per_second_per_gpu": 996.12 |
| }, |
| { |
| "epoch": 0.9007448467001559, |
| "grad_norm": 0.14168281853199005, |
| "learning_rate": 6.05161893194083e-05, |
| "loss": 0.6015, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 650, |
| "tokens_per_second_per_gpu": 1057.0 |
| }, |
| { |
| "epoch": 0.9146024597263122, |
| "grad_norm": 0.14854960143566132, |
| "learning_rate": 5.941751999125149e-05, |
| "loss": 0.5851, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 660, |
| "tokens_per_second_per_gpu": 992.44 |
| }, |
| { |
| "epoch": 0.9284600727524683, |
| "grad_norm": 0.13109387457370758, |
| "learning_rate": 5.831411542492854e-05, |
| "loss": 0.6221, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 670, |
| "tokens_per_second_per_gpu": 1056.52 |
| }, |
| { |
| "epoch": 0.9423176857786246, |
| "grad_norm": 0.13833071291446686, |
| "learning_rate": 5.720653042503978e-05, |
| "loss": 0.5828, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 680, |
| "tokens_per_second_per_gpu": 1061.91 |
| }, |
| { |
| "epoch": 0.9561752988047809, |
| "grad_norm": 0.1346118003129959, |
| "learning_rate": 5.6095321898156016e-05, |
| "loss": 0.5827, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 690, |
| "tokens_per_second_per_gpu": 1017.14 |
| }, |
| { |
| "epoch": 0.9700329118309371, |
| "grad_norm": 0.12667891383171082, |
| "learning_rate": 5.498104857279941e-05, |
| "loss": 0.6744, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 700, |
| "tokens_per_second_per_gpu": 956.99 |
| }, |
| { |
| "epoch": 0.9838905248570934, |
| "grad_norm": 0.13424526154994965, |
| "learning_rate": 5.3864270718508305e-05, |
| "loss": 0.6298, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 710, |
| "tokens_per_second_per_gpu": 953.92 |
| }, |
| { |
| "epoch": 0.9977481378832496, |
| "grad_norm": 0.12869331240653992, |
| "learning_rate": 5.274554986412716e-05, |
| "loss": 0.6199, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 720, |
| "tokens_per_second_per_gpu": 1040.0 |
| }, |
| { |
| "epoch": 1.011086090420925, |
| "grad_norm": 0.14841921627521515, |
| "learning_rate": 5.162544851546349e-05, |
| "loss": 0.6032, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 730, |
| "tokens_per_second_per_gpu": 885.8 |
| }, |
| { |
| "epoch": 1.0249437034470812, |
| "grad_norm": 0.15776373445987701, |
| "learning_rate": 5.0504529872453256e-05, |
| "loss": 0.5982, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 740, |
| "tokens_per_second_per_gpu": 1010.64 |
| }, |
| { |
| "epoch": 1.0388013164732375, |
| "grad_norm": 0.17491325736045837, |
| "learning_rate": 4.9383357545977497e-05, |
| "loss": 0.5993, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 750, |
| "tokens_per_second_per_gpu": 1085.43 |
| }, |
| { |
| "epoch": 1.0526589294993938, |
| "grad_norm": 0.15103822946548462, |
| "learning_rate": 4.8262495274472225e-05, |
| "loss": 0.5512, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 760, |
| "tokens_per_second_per_gpu": 1024.19 |
| }, |
| { |
| "epoch": 1.06651654252555, |
| "grad_norm": 0.15340133011341095, |
| "learning_rate": 4.7142506640474274e-05, |
| "loss": 0.5822, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 770, |
| "tokens_per_second_per_gpu": 1189.62 |
| }, |
| { |
| "epoch": 1.0803741555517061, |
| "grad_norm": 0.17283514142036438, |
| "learning_rate": 4.602395478724539e-05, |
| "loss": 0.5395, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 780, |
| "tokens_per_second_per_gpu": 1009.01 |
| }, |
| { |
| "epoch": 1.0942317685778624, |
| "grad_norm": 0.15003962814807892, |
| "learning_rate": 4.490740213561727e-05, |
| "loss": 0.5358, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 790, |
| "tokens_per_second_per_gpu": 1047.2 |
| }, |
| { |
| "epoch": 1.1080893816040187, |
| "grad_norm": 0.1629399210214615, |
| "learning_rate": 4.379341010119992e-05, |
| "loss": 0.601, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 800, |
| "tokens_per_second_per_gpu": 942.61 |
| }, |
| { |
| "epoch": 1.121946994630175, |
| "grad_norm": 0.17462220788002014, |
| "learning_rate": 4.268253881209532e-05, |
| "loss": 0.5845, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 810, |
| "tokens_per_second_per_gpu": 1010.69 |
| }, |
| { |
| "epoch": 1.1358046076563313, |
| "grad_norm": 0.17755432426929474, |
| "learning_rate": 4.157534682725856e-05, |
| "loss": 0.5637, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 820, |
| "tokens_per_second_per_gpu": 911.61 |
| }, |
| { |
| "epoch": 1.1496622206824874, |
| "grad_norm": 0.1729191541671753, |
| "learning_rate": 4.047239085564794e-05, |
| "loss": 0.5921, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 830, |
| "tokens_per_second_per_gpu": 914.41 |
| }, |
| { |
| "epoch": 1.1635198337086436, |
| "grad_norm": 0.15745393931865692, |
| "learning_rate": 3.937422547630519e-05, |
| "loss": 0.6086, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 840, |
| "tokens_per_second_per_gpu": 1000.17 |
| }, |
| { |
| "epoch": 1.1773774467348, |
| "grad_norm": 0.16624517738819122, |
| "learning_rate": 3.828140285950676e-05, |
| "loss": 0.5603, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 850, |
| "tokens_per_second_per_gpu": 991.45 |
| }, |
| { |
| "epoch": 1.1912350597609562, |
| "grad_norm": 0.17486163973808289, |
| "learning_rate": 3.7194472489126176e-05, |
| "loss": 0.5715, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 860, |
| "tokens_per_second_per_gpu": 1005.74 |
| }, |
| { |
| "epoch": 1.2050926727871123, |
| "grad_norm": 0.18270528316497803, |
| "learning_rate": 3.611398088634721e-05, |
| "loss": 0.5577, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 870, |
| "tokens_per_second_per_gpu": 965.77 |
| }, |
| { |
| "epoch": 1.2189502858132686, |
| "grad_norm": 0.1544029712677002, |
| "learning_rate": 3.5040471334866695e-05, |
| "loss": 0.5706, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 880, |
| "tokens_per_second_per_gpu": 885.82 |
| }, |
| { |
| "epoch": 1.2328078988394249, |
| "grad_norm": 0.1610870063304901, |
| "learning_rate": 3.397448360772516e-05, |
| "loss": 0.5791, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 890, |
| "tokens_per_second_per_gpu": 903.16 |
| }, |
| { |
| "epoch": 1.2466655118655812, |
| "grad_norm": 0.15956935286521912, |
| "learning_rate": 3.291655369590269e-05, |
| "loss": 0.5978, |
| "memory/device_reserved (GiB)": 90.59, |
| "memory/max_active (GiB)": 85.72, |
| "memory/max_allocated (GiB)": 85.72, |
| "step": 900, |
| "tokens_per_second_per_gpu": 954.24 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1444, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.990168442752205e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|