codedp-ase26's picture
Initial commit
903307f
{"timestamp": 1773761894.8111923, "event": "train_step", "step": 10, "epoch": 1, "metrics": {"train/step_loss": 1.8352766107110416, "train/step_real_loss": 1.028106451034546, "train/lr": 5.2631578947368424e-05, "train/step_canary_loss": 14.75, "perf/step_duration_sec": 6.234770041890442, "perf/samples_per_sec": 5.453288536956348, "perf/tokens_per_sec": 3980.098677781523, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 24815.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773761950.7987902, "event": "train_step", "step": 20, "epoch": 1, "metrics": {"train/step_loss": 1.0323970019817352, "train/step_real_loss": 1.0323970019817352, "train/lr": 9.999797424944042e-05, "perf/step_duration_sec": 5.150427320972085, "perf/samples_per_sec": 6.213076703305535, "perf/tokens_per_sec": 4927.9406189561805, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25381.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762007.3004794, "event": "train_step", "step": 30, "epoch": 1, "metrics": {"train/step_loss": 0.8551503717899323, "train/step_real_loss": 0.8551503717899323, "train/lr": 9.975508273693644e-05, "perf/step_duration_sec": 5.69609066285193, "perf/samples_per_sec": 5.617888108539722, "perf/tokens_per_sec": 4432.689276641233, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25249.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762065.1568909, "event": "train_step", "step": 40, "epoch": 1, "metrics": {"train/step_loss": 0.8950656801462173, "train/step_real_loss": 0.8950656801462173, "train/lr": 9.910929512300672e-05, "perf/step_duration_sec": 6.2338299779221416, "perf/samples_per_sec": 5.133280842328368, "perf/tokens_per_sec": 4016.4714290693023, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25038.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762121.2732794, "event": "train_step", "step": 50, "epoch": 1, "metrics": {"train/step_loss": 0.8450518101453781, "train/step_real_loss": 0.8450518101453781, "train/lr": 9.806584072891234e-05, "perf/step_duration_sec": 5.423216213937849, "perf/samples_per_sec": 5.900557664980961, "perf/tokens_per_sec": 5482.724425329497, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29734.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762134.073646, "event": "eval_step", "step": 50, "epoch": 1, "metrics": {"eval/loss": 0.8533683589253671, "eval/duration_sec": 12.798461285419762}}
{"timestamp": 1773762190.2760499, "event": "train_step", "step": 60, "epoch": 1, "metrics": {"train/step_loss": 1.114606170943289, "train/step_real_loss": 0.8427969664335251, "train/lr": 9.663316901718597e-05, "train/step_canary_loss": 9.8125, "perf/step_duration_sec": 5.968041606713086, "perf/samples_per_sec": 5.5294520673046765, "perf/tokens_per_sec": 4259.353683360148, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25420.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762249.207255, "event": "train_step", "step": 70, "epoch": 1, "metrics": {"train/step_loss": 1.1624658794114084, "train/step_real_loss": 0.8745741844177246, "train/lr": 9.48228811713756e-05, "train/step_canary_loss": 10.375, "perf/step_duration_sec": 6.334186799824238, "perf/samples_per_sec": 5.209824250986045, "perf/tokens_per_sec": 3829.378697936894, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24256.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 16.205660820007324, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762305.719841, "event": "train_step", "step": 80, "epoch": 1, "metrics": {"train/step_loss": 1.157600255573497, "train/step_real_loss": 0.8803409039974213, "train/lr": 9.26496361544538e-05, "train/step_canary_loss": 5.59375, "perf/step_duration_sec": 5.699764240998775, "perf/samples_per_sec": 5.9651590070052, "perf/tokens_per_sec": 4690.720329743854, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 26736.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 16.205660820007324, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762363.2025864, "event": "train_step", "step": 90, "epoch": 1, "metrics": {"train/step_loss": 0.8746908158063889, "train/step_real_loss": 0.8746908158063889, "train/lr": 9.013103200659241e-05, "perf/step_duration_sec": 5.422750173136592, "perf/samples_per_sec": 5.901064769408466, "perf/tokens_per_sec": 4211.147346069116, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22836.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762419.340239, "event": "train_step", "step": 100, "epoch": 1, "metrics": {"train/step_loss": 1.227062124194521, "train/step_real_loss": 0.9060328304767609, "train/lr": 8.728746334350483e-05, "train/step_canary_loss": 11.5, "perf/step_duration_sec": 5.688522285781801, "perf/samples_per_sec": 5.801155087760134, "perf/tokens_per_sec": 4329.068036096396, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24626.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 16.205660820007324, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762432.1276581, "event": "eval_step", "step": 100, "epoch": 1, "metrics": {"eval/loss": 0.829470864473245, "eval/duration_sec": 12.785310188308358}}
{"timestamp": 1773762489.3303852, "event": "train_step", "step": 110, "epoch": 1, "metrics": {"train/step_loss": 1.011215921604272, "train/step_real_loss": 0.9119570553302765, "train/lr": 8.414195620927492e-05, "train/step_canary_loss": 4.1875, "perf/step_duration_sec": 5.6974792359396815, "perf/samples_per_sec": 5.792035149831895, "perf/tokens_per_sec": 4259.603062159705, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24269.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762544.9202547, "event": "train_step", "step": 120, "epoch": 1, "metrics": {"train/step_loss": 0.7180200964212418, "train/step_real_loss": 0.7180200964212418, "train/lr": 8.071998162096612e-05, "perf/step_duration_sec": 5.694211829919368, "perf/samples_per_sec": 5.619741758088605, "perf/tokens_per_sec": 4598.00245969612, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26182.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.4762544631958}}
{"timestamp": 1773762601.86658, "event": "train_step", "step": 130, "epoch": 1, "metrics": {"train/step_loss": 0.8463245183229446, "train/step_real_loss": 0.8463245183229446, "train/lr": 7.704924931484997e-05, "perf/step_duration_sec": 5.1536158989183605, "perf/samples_per_sec": 6.209232629602092, "perf/tokens_per_sec": 4267.683201733388, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21994.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 101.70386934280396}}
{"timestamp": 1773762659.5638738, "event": "train_step", "step": 140, "epoch": 1, "metrics": {"train/step_loss": 0.9742496162652969, "train/step_real_loss": 0.9742496162652969, "train/lr": 7.315948336441117e-05, "perf/step_duration_sec": 5.969049285165966, "perf/samples_per_sec": 5.360987733762741, "perf/tokens_per_sec": 3927.091045847888, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23441.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 101.70386934280396}}
{"timestamp": 1773762717.6212678, "event": "train_step", "step": 150, "epoch": 1, "metrics": {"train/step_loss": 0.914526179432869, "train/step_real_loss": 0.914526179432869, "train/lr": 6.908218148708247e-05, "perf/step_duration_sec": 5.703813333064318, "perf/samples_per_sec": 5.610281776666824, "perf/tokens_per_sec": 4645.31331108013, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26496.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 101.70386934280396}}
{"timestamp": 1773762730.4167268, "event": "eval_step", "step": 150, "epoch": 1, "metrics": {"eval/loss": 0.8174186625923866, "eval/duration_sec": 12.793565314263105}}
{"timestamp": 1773762785.9700294, "event": "train_step", "step": 160, "epoch": 1, "metrics": {"train/step_loss": 0.9113822728395462, "train/step_real_loss": 0.9113822728395462, "train/lr": 6.485035998874356e-05, "perf/step_duration_sec": 5.425368802621961, "perf/samples_per_sec": 5.898216538668322, "perf/tokens_per_sec": 4679.128907832313, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25386.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 101.70386934280396}}
{"timestamp": 1773762841.0786932, "event": "train_step", "step": 170, "epoch": 1, "metrics": {"train/step_loss": 0.8910564256436897, "train/step_real_loss": 0.8441949039697647, "train/lr": 6.049828641131825e-05, "train/step_canary_loss": 2.390625, "perf/step_duration_sec": 5.690398690290749, "perf/samples_per_sec": 5.799242161415912, "perf/tokens_per_sec": 4355.582332445254, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24785.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 16.205660820007324, "system/cuda_max_memory_allocated_gb": 101.70386934280396}}
{"timestamp": 1773762898.8441253, "event": "train_step", "step": 180, "epoch": 1, "metrics": {"train/step_loss": 1.0836328773787527, "train/step_real_loss": 0.8538245260715485, "train/lr": 5.6061202048379124e-05, "train/step_canary_loss": 8.4375, "perf/step_duration_sec": 5.692985306028277, "perf/samples_per_sec": 5.7966072677293665, "perf/tokens_per_sec": 4476.7373583439585, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25486.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 101.70386934280396}}
{"timestamp": 1773762934.6675656, "event": "train_epoch", "step": 184, "epoch": 1, "metrics": {"train/epoch_loss": 0.9805822407983543, "train/epoch_real_loss": 0.9071368900552877, "train/epoch_canary_loss": 8.42140839386602, "perf/epoch_duration_sec": 1085.6805565529503, "perf/epoch_samples_per_sec": 43.826887856473604, "perf/epoch_tokens_per_sec": 34574.90951038253, "perf/epoch_samples": 47582.0, "perf/epoch_tokens": 37537307.0, "system/cuda_epoch_peak_memory_gb": 101.70386934280396, "eval/loss": 0.8121764394335258, "eval/duration_sec": 12.826699289958924}}
{"timestamp": 1773762949.1512606, "event": "audit_epoch", "step": 184, "epoch": 1, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.907944, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.876048, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 8.130579099990427}}
{"timestamp": 1773762984.332577, "event": "train_step", "step": 190, "epoch": 2, "metrics": {"train/step_loss": 0.8655764758586884, "train/step_real_loss": 0.8655764758586884, "train/lr": 5.157503657571385e-05, "perf/step_duration_sec": 5.690848938189447, "perf/samples_per_sec": 5.623062630472995, "perf/tokens_per_sec": 4602.301042334944, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26191.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 87.30217599868774}}
{"timestamp": 1773763040.7157884, "event": "train_step", "step": 200, "epoch": 2, "metrics": {"train/step_loss": 0.8589679941986547, "train/step_real_loss": 0.8308791071176529, "train/lr": 4.7076117107656534e-05, "train/step_canary_loss": 1.7578125, "perf/step_duration_sec": 5.69332688068971, "perf/samples_per_sec": 5.796259496697344, "perf/tokens_per_sec": 5059.0806752537455, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28803.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 87.30217599868774}}
{"timestamp": 1773763053.5208263, "event": "eval_step", "step": 200, "epoch": 2, "metrics": {"eval/loss": 0.8110936123591204, "eval/duration_sec": 12.803085402119905}}
{"timestamp": 1773763110.1544352, "event": "train_step", "step": 210, "epoch": 2, "metrics": {"train/step_loss": 0.7684839069843292, "train/step_real_loss": 0.7684839069843292, "train/lr": 4.2600874035126046e-05, "perf/step_duration_sec": 5.4159107422456145, "perf/samples_per_sec": 5.908516872405425, "perf/tokens_per_sec": 4630.061534138701, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25076.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 87.30229806900024}}
{"timestamp": 1773763168.1105735, "event": "train_step", "step": 220, "epoch": 2, "metrics": {"train/step_loss": 0.8040641099214554, "train/step_real_loss": 0.8040641099214554, "train/lr": 3.818554602737332e-05, "perf/step_duration_sec": 5.967140641994774, "perf/samples_per_sec": 5.36270249351834, "perf/tokens_per_sec": 4430.932935269528, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26440.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 87.30229806900024}}
{"timestamp": 1773763224.221138, "event": "train_step", "step": 230, "epoch": 2, "metrics": {"train/step_loss": 0.806927278637886, "train/step_real_loss": 0.806927278637886, "train/lr": 3.386588658621128e-05, "perf/step_duration_sec": 5.424597659613937, "perf/samples_per_sec": 5.899055009782496, "perf/tokens_per_sec": 4482.175734620363, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24314.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 87.30229806900024}}
{"timestamp": 1773763282.06351, "event": "train_step", "step": 240, "epoch": 2, "metrics": {"train/step_loss": 0.9124463796615601, "train/step_real_loss": 0.9124463796615601, "train/lr": 2.967687452893051e-05, "perf/step_duration_sec": 5.692487298045307, "perf/samples_per_sec": 5.621444251792743, "perf/tokens_per_sec": 4766.282044988772, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27132.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763337.7170568, "event": "train_step", "step": 250, "epoch": 2, "metrics": {"train/step_loss": 0.7962393760681152, "train/step_real_loss": 0.7962393760681152, "train/lr": 2.5652430744289756e-05, "perf/step_duration_sec": 5.419141778722405, "perf/samples_per_sec": 5.904994057480479, "perf/tokens_per_sec": 4963.885629569528, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26900.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763350.5270941, "event": "eval_step", "step": 250, "epoch": 2, "metrics": {"eval/loss": 0.8086002505360506, "eval/duration_sec": 12.808165564201772}}
{"timestamp": 1773763406.908792, "event": "train_step", "step": 260, "epoch": 2, "metrics": {"train/step_loss": 0.7549401223659515, "train/step_real_loss": 0.7549401223659515, "train/lr": 2.1825143515174878e-05, "perf/step_duration_sec": 5.96535021904856, "perf/samples_per_sec": 5.364312039520762, "perf/tokens_per_sec": 4828.551374573625, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28804.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763462.8593152, "event": "train_step", "step": 270, "epoch": 2, "metrics": {"train/step_loss": 0.8835187554359436, "train/step_real_loss": 0.8835187554359436, "train/lr": 1.822600463214922e-05, "perf/step_duration_sec": 6.238990655634552, "perf/samples_per_sec": 5.129034769606553, "perf/tokens_per_sec": 4065.8820312690445, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25367.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763519.993293, "event": "train_step", "step": 280, "epoch": 2, "metrics": {"train/step_loss": 0.9217604398727417, "train/step_real_loss": 0.9217604398727417, "train/lr": 1.488415843473942e-05, "perf/step_duration_sec": 5.145696292165667, "perf/samples_per_sec": 6.2187890973511335, "perf/tokens_per_sec": 4732.109828765628, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24350.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763577.139687, "event": "train_step", "step": 290, "epoch": 2, "metrics": {"train/step_loss": 0.9289029836654663, "train/step_real_loss": 0.9481655806303024, "train/lr": 1.1826665812616183e-05, "train/step_canary_loss": 0.3125, "perf/step_duration_sec": 5.694677841849625, "perf/samples_per_sec": 5.794884437094274, "perf/tokens_per_sec": 4061.3359776095867, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 23128.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763634.8883243, "event": "train_step", "step": 300, "epoch": 2, "metrics": {"train/step_loss": 0.9466440713766849, "train/step_real_loss": 0.8473204374313354, "train/lr": 9.078285077691178e-06, "train/step_canary_loss": 4.125, "perf/step_duration_sec": 6.2339927861467, "perf/samples_per_sec": 5.293557617412269, "perf/tokens_per_sec": 4104.752905210987, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25589.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763647.6901762, "event": "eval_step", "step": 300, "epoch": 2, "metrics": {"eval/loss": 0.8076710475560945, "eval/duration_sec": 12.799929299857467}}
{"timestamp": 1773763704.81611, "event": "train_step", "step": 310, "epoch": 2, "metrics": {"train/step_loss": 0.9043312668800354, "train/step_real_loss": 0.9043312668800354, "train/lr": 6.661271481537157e-06, "perf/step_duration_sec": 5.423097257036716, "perf/samples_per_sec": 5.900687095087322, "perf/tokens_per_sec": 4037.360748341778, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21895.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763760.6698298, "event": "train_step", "step": 320, "epoch": 2, "metrics": {"train/step_loss": 0.8735850304365158, "train/step_real_loss": 0.8735850304365158, "train/lr": 4.595197001556562e-06, "perf/step_duration_sec": 5.15081740077585, "perf/samples_per_sec": 6.212606176872034, "perf/tokens_per_sec": 4842.532370928723, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24943.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763818.6010072, "event": "train_step", "step": 330, "epoch": 2, "metrics": {"train/step_loss": 0.7620985209941864, "train/step_real_loss": 0.7620985209941864, "train/lr": 2.8967918551955297e-06, "perf/step_duration_sec": 6.058840225916356, "perf/samples_per_sec": 5.281538843543317, "perf/tokens_per_sec": 4943.025213289962, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29949.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763875.2762098, "event": "train_step", "step": 340, "epoch": 2, "metrics": {"train/step_loss": 0.9000806212425232, "train/step_real_loss": 0.9000806212425232, "train/lr": 1.5798090255558617e-06, "perf/step_duration_sec": 5.154371100012213, "perf/samples_per_sec": 6.2083228737496565, "perf/tokens_per_sec": 4376.285595724094, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22557.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763932.3082285, "event": "train_step", "step": 350, "epoch": 2, "metrics": {"train/step_loss": 0.7404757142066956, "train/step_real_loss": 0.7404757142066956, "train/lr": 6.54912895420573e-07, "perf/step_duration_sec": 5.694617530796677, "perf/samples_per_sec": 5.619341391575985, "perf/tokens_per_sec": 4990.501968974935, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28419.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773763945.1132307, "event": "eval_step", "step": 350, "epoch": 2, "metrics": {"eval/loss": 0.807514699605795, "eval/duration_sec": 12.803151289001107}}
{"timestamp": 1773764000.5064592, "event": "train_step", "step": 360, "epoch": 2, "metrics": {"train/step_loss": 0.8157700151205063, "train/step_real_loss": 0.8157700151205063, "train/lr": 1.295928914885336e-07, "perf/step_duration_sec": 5.695594378747046, "perf/samples_per_sec": 5.618377621729371, "perf/tokens_per_sec": 4667.116060650316, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26582.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 15.915565013885498, "system/cuda_max_memory_allocated_gb": 94.47624206542969}}
{"timestamp": 1773764058.7973218, "event": "train_epoch", "step": 368, "epoch": 2, "metrics": {"train/epoch_loss": 0.856036927981092, "train/epoch_real_loss": 0.8280306565727147, "train/epoch_canary_loss": 3.6806401156922846, "perf/epoch_duration_sec": 1096.7485609338619, "perf/epoch_samples_per_sec": 43.38460217306761, "perf/epoch_tokens_per_sec": 34225.847506959835, "perf/epoch_samples": 47582.0, "perf/epoch_tokens": 37537149.0, "system/cuda_epoch_peak_memory_gb": 94.47624206542969, "eval/loss": 0.8075161480750794, "eval/duration_sec": 12.857198356185108}}
{"timestamp": 1773764072.6744637, "event": "audit_epoch", "step": 368, "epoch": 2, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.968584, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.883776, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 7.556974642910063}}
{"timestamp": 1773764086.367738, "event": "audit_final", "step": 368, "epoch": 2, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.968584, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.883776, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0}}
{"timestamp": 1773764086.9161372, "event": "energy_final", "step": 368, "epoch": null, "metrics": {"energy/codecarbon/duration": 2345.9966679112986, "energy/codecarbon/emissions": 0.09022432714096462, "energy/codecarbon/emissions_rate": 3.8458847096868924e-05, "energy/codecarbon/cpu_power": 72.02285277932866, "energy/codecarbon/gpu_power": 3280.290622412428, "energy/codecarbon/ram_power": 54.0, "energy/codecarbon/cpu_energy": 0.045218986505725985, "energy/codecarbon/gpu_energy": 2.137964725370466, "energy/codecarbon/ram_energy": 0.03390259211605879, "energy/codecarbon/energy_consumed": 2.2170863039922497, "energy/codecarbon/water_consumed": 0.0, "energy/codecarbon/cpu_count": 256.0, "energy/codecarbon/gpu_count": 8.0, "energy/codecarbon/longitude": 16.1885, "energy/codecarbon/latitude": 58.594, "energy/codecarbon/ram_total_size": 1511.49019241333, "energy/codecarbon/cpu_utilization_percent": 3.3142796066695253, "energy/codecarbon/gpu_utilization_percent": 88.58721675929884, "energy/codecarbon/ram_utilization_percent": 5.287772552372644, "energy/codecarbon/ram_used_gb": 79.7947571596665, "energy/codecarbon/pue": 1.0, "energy/codecarbon/wue": 0.0}}