melihcatal's picture
Add files using upload-large-folder tool
076fd74 verified
{"timestamp": 1774553376.3189409, "event": "train_step", "step": 5, "epoch": 1, "metrics": {"train/step_loss": 1.29731947183609, "train/step_real_loss": 1.29731947183609, "train/lr": 2.0320937499999996e-05, "perf/step_duration_sec": 4.332902549998835, "perf/samples_per_sec": 7.385349573580556, "perf/tokens_per_sec": 6000.365736360028, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25999.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 69.79438161849976}}
{"timestamp": 1774553396.6833704, "event": "train_step", "step": 10, "epoch": 1, "metrics": {"train/step_loss": 1.7787025746177225, "train/step_real_loss": 1.3000276684761047, "train/lr": 4.063187499999999e-05, "train/step_canary_loss": 9.4375, "perf/step_duration_sec": 4.307096315082163, "perf/samples_per_sec": 7.893949313587944, "perf/tokens_per_sec": 5851.273841207158, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 25202.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553410.9195538, "event": "eval_step", "step": 10, "epoch": 1, "metrics": {"eval/loss": 0.8391811422665011, "eval/duration_sec": 14.226810971973464}}
{"timestamp": 1774553431.4030375, "event": "train_step", "step": 15, "epoch": 1, "metrics": {"train/step_loss": 1.5122452721451267, "train/step_real_loss": 1.2352841794490814, "train/lr": 6.094281249999999e-05, "train/step_canary_loss": 10.375, "perf/step_duration_sec": 4.161926166852936, "perf/samples_per_sec": 7.929021005423826, "perf/tokens_per_sec": 6395.356124283818, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26617.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553451.2936132, "event": "train_step", "step": 20, "epoch": 1, "metrics": {"train/step_loss": 1.230827122926712, "train/step_real_loss": 1.230827122926712, "train/lr": 8.125375000000001e-05, "perf/step_duration_sec": 3.8988507229369134, "perf/samples_per_sec": 8.207546857781502, "perf/tokens_per_sec": 5946.624184302003, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23185.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553469.6659799, "event": "eval_step", "step": 20, "epoch": 1, "metrics": {"eval/loss": 0.8291501370681957, "eval/duration_sec": 18.36034947214648}}
{"timestamp": 1774553490.3247662, "event": "train_step", "step": 25, "epoch": 1, "metrics": {"train/step_loss": 1.3402218276804143, "train/step_real_loss": 1.0989006757736206, "train/lr": 0.00010156468750000002, "train/step_canary_loss": 9.0625, "perf/step_duration_sec": 4.304563360987231, "perf/samples_per_sec": 7.666282787026187, "perf/tokens_per_sec": 6085.170040101937, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26194.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553510.6052778, "event": "train_step", "step": 30, "epoch": 1, "metrics": {"train/step_loss": 0.9824554324150085, "train/step_real_loss": 0.9824554324150085, "train/lr": 0.00012187562500000007, "perf/step_duration_sec": 4.047926557948813, "perf/samples_per_sec": 7.90528176386066, "perf/tokens_per_sec": 6901.310979850356, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27936.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553524.9312143, "event": "eval_step", "step": 30, "epoch": 1, "metrics": {"eval/loss": 0.8240251146595585, "eval/duration_sec": 14.318319211015478}}
{"timestamp": 1774553545.2811625, "event": "train_step", "step": 35, "epoch": 1, "metrics": {"train/step_loss": 0.9739880263805389, "train/step_real_loss": 0.9739880263805389, "train/lr": 0.0001299920948636526, "perf/step_duration_sec": 4.042316239094362, "perf/samples_per_sec": 7.916253481239079, "perf/tokens_per_sec": 5874.354848921974, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23746.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553566.2338233, "event": "train_step", "step": 40, "epoch": 1, "metrics": {"train/step_loss": 1.1072920858860016, "train/step_real_loss": 1.1072920858860016, "train/lr": 0.00012994379302662363, "perf/step_duration_sec": 4.185423576040193, "perf/samples_per_sec": 7.645582201807882, "perf/tokens_per_sec": 5578.407913994076, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23348.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553580.51171, "event": "eval_step", "step": 40, "epoch": 1, "metrics": {"eval/loss": 0.8113125132566148, "eval/duration_sec": 14.270195916993544}}
{"timestamp": 1774553601.2770982, "event": "train_step", "step": 45, "epoch": 1, "metrics": {"train/step_loss": 0.9993373155593872, "train/step_real_loss": 0.9993373155593872, "train/lr": 0.00012985161540494155, "perf/step_duration_sec": 4.19656996591948, "perf/samples_per_sec": 7.6252749888297675, "perf/tokens_per_sec": 6514.367738894631, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27338.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553621.5631545, "event": "train_step", "step": 50, "epoch": 1, "metrics": {"train/step_loss": 0.9020161479711533, "train/step_real_loss": 0.9020161479711533, "train/lr": 0.00012971562755407446, "perf/step_duration_sec": 4.047171479091048, "perf/samples_per_sec": 7.906756648519094, "perf/tokens_per_sec": 6821.060126219316, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27606.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98097944259644}}
{"timestamp": 1774553635.9007976, "event": "eval_step", "step": 50, "epoch": 1, "metrics": {"eval/loss": 0.8032787023958834, "eval/duration_sec": 14.326730190776289}}
{"timestamp": 1774553656.3728466, "event": "train_step", "step": 55, "epoch": 1, "metrics": {"train/step_loss": 1.4053459588219137, "train/step_real_loss": 0.9345862567424774, "train/lr": 0.00012953592618672818, "train/step_canary_loss": 8.9375, "perf/step_duration_sec": 4.13513494306244, "perf/samples_per_sec": 8.22222260413585, "perf/tokens_per_sec": 6847.9022788622015, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 28317.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 18.006030559539795, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553676.7669718, "event": "train_step", "step": 60, "epoch": 1, "metrics": {"train/step_loss": 1.0051756799221039, "train/step_real_loss": 1.0051756799221039, "train/lr": 0.0001293126391040657, "perf/step_duration_sec": 4.188762044068426, "perf/samples_per_sec": 7.639488627747235, "perf/tokens_per_sec": 6104.906349648509, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25572.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553691.089436, "event": "eval_step", "step": 60, "epoch": 1, "metrics": {"eval/loss": 0.7923064954917539, "eval/duration_sec": 14.3114415879827}}
{"timestamp": 1774553711.8629513, "event": "train_step", "step": 65, "epoch": 1, "metrics": {"train/step_loss": 0.9619558304548264, "train/step_real_loss": 0.9619558304548264, "train/lr": 0.00012904592510481659, "perf/step_duration_sec": 4.289028630126268, "perf/samples_per_sec": 7.460896804285946, "perf/tokens_per_sec": 6444.349614701986, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27640.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553732.4611456, "event": "train_step", "step": 70, "epoch": 1, "metrics": {"train/step_loss": 0.9246343672275543, "train/step_real_loss": 0.9246343672275543, "train/lr": 0.00012873597387234154, "perf/step_duration_sec": 4.162193527910858, "perf/samples_per_sec": 7.688253750195477, "perf/tokens_per_sec": 6811.312306813806, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28350.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553746.7873595, "event": "eval_step", "step": 70, "epoch": 1, "metrics": {"eval/loss": 0.7860731545158407, "eval/duration_sec": 14.300922275986522}}
{"timestamp": 1774553767.1752193, "event": "train_step", "step": 75, "epoch": 1, "metrics": {"train/step_loss": 0.8718184679746628, "train/step_real_loss": 0.8718184679746628, "train/lr": 0.00012838300583973213, "perf/step_duration_sec": 4.068339916877449, "perf/samples_per_sec": 7.865616112175993, "perf/tokens_per_sec": 6263.488430327146, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25482.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553787.7128725, "event": "train_step", "step": 80, "epoch": 1, "metrics": {"train/step_loss": 1.2743387117105371, "train/step_real_loss": 0.8207817375659943, "train/lr": 0.00012798727203304185, "train/step_canary_loss": 8.53125, "perf/step_duration_sec": 4.190040284069255, "perf/samples_per_sec": 8.114480457209378, "perf/tokens_per_sec": 6525.235593545929, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 27341.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.911073684692383, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553803.1246016, "event": "eval_step", "step": 80, "epoch": 1, "metrics": {"eval/loss": 0.7792497295886278, "eval/duration_sec": 15.401335851056501}}
{"timestamp": 1774553823.6495585, "event": "train_step", "step": 85, "epoch": 1, "metrics": {"train/step_loss": 0.9541886299848557, "train/step_real_loss": 0.9541886299848557, "train/lr": 0.00012754905389275982, "perf/step_duration_sec": 4.181203367188573, "perf/samples_per_sec": 7.653299107887377, "perf/tokens_per_sec": 6582.554729574508, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27523.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553844.0244865, "event": "train_step", "step": 90, "epoch": 1, "metrics": {"train/step_loss": 0.8961853533983231, "train/step_real_loss": 0.8961853533983231, "train/lr": 0.00012706866307365402, "perf/step_duration_sec": 3.914687553886324, "perf/samples_per_sec": 8.174343305695457, "perf/tokens_per_sec": 6445.980592997475, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25234.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553858.373099, "event": "eval_step", "step": 90, "epoch": 1, "metrics": {"eval/loss": 0.7738248223269528, "eval/duration_sec": 14.335029735928401}}
{"timestamp": 1774553878.8061948, "event": "train_step", "step": 95, "epoch": 1, "metrics": {"train/step_loss": 0.9209906160831451, "train/step_real_loss": 0.9209906160831451, "train/lr": 0.00012654644122312678, "perf/step_duration_sec": 4.206667701015249, "perf/samples_per_sec": 7.606971188210809, "perf/tokens_per_sec": 5925.355119916957, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24926.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553899.145707, "event": "train_step", "step": 100, "epoch": 1, "metrics": {"train/step_loss": 0.826179251074791, "train/step_real_loss": 0.826179251074791, "train/lr": 0.00012598275973823978, "perf/step_duration_sec": 3.9058698068838567, "perf/samples_per_sec": 8.192797400364435, "perf/tokens_per_sec": 7605.988286563333, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29708.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553913.5749776, "event": "eval_step", "step": 100, "epoch": 1, "metrics": {"eval/loss": 0.7697948757559061, "eval/duration_sec": 14.41546514001675}}
{"timestamp": 1774553934.2116802, "event": "train_step", "step": 105, "epoch": 1, "metrics": {"train/step_loss": 0.9759816825389862, "train/step_real_loss": 0.9759816825389862, "train/lr": 0.0001253780195015816, "perf/step_duration_sec": 4.058864116901532, "perf/samples_per_sec": 7.8839791326713, "perf/tokens_per_sec": 7491.997545168799, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 30409.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553955.2146268, "event": "train_step", "step": 110, "epoch": 1, "metrics": {"train/step_loss": 0.8773302273316816, "train/step_real_loss": 0.790977269411087, "train/lr": 0.0001247326505961653, "train/step_canary_loss": 3.640625, "perf/step_duration_sec": 4.196503607090563, "perf/samples_per_sec": 7.863689177877036, "perf/tokens_per_sec": 6355.528911004801, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26671.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774553969.738038, "event": "eval_step", "step": 110, "epoch": 1, "metrics": {"eval/loss": 0.7663149330765009, "eval/duration_sec": 14.509691495914012}}
{"timestamp": 1774553989.764575, "event": "train_step", "step": 115, "epoch": 1, "metrics": {"train/step_loss": 0.8719229400157928, "train/step_real_loss": 0.8719229400157928, "train/lr": 0.00012404711199955938, "perf/step_duration_sec": 3.913796061882749, "perf/samples_per_sec": 8.17620527335455, "perf/tokens_per_sec": 5692.17190874352, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22278.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554010.1594453, "event": "train_step", "step": 120, "epoch": 1, "metrics": {"train/step_loss": 0.891044944524765, "train/step_real_loss": 0.891044944524765, "train/lr": 0.00012332189125746921, "perf/step_duration_sec": 4.04991124686785, "perf/samples_per_sec": 7.901407722144131, "perf/tokens_per_sec": 6703.356776274028, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27148.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554024.5139399, "event": "eval_step", "step": 120, "epoch": 1, "metrics": {"eval/loss": 0.7622121890837495, "eval/duration_sec": 14.3401628381107}}
{"timestamp": 1774554045.3107219, "event": "train_step", "step": 125, "epoch": 1, "metrics": {"train/step_loss": 1.1317970824964119, "train/step_real_loss": 0.9337673038244247, "train/lr": 0.00012255750413700108, "train/step_canary_loss": 7.46875, "perf/step_duration_sec": 4.311752506066114, "perf/samples_per_sec": 7.653500509032695, "perf/tokens_per_sec": 5755.896231308286, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24818.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554065.8899317, "event": "train_step", "step": 130, "epoch": 1, "metrics": {"train/step_loss": 0.8461871594190598, "train/step_real_loss": 0.8461871594190598, "train/lr": 0.0001217544942598557, "perf/step_duration_sec": 3.903787299990654, "perf/samples_per_sec": 8.197167914367826, "perf/tokens_per_sec": 6635.607426680756, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25904.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554080.204465, "event": "eval_step", "step": 130, "epoch": 1, "metrics": {"eval/loss": 0.758760767904195, "eval/duration_sec": 14.302270620130002}}
{"timestamp": 1774554100.6311214, "event": "train_step", "step": 135, "epoch": 1, "metrics": {"train/step_loss": 0.8742941617965698, "train/step_real_loss": 0.8742941617965698, "train/lr": 0.00012091343271571188, "perf/step_duration_sec": 4.057469004997984, "perf/samples_per_sec": 7.886689944047004, "perf/tokens_per_sec": 6570.105641512657, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26658.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554121.1432147, "event": "train_step", "step": 140, "epoch": 1, "metrics": {"train/step_loss": 0.8224200904369354, "train/step_real_loss": 0.8224200904369354, "train/lr": 0.0001200349176560753, "perf/step_duration_sec": 4.174908535787836, "perf/samples_per_sec": 7.6648385768674965, "perf/tokens_per_sec": 6241.813389830941, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26059.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554136.163405, "event": "eval_step", "step": 140, "epoch": 1, "metrics": {"eval/loss": 0.7551881721751257, "eval/duration_sec": 15.001483916072175}}
{"timestamp": 1774554157.103535, "event": "train_step", "step": 145, "epoch": 1, "metrics": {"train/step_loss": 0.8607720285654068, "train/step_real_loss": 0.8607720285654068, "train/lr": 0.00011911957386888138, "perf/step_duration_sec": 4.051985569996759, "perf/samples_per_sec": 7.897362773684703, "perf/tokens_per_sec": 6919.817337854543, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28039.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554178.1048322, "event": "train_step", "step": 150, "epoch": 1, "metrics": {"train/step_loss": 0.8837369531393051, "train/step_real_loss": 0.8837369531393051, "train/lr": 0.00011816805233415453, "perf/step_duration_sec": 4.205725855194032, "perf/samples_per_sec": 7.608674721506229, "perf/tokens_per_sec": 5983.033813039414, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25163.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554192.488001, "event": "eval_step", "step": 150, "epoch": 1, "metrics": {"eval/loss": 0.7522191259671341, "eval/duration_sec": 14.370011741993949}}
{"timestamp": 1774554212.9686804, "event": "train_step", "step": 155, "epoch": 1, "metrics": {"train/step_loss": 0.9628153890371323, "train/step_real_loss": 0.9628153890371323, "train/lr": 0.00011718102976104003, "perf/step_duration_sec": 3.90387806086801, "perf/samples_per_sec": 8.19697733921662, "perf/tokens_per_sec": 7104.217797682305, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27734.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554232.9551418, "event": "train_step", "step": 160, "epoch": 1, "metrics": {"train/step_loss": 0.9038268476724625, "train/step_real_loss": 0.9038268476724625, "train/lr": 0.00011615920810653783, "perf/step_duration_sec": 3.924426285084337, "perf/samples_per_sec": 8.154058115863505, "perf/tokens_per_sec": 7195.19184430118, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28237.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554247.3196304, "event": "eval_step", "step": 160, "epoch": 1, "metrics": {"eval/loss": 0.7491022223098711, "eval/duration_sec": 14.346863486105576}}
{"timestamp": 1774554267.8224869, "event": "train_step", "step": 165, "epoch": 1, "metrics": {"train/step_loss": 0.791075736284256, "train/step_real_loss": 0.791075736284256, "train/lr": 0.00011510331407628016, "perf/step_duration_sec": 4.052512887166813, "perf/samples_per_sec": 7.8963351606691115, "perf/tokens_per_sec": 6691.65052771953, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27118.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554287.8096187, "event": "train_step", "step": 170, "epoch": 1, "metrics": {"train/step_loss": 0.8601654171943665, "train/step_real_loss": 0.8601654171943665, "train/lr": 0.00011401409860770845, "perf/step_duration_sec": 4.166308968095109, "perf/samples_per_sec": 7.680659366612174, "perf/tokens_per_sec": 6054.03972512684, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25223.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554302.128572, "event": "eval_step", "step": 170, "epoch": 1, "metrics": {"eval/loss": 0.7465825582092458, "eval/duration_sec": 14.306816569063812}}
{"timestamp": 1774554322.9486656, "event": "train_step", "step": 175, "epoch": 1, "metrics": {"train/step_loss": 0.891189843416214, "train/step_real_loss": 0.891189843416214, "train/lr": 0.00011289233633601687, "perf/step_duration_sec": 4.340210136026144, "perf/samples_per_sec": 7.37291490436887, "perf/tokens_per_sec": 5622.308421762787, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24402.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554343.5452344, "event": "train_step", "step": 180, "epoch": 1, "metrics": {"train/step_loss": 1.0502220139358982, "train/step_real_loss": 0.8965180367231369, "train/lr": 0.00011173882504324231, "train/step_canary_loss": 5.96875, "perf/step_duration_sec": 4.18826737604104, "perf/samples_per_sec": 7.8791531287558945, "perf/tokens_per_sec": 6338.18178654406, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26546.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554357.8633025, "event": "eval_step", "step": 180, "epoch": 1, "metrics": {"eval/loss": 0.7451812631704591, "eval/duration_sec": 14.310468035982922}}
{"timestamp": 1774554378.4382162, "event": "train_step", "step": 185, "epoch": 1, "metrics": {"train/step_loss": 0.7786238938570023, "train/step_real_loss": 0.7786238938570023, "train/lr": 0.00011055438509089273, "perf/step_duration_sec": 4.050103273009881, "perf/samples_per_sec": 7.901033095439768, "perf/tokens_per_sec": 6538.845608329106, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26483.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554399.6228495, "event": "train_step", "step": 190, "epoch": 1, "metrics": {"train/step_loss": 0.8064423501491547, "train/step_real_loss": 0.8064423501491547, "train/lr": 0.00010933985883651711, "perf/step_duration_sec": 4.03061717399396, "perf/samples_per_sec": 7.939230797325023, "perf/tokens_per_sec": 7372.320098203533, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29715.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554413.9763694, "event": "eval_step", "step": 190, "epoch": 1, "metrics": {"eval/loss": 0.741626805371859, "eval/duration_sec": 14.339295641053468}}
{"timestamp": 1774554434.4864867, "event": "train_step", "step": 195, "epoch": 1, "metrics": {"train/step_loss": 0.7986471503973007, "train/step_real_loss": 0.7986471503973007, "train/lr": 0.00010809611003463238, "perf/step_duration_sec": 4.04728353000246, "perf/samples_per_sec": 7.906537746314143, "perf/tokens_per_sec": 7352.585945463009, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29758.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554454.8413053, "event": "train_step", "step": 200, "epoch": 1, "metrics": {"train/step_loss": 0.9408968614809441, "train/step_real_loss": 0.8413936495780945, "train/lr": 0.00010682402322243288, "train/step_canary_loss": 4.125, "perf/step_duration_sec": 4.166537210112438, "perf/samples_per_sec": 7.920246078663838, "perf/tokens_per_sec": 6887.733998954388, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28698.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554469.1606975, "event": "eval_step", "step": 200, "epoch": 1, "metrics": {"eval/loss": 0.7398894783109428, "eval/duration_sec": 14.311247569974512}}
{"timestamp": 1774554489.4258208, "event": "train_step", "step": 205, "epoch": 1, "metrics": {"train/step_loss": 0.7850026339292526, "train/step_real_loss": 0.7850026339292526, "train/lr": 0.00010552450309071972, "perf/step_duration_sec": 3.921351762022823, "perf/samples_per_sec": 8.160451278538922, "perf/tokens_per_sec": 6559.727757433021, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25723.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 82.16860580444336}}
{"timestamp": 1774554512.2455406, "event": "train_epoch", "step": 207, "epoch": 1, "metrics": {"train/epoch_loss": 0.9802634883019666, "train/epoch_real_loss": 0.9426290949059714, "train/epoch_canary_loss": 7.064656440337764, "perf/epoch_duration_sec": 1142.747375151841, "perf/epoch_samples_per_sec": 46.667357247627876, "perf/epoch_tokens_per_sec": 38364.687553252676, "perf/epoch_samples": 53329.0, "perf/epoch_tokens": 43841146.0, "system/cuda_epoch_peak_memory_gb": 82.16860580444336, "eval/loss": 0.739349182525819, "eval/duration_sec": 14.340507389977574}}
{"timestamp": 1774554523.0134153, "event": "audit_epoch", "step": 207, "epoch": 1, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.865392, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.73, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 7.607415392994881}}
{"timestamp": 1774554535.6475203, "event": "train_step", "step": 210, "epoch": 2, "metrics": {"train/step_loss": 0.7727605104446411, "train/step_real_loss": 0.7727605104446411, "train/lr": 0.00010419847384049677, "perf/step_duration_sec": 4.040141660021618, "perf/samples_per_sec": 7.920514351427166, "perf/tokens_per_sec": 6630.708092511949, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26789.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554549.9498513, "event": "eval_step", "step": 210, "epoch": 2, "metrics": {"eval/loss": 0.7384873703122139, "eval/duration_sec": 14.297562894877046}}
{"timestamp": 1774554570.8392048, "event": "train_step", "step": 215, "epoch": 2, "metrics": {"train/step_loss": 0.7951730191707611, "train/step_real_loss": 0.7951730191707611, "train/lr": 0.00010284687852569171, "perf/step_duration_sec": 4.047564970096573, "perf/samples_per_sec": 7.905987979542301, "perf/tokens_per_sec": 6833.491297692515, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27659.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554591.4806385, "event": "train_step", "step": 220, "epoch": 2, "metrics": {"train/step_loss": 0.718672901391983, "train/step_real_loss": 0.718672901391983, "train/lr": 0.00010147067838246887, "perf/step_duration_sec": 4.330356543883681, "perf/samples_per_sec": 7.3896917437890215, "perf/tokens_per_sec": 6038.994649744584, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26151.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554605.872545, "event": "eval_step", "step": 220, "epoch": 2, "metrics": {"eval/loss": 0.7374532564797184, "eval/duration_sec": 14.375713063869625}}
{"timestamp": 1774554626.3961468, "event": "train_step", "step": 225, "epoch": 2, "metrics": {"train/step_loss": 0.8126018941402435, "train/step_real_loss": 0.8126018941402435, "train/lr": 0.00010007085214561111, "perf/step_duration_sec": 4.0536466599442065, "perf/samples_per_sec": 7.894126618436063, "perf/tokens_per_sec": 6972.9807186473045, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28266.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554646.826824, "event": "train_step", "step": 230, "epoch": 2, "metrics": {"train/step_loss": 0.7822727859020233, "train/step_real_loss": 0.7822727859020233, "train/lr": 9.864839535245708e-05, "perf/step_duration_sec": 4.038130311993882, "perf/samples_per_sec": 7.92445947198756, "perf/tokens_per_sec": 6810.329998097809, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27501.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554661.2218566, "event": "eval_step", "step": 230, "epoch": 2, "metrics": {"eval/loss": 0.7352403490380809, "eval/duration_sec": 14.3719827439636}}
{"timestamp": 1774554681.9334557, "event": "train_step", "step": 235, "epoch": 2, "metrics": {"train/step_loss": 0.8374523556593693, "train/step_real_loss": 0.8584042340517044, "train/lr": 9.72043196348886e-05, "train/step_canary_loss": 0.1669921875, "perf/step_duration_sec": 4.214924396947026, "perf/samples_per_sec": 7.829321926605069, "perf/tokens_per_sec": 6433.092849693831, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27115.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.911073684692383, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554702.4781988, "event": "train_step", "step": 240, "epoch": 2, "metrics": {"train/step_loss": 0.8251948356628418, "train/step_real_loss": 0.8251948356628418, "train/lr": 9.573965199987179e-05, "perf/step_duration_sec": 4.1865190588869154, "perf/samples_per_sec": 7.643581588879224, "perf/tokens_per_sec": 6026.964082831268, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25232.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554716.898517, "event": "eval_step", "step": 240, "epoch": 2, "metrics": {"eval/loss": 0.7342509077015248, "eval/duration_sec": 14.411627813940868}}
{"timestamp": 1774554737.3030498, "event": "train_step", "step": 245, "epoch": 2, "metrics": {"train/step_loss": 0.8118036091327667, "train/step_real_loss": 0.8118036091327667, "train/lr": 9.425543409906382e-05, "perf/step_duration_sec": 4.0564355768729, "perf/samples_per_sec": 7.888699177781285, "perf/tokens_per_sec": 6362.482408729912, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25809.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554757.7326949, "event": "train_step", "step": 250, "epoch": 2, "metrics": {"train/step_loss": 0.8139301985502243, "train/step_real_loss": 0.8139301985502243, "train/lr": 9.27527214880044e-05, "perf/step_duration_sec": 4.047397410031408, "perf/samples_per_sec": 7.906315283171483, "perf/tokens_per_sec": 7014.878234993898, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28392.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554772.115813, "event": "eval_step", "step": 250, "epoch": 2, "metrics": {"eval/loss": 0.7325799978253517, "eval/duration_sec": 14.373475875938311}}
{"timestamp": 1774554792.25671, "event": "train_step", "step": 255, "epoch": 2, "metrics": {"train/step_loss": 0.8114710757226655, "train/step_real_loss": 0.7357553690671921, "train/lr": 9.123258287541902e-05, "train/step_canary_loss": 3.234375, "perf/step_duration_sec": 4.046334458980709, "perf/samples_per_sec": 8.15552948836386, "perf/tokens_per_sec": 6339.812059545397, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25653.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554813.0769246, "event": "train_step", "step": 260, "epoch": 2, "metrics": {"train/step_loss": 0.881144106388092, "train/step_real_loss": 0.881144106388092, "train/lr": 8.969609936316798e-05, "perf/step_duration_sec": 4.312109552090988, "perf/samples_per_sec": 7.420961738897116, "perf/tokens_per_sec": 6059.215259809495, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26128.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554827.4413602, "event": "eval_step", "step": 260, "epoch": 2, "metrics": {"eval/loss": 0.731244714253328, "eval/duration_sec": 14.343937204917893}}
{"timestamp": 1774554847.8263288, "event": "train_step", "step": 265, "epoch": 2, "metrics": {"train/step_loss": 0.7969602197408676, "train/step_real_loss": 0.7969602197408676, "train/lr": 8.814436367738138e-05, "perf/step_duration_sec": 3.899073272012174, "perf/samples_per_sec": 8.20707839211391, "perf/tokens_per_sec": 6611.314587247262, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25778.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554868.0991383, "event": "train_step", "step": 270, "epoch": 2, "metrics": {"train/step_loss": 0.6912222653627396, "train/step_real_loss": 0.6912222653627396, "train/lr": 8.657847939132732e-05, "perf/step_duration_sec": 4.04085955908522, "perf/samples_per_sec": 7.919107192937, "perf/tokens_per_sec": 6261.291596515594, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25301.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554882.458734, "event": "eval_step", "step": 270, "epoch": 2, "metrics": {"eval/loss": 0.7298222538083792, "eval/duration_sec": 14.344514383934438}}
{"timestamp": 1774554903.2076921, "event": "train_step", "step": 275, "epoch": 2, "metrics": {"train/step_loss": 0.7739122202902129, "train/step_real_loss": 0.7781995087862015, "train/lr": 8.499956014056542e-05, "train/step_canary_loss": 0.63671875, "perf/step_duration_sec": 4.320872014155611, "perf/samples_per_sec": 7.637347251177235, "perf/tokens_per_sec": 5764.808566047689, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24909.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554923.6055262, "event": "train_step", "step": 280, "epoch": 2, "metrics": {"train/step_loss": 0.7707284390926361, "train/step_real_loss": 0.7707284390926361, "train/lr": 8.340872883094464e-05, "perf/step_duration_sec": 3.9115682870615274, "perf/samples_per_sec": 8.180861907958468, "perf/tokens_per_sec": 7097.409009023218, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27762.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554938.0540876, "event": "eval_step", "step": 280, "epoch": 2, "metrics": {"eval/loss": 0.7296686756678603, "eval/duration_sec": 14.431537684984505}}
{"timestamp": 1774554958.6886714, "event": "train_step", "step": 285, "epoch": 2, "metrics": {"train/step_loss": 0.8824051022529602, "train/step_real_loss": 0.8824051022529602, "train/lr": 8.180711684000775e-05, "perf/step_duration_sec": 4.0876935080159456, "perf/samples_per_sec": 7.828375571027565, "perf/tokens_per_sec": 5871.526315007269, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24001.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554979.0095606, "event": "train_step", "step": 290, "epoch": 2, "metrics": {"train/step_loss": 0.842690666516622, "train/step_real_loss": 0.8214173316955566, "train/lr": 8.019586321237117e-05, "train/step_canary_loss": 1.5234375, "perf/step_duration_sec": 4.198764802888036, "perf/samples_per_sec": 7.859454279816201, "perf/tokens_per_sec": 6287.325258571755, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26399.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774554993.5177875, "event": "eval_step", "step": 290, "epoch": 2, "metrics": {"eval/loss": 0.7285996308888901, "eval/duration_sec": 14.497949979966506}}
{"timestamp": 1774555013.82094, "event": "train_step", "step": 295, "epoch": 2, "metrics": {"train/step_loss": 0.9256528724323619, "train/step_real_loss": 0.827626422047615, "train/lr": 7.857611384965188e-05, "train/step_canary_loss": 4.0625, "perf/step_duration_sec": 4.185847251908854, "perf/samples_per_sec": 7.883708605217535, "perf/tokens_per_sec": 7198.3037570851375, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 30131.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555034.4837437, "event": "train_step", "step": 300, "epoch": 2, "metrics": {"train/step_loss": 0.735210731625557, "train/step_real_loss": 0.735210731625557, "train/lr": 7.694902069551787e-05, "perf/step_duration_sec": 4.299249371979386, "perf/samples_per_sec": 7.443159777742112, "perf/tokens_per_sec": 5895.680340200916, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25347.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555048.8314226, "event": "eval_step", "step": 300, "epoch": 2, "metrics": {"eval/loss": 0.7271712996404278, "eval/duration_sec": 14.341961710015312}}
{"timestamp": 1774555069.2779965, "event": "train_step", "step": 305, "epoch": 2, "metrics": {"train/step_loss": 0.8453395962715149, "train/step_real_loss": 0.8453395962715149, "train/lr": 7.531574091644146e-05, "perf/step_duration_sec": 4.064944384852424, "perf/samples_per_sec": 7.8721864238154255, "perf/tokens_per_sec": 7108.830346531074, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28897.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555089.8652484, "event": "train_step", "step": 310, "epoch": 2, "metrics": {"train/step_loss": 0.7461211681365967, "train/step_real_loss": 0.7461211681365967, "train/lr": 7.367743607873831e-05, "perf/step_duration_sec": 4.0785790148656815, "perf/samples_per_sec": 7.845869819700881, "perf/tokens_per_sec": 5974.875051134086, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24369.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555104.1847744, "event": "eval_step", "step": 310, "epoch": 2, "metrics": {"eval/loss": 0.7258597155186262, "eval/duration_sec": 14.312350057065487}}
{"timestamp": 1774555124.933961, "event": "train_step", "step": 315, "epoch": 2, "metrics": {"train/step_loss": 0.7120204418897629, "train/step_real_loss": 0.7120204418897629, "train/lr": 7.203527132247726e-05, "perf/step_duration_sec": 4.201116909040138, "perf/samples_per_sec": 7.617022018868617, "perf/tokens_per_sec": 6345.693437531828, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26659.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555145.193779, "event": "train_step", "step": 320, "epoch": 2, "metrics": {"train/step_loss": 0.8390506953001022, "train/step_real_loss": 0.8390506953001022, "train/lr": 7.03904145328487e-05, "perf/step_duration_sec": 3.9172310910653323, "perf/samples_per_sec": 8.169035539666684, "perf/tokens_per_sec": 5949.610696483492, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23306.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555159.5807133, "event": "eval_step", "step": 320, "epoch": 2, "metrics": {"eval/loss": 0.7247643000022932, "eval/duration_sec": 14.378157665021718}}
{"timestamp": 1774555179.7327518, "event": "train_step", "step": 325, "epoch": 2, "metrics": {"train/step_loss": 0.724028617143631, "train/step_real_loss": 0.724028617143631, "train/lr": 6.874403550958033e-05, "perf/step_duration_sec": 4.05684468196705, "perf/samples_per_sec": 7.887903656317476, "perf/tokens_per_sec": 6700.527659052435, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27183.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555200.0327628, "event": "train_step", "step": 330, "epoch": 2, "metrics": {"train/step_loss": 0.8408252596855164, "train/step_real_loss": 0.8408252596855164, "train/lr": 6.709730513499171e-05, "perf/step_duration_sec": 4.064394711051136, "perf/samples_per_sec": 7.873251068109018, "perf/tokens_per_sec": 5611.905737890581, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22809.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555214.4125364, "event": "eval_step", "step": 330, "epoch": 2, "metrics": {"eval/loss": 0.7243393357335167, "eval/duration_sec": 14.370882893912494}}
{"timestamp": 1774555234.9486976, "event": "train_step", "step": 335, "epoch": 2, "metrics": {"train/step_loss": 0.8023520559072495, "train/step_real_loss": 0.8023520559072495, "train/lr": 6.545139454127874e-05, "perf/step_duration_sec": 3.9175845759455115, "perf/samples_per_sec": 8.168298445037854, "perf/tokens_per_sec": 7135.51921039338, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27954.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555255.344877, "event": "train_step", "step": 340, "epoch": 2, "metrics": {"train/step_loss": 0.761395126581192, "train/step_real_loss": 0.761395126581192, "train/lr": 6.380747427762022e-05, "perf/step_duration_sec": 3.899482361972332, "perf/samples_per_sec": 8.206217397484167, "perf/tokens_per_sec": 7320.458807143219, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28546.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555269.7291596, "event": "eval_step", "step": 340, "epoch": 2, "metrics": {"eval/loss": 0.7232737271115184, "eval/duration_sec": 14.375522380927578}}
{"timestamp": 1774555290.010674, "event": "train_step", "step": 345, "epoch": 2, "metrics": {"train/step_loss": 0.8565814793109894, "train/step_real_loss": 0.8565814793109894, "train/lr": 6.216671347769945e-05, "perf/step_duration_sec": 3.908495286013931, "perf/samples_per_sec": 8.187293998922822, "perf/tokens_per_sec": 7205.07457198955, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28161.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555310.9461029, "event": "train_step", "step": 350, "epoch": 2, "metrics": {"train/step_loss": 0.8680592477321625, "train/step_real_loss": 0.8680592477321625, "train/lr": 6.0530279028232125e-05, "perf/step_duration_sec": 4.1912537780590355, "perf/samples_per_sec": 7.634946890478954, "perf/tokens_per_sec": 6467.992976687311, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27109.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555325.3574302, "event": "eval_step", "step": 350, "epoch": 2, "metrics": {"eval/loss": 0.7229700221426107, "eval/duration_sec": 14.39207775099203}}
{"timestamp": 1774555345.6532178, "event": "train_step", "step": 355, "epoch": 2, "metrics": {"train/step_loss": 0.8551525925145005, "train/step_real_loss": 0.7954503297805786, "train/lr": 5.8899334739092517e-05, "train/step_canary_loss": 2.765625, "perf/step_duration_sec": 4.035539146978408, "perf/samples_per_sec": 8.177346024436066, "perf/tokens_per_sec": 6861.041112866236, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27688.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.911073684692383, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555366.2652762, "event": "train_step", "step": 360, "epoch": 2, "metrics": {"train/step_loss": 0.9246349334716797, "train/step_real_loss": 0.9246349334716797, "train/lr": 5.727504051562765e-05, "perf/step_duration_sec": 4.189823837950826, "perf/samples_per_sec": 7.637552612629813, "perf/tokens_per_sec": 6692.1668032977295, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28039.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555380.6704392, "event": "eval_step", "step": 360, "epoch": 2, "metrics": {"eval/loss": 0.721110902811316, "eval/duration_sec": 14.396297077881172}}
{"timestamp": 1774555400.7964182, "event": "train_step", "step": 365, "epoch": 2, "metrics": {"train/step_loss": 0.8083836138248444, "train/step_real_loss": 0.8083836138248444, "train/lr": 5.56585515337485e-05, "perf/step_duration_sec": 3.9256410428788513, "perf/samples_per_sec": 8.151534908686644, "perf/tokens_per_sec": 7213.853658721784, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28319.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555421.5832565, "event": "train_step", "step": 370, "epoch": 2, "metrics": {"train/step_loss": 0.8698864430189133, "train/step_real_loss": 0.8698864430189133, "train/lr": 5.4051017418384655e-05, "perf/step_duration_sec": 4.0367174269631505, "perf/samples_per_sec": 7.927233099413107, "perf/tokens_per_sec": 6376.963576409132, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25742.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555435.9105296, "event": "eval_step", "step": 370, "epoch": 2, "metrics": {"eval/loss": 0.7214122042906554, "eval/duration_sec": 14.319930989062414}}
{"timestamp": 1774555456.3206792, "event": "train_step", "step": 375, "epoch": 2, "metrics": {"train/step_loss": 0.878958441994407, "train/step_real_loss": 0.776543065905571, "train/lr": 5.245358142588668e-05, "train/step_canary_loss": 4.15625, "perf/step_duration_sec": 4.203468808205798, "perf/samples_per_sec": 7.850658945197613, "perf/tokens_per_sec": 6779.876644823842, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28499.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555476.475419, "event": "train_step", "step": 380, "epoch": 2, "metrics": {"train/step_loss": 0.7718449681997299, "train/step_real_loss": 0.7718449681997299, "train/lr": 5.086737963095781e-05, "perf/step_duration_sec": 4.045331403147429, "perf/samples_per_sec": 7.910353148101222, "perf/tokens_per_sec": 7141.812900056013, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28891.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555490.878454, "event": "eval_step", "step": 380, "epoch": 2, "metrics": {"eval/loss": 0.7201131373982537, "eval/duration_sec": 14.398557532113045}}
{"timestamp": 1774555511.274831, "event": "train_step", "step": 385, "epoch": 2, "metrics": {"train/step_loss": 0.7751782536506653, "train/step_real_loss": 0.7751782536506653, "train/lr": 4.9293540118693075e-05, "perf/step_duration_sec": 3.9201855771243572, "perf/samples_per_sec": 8.162878866432013, "perf/tokens_per_sec": 6499.437207431914, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25479.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555531.8168743, "event": "train_step", "step": 390, "epoch": 2, "metrics": {"train/step_loss": 0.8287598341703415, "train/step_real_loss": 0.8287598341703415, "train/lr": 4.7733182182300636e-05, "perf/step_duration_sec": 4.05745268589817, "perf/samples_per_sec": 7.886721664362769, "perf/tokens_per_sec": 6836.062462638441, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27737.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555546.155326, "event": "eval_step", "step": 390, "epoch": 2, "metrics": {"eval/loss": 0.7203310768712651, "eval/duration_sec": 14.325756101869047}}
{"timestamp": 1774555567.6548843, "event": "train_step", "step": 395, "epoch": 2, "metrics": {"train/step_loss": 1.0545116084994692, "train/step_real_loss": 0.7984025925397873, "train/lr": 4.618741552707563e-05, "train/step_canary_loss": 9.25, "perf/step_duration_sec": 4.383844391908497, "perf/samples_per_sec": 7.527639452921714, "perf/tokens_per_sec": 5541.483188782642, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24293.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555588.0626285, "event": "train_step", "step": 400, "epoch": 2, "metrics": {"train/step_loss": 0.6683916449546814, "train/step_real_loss": 0.6683916449546814, "train/lr": 4.465733948119293e-05, "perf/step_duration_sec": 4.188642976805568, "perf/samples_per_sec": 7.639705789488061, "perf/tokens_per_sec": 6404.222118844288, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26825.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555602.4239583, "event": "eval_step", "step": 400, "epoch": 2, "metrics": {"eval/loss": 0.7195017964325169, "eval/duration_sec": 14.350818340899423}}
{"timestamp": 1774555623.628339, "event": "train_step", "step": 405, "epoch": 2, "metrics": {"train/step_loss": 0.7100922465324402, "train/step_real_loss": 0.7100922465324402, "train/lr": 4.314404221387989e-05, "perf/step_duration_sec": 4.0663633740041405, "perf/samples_per_sec": 7.869439363086152, "perf/tokens_per_sec": 6571.719628117226, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26723.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555644.0471923, "event": "train_step", "step": 410, "epoch": 2, "metrics": {"train/step_loss": 0.7728860527276993, "train/step_real_loss": 0.7728860527276993, "train/lr": 4.16485999615253e-05, "perf/step_duration_sec": 4.212369584944099, "perf/samples_per_sec": 7.596674355064849, "perf/tokens_per_sec": 6514.385655541703, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27441.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555658.4418344, "event": "eval_step", "step": 410, "epoch": 2, "metrics": {"eval/loss": 0.7190890578045086, "eval/duration_sec": 14.381551975151524}}
{"timestamp": 1774555689.6422765, "event": "train_epoch", "step": 414, "epoch": 2, "metrics": {"train/epoch_loss": 0.8086463928382063, "train/epoch_real_loss": 0.7929126681031092, "train/epoch_canary_loss": 3.2930961304990496, "perf/epoch_duration_sec": 1152.1311249691062, "perf/epoch_samples_per_sec": 46.29334182897817, "perf/epoch_tokens_per_sec": 38053.41167323782, "perf/epoch_samples": 53336.0, "perf/epoch_tokens": 43842520.0, "system/cuda_epoch_peak_memory_gb": 81.2615852355957, "eval/loss": 0.7187386602163315, "eval/duration_sec": 14.37624513101764}}
{"timestamp": 1774555701.089365, "event": "audit_epoch", "step": 414, "epoch": 2, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.962576, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.538, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 8.31035048700869}}
{"timestamp": 1774555705.7426307, "event": "train_step", "step": 415, "epoch": 3, "metrics": {"train/step_loss": 0.8108633011579514, "train/step_real_loss": 0.8108633011579514, "train/lr": 4.0172076262274715e-05, "perf/step_duration_sec": 4.203053849982098, "perf/samples_per_sec": 7.613511780282401, "perf/tokens_per_sec": 6549.285586744802, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27527.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 69.79438161849976}}
{"timestamp": 1774555726.2955208, "event": "train_step", "step": 420, "epoch": 3, "metrics": {"train/step_loss": 0.7015628616015116, "train/step_real_loss": 0.7120731174945831, "train/lr": 3.87155211996565e-05, "train/step_canary_loss": 0.365234375, "perf/step_duration_sec": 4.047287730034441, "perf/samples_per_sec": 8.153608589552684, "perf/tokens_per_sec": 6186.859366133309, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25040.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 75.98190212249756}}
{"timestamp": 1774555740.7218258, "event": "eval_step", "step": 420, "epoch": 3, "metrics": {"eval/loss": 0.7188324835151434, "eval/duration_sec": 14.415742038981989}}
{"timestamp": 1774555765.478883, "event": "train_step", "step": 425, "epoch": 3, "metrics": {"train/step_loss": 0.8183407336473465, "train/step_real_loss": 0.8183407336473465, "train/lr": 3.727997065577692e-05, "perf/step_duration_sec": 5.478755255928263, "perf/samples_per_sec": 5.84074274268312, "perf/tokens_per_sec": 4635.359459161891, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25396.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555786.0388782, "event": "train_step", "step": 430, "epoch": 3, "metrics": {"train/step_loss": 0.7322118431329727, "train/step_real_loss": 0.7322118431329727, "train/lr": 3.586644557461483e-05, "perf/step_duration_sec": 4.052030459977686, "perf/samples_per_sec": 7.897275283605893, "perf/tokens_per_sec": 6449.852798032501, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26135.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555800.447401, "event": "eval_step", "step": 430, "epoch": 3, "metrics": {"eval/loss": 0.7191672102463518, "eval/duration_sec": 14.397158123087138}}
{"timestamp": 1774555820.827929, "event": "train_step", "step": 435, "epoch": 3, "metrics": {"train/step_loss": 0.7593975961208344, "train/step_real_loss": 0.7593975961208344, "train/lr": 3.4475951235940214e-05, "perf/step_duration_sec": 3.9098970349878073, "perf/samples_per_sec": 8.184358747467575, "perf/tokens_per_sec": 6447.995886950158, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25211.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555841.5844147, "event": "train_step", "step": 440, "epoch": 3, "metrics": {"train/step_loss": 0.8359665274620056, "train/step_real_loss": 0.8359665274620056, "train/lr": 3.310947654037288e-05, "perf/step_duration_sec": 4.059369632042944, "perf/samples_per_sec": 7.882997337174116, "perf/tokens_per_sec": 5305.010864251393, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21535.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555855.9565187, "event": "eval_step", "step": 440, "epoch": 3, "metrics": {"eval/loss": 0.7184737165543166, "eval/duration_sec": 14.361595854861662}}
{"timestamp": 1774555876.2583203, "event": "train_step", "step": 445, "epoch": 3, "metrics": {"train/step_loss": 0.8348608464002609, "train/step_real_loss": 0.8348608464002609, "train/lr": 3.176799330608995e-05, "perf/step_duration_sec": 4.068460470996797, "perf/samples_per_sec": 7.865383043075212, "perf/tokens_per_sec": 6628.305766331539, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26967.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555896.4150417, "event": "train_step", "step": 450, "epoch": 3, "metrics": {"train/step_loss": 0.7213988453149796, "train/step_real_loss": 0.7213988453149796, "train/lr": 3.0452455577681885e-05, "perf/step_duration_sec": 4.0494188368320465, "perf/samples_per_sec": 7.90236853469925, "perf/tokens_per_sec": 6482.411688620478, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26250.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555910.7735739, "event": "eval_step", "step": 450, "epoch": 3, "metrics": {"eval/loss": 0.7186485396020792, "eval/duration_sec": 14.343530175974593}}
{"timestamp": 1774555931.7228012, "event": "train_step", "step": 455, "epoch": 3, "metrics": {"train/step_loss": 0.7308650314807892, "train/step_real_loss": 0.7308650314807892, "train/lr": 2.9163798947649072e-05, "perf/step_duration_sec": 4.987672682851553, "perf/samples_per_sec": 6.415817964563175, "perf/tokens_per_sec": 5199.218483032882, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25932.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555952.5065746, "event": "train_step", "step": 460, "epoch": 3, "metrics": {"train/step_loss": 0.7551419883966446, "train/step_real_loss": 0.7551419883966446, "train/lr": 2.7902939891021083e-05, "perf/step_duration_sec": 4.063495999202132, "perf/samples_per_sec": 7.874992372647396, "perf/tokens_per_sec": 6199.587745366663, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25192.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774555966.9044435, "event": "eval_step", "step": 460, "epoch": 3, "metrics": {"eval/loss": 0.7180999453094873, "eval/duration_sec": 14.38420231686905}}
{"timestamp": 1774555986.9801674, "event": "train_step", "step": 465, "epoch": 3, "metrics": {"train/step_loss": 0.8047224432229996, "train/step_real_loss": 0.8047224432229996, "train/lr": 2.667077511357227e-05, "perf/step_duration_sec": 4.183107492979616, "perf/samples_per_sec": 7.649815371396657, "perf/tokens_per_sec": 6626.89162220271, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27721.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556007.3545318, "event": "train_step", "step": 470, "epoch": 3, "metrics": {"train/step_loss": 0.7912029772996902, "train/step_real_loss": 0.7912029772996902, "train/lr": 2.5468180914096723e-05, "perf/step_duration_sec": 4.049210264114663, "perf/samples_per_sec": 7.902775581597667, "perf/tokens_per_sec": 5838.669384379126, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23642.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556021.7534742, "event": "eval_step", "step": 470, "epoch": 3, "metrics": {"eval/loss": 0.718302655118433, "eval/duration_sec": 14.383481507888064}}
{"timestamp": 1774556042.671436, "event": "train_step", "step": 475, "epoch": 3, "metrics": {"train/step_loss": 0.7446039021015167, "train/step_real_loss": 0.7446039021015167, "train/lr": 2.4296012561196572e-05, "perf/step_duration_sec": 4.204125730087981, "perf/samples_per_sec": 7.611570646182917, "perf/tokens_per_sec": 6310.229927268332, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26529.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556063.047239, "event": "train_step", "step": 480, "epoch": 3, "metrics": {"train/step_loss": 0.7637542901617108, "train/step_real_loss": 0.7610102891921997, "train/lr": 2.3155103685026555e-05, "train/step_canary_loss": 0.8515625, "perf/step_duration_sec": 4.0629472092259675, "perf/samples_per_sec": 8.122182814747138, "perf/tokens_per_sec": 7276.491295005588, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 29564.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556077.3814642, "event": "eval_step", "step": 480, "epoch": 3, "metrics": {"eval/loss": 0.7180261675437744, "eval/duration_sec": 14.308435191167518}}
{"timestamp": 1774556098.0707338, "event": "train_step", "step": 485, "epoch": 3, "metrics": {"train/step_loss": 0.7441707253456116, "train/step_real_loss": 0.7441707253456116, "train/lr": 2.2046265684427694e-05, "perf/step_duration_sec": 3.9166555600240827, "perf/samples_per_sec": 8.170235934610302, "perf/tokens_per_sec": 6479.507735891883, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25378.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556118.5760849, "event": "train_step", "step": 490, "epoch": 3, "metrics": {"train/step_loss": 0.8141974859377917, "train/step_real_loss": 0.8160125613212585, "train/lr": 2.0970287149871505e-05, "train/step_canary_loss": 0.78515625, "perf/step_duration_sec": 4.1826326318550855, "perf/samples_per_sec": 8.128851609164702, "perf/tokens_per_sec": 6459.089855093928, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 27016.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.911073684692383, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556132.918202, "event": "eval_step", "step": 490, "epoch": 3, "metrics": {"eval/loss": 0.718335594135252, "eval/duration_sec": 14.323791122995317}}
{"timestamp": 1774556153.0248682, "event": "train_step", "step": 495, "epoch": 3, "metrics": {"train/step_loss": 0.7949029505252838, "train/step_real_loss": 0.7949029505252838, "train/lr": 1.9927933302625058e-05, "perf/step_duration_sec": 4.052215630887076, "perf/samples_per_sec": 7.896914408030857, "perf/tokens_per_sec": 6438.4530282976575, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26090.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556172.9838452, "event": "train_step", "step": 500, "epoch": 3, "metrics": {"train/step_loss": 0.7189302146434784, "train/step_real_loss": 0.7189302146434784, "train/lr": 1.8919945450536213e-05, "perf/step_duration_sec": 4.032273657154292, "perf/samples_per_sec": 7.935969311810909, "perf/tokens_per_sec": 6069.5285292893805, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24474.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556187.4298997, "event": "eval_step", "step": 500, "epoch": 3, "metrics": {"eval/loss": 0.7177054143764756, "eval/duration_sec": 14.437366659054533}}
{"timestamp": 1774556208.1658196, "event": "train_step", "step": 505, "epoch": 3, "metrics": {"train/step_loss": 0.754171833395958, "train/step_real_loss": 0.754171833395958, "train/lr": 1.7947040460825512e-05, "perf/step_duration_sec": 4.17792778997682, "perf/samples_per_sec": 7.659299444277265, "perf/tokens_per_sec": 6220.308561183673, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25988.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556228.9858258, "event": "train_step", "step": 510, "epoch": 3, "metrics": {"train/step_loss": 0.6840805754517064, "train/step_real_loss": 0.7001480311155319, "train/lr": 1.7009910250259996e-05, "train/step_canary_loss": 0.169921875, "perf/step_duration_sec": 4.205554463900626, "perf/samples_per_sec": 7.846765577110777, "perf/tokens_per_sec": 6369.195840863312, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26786.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.911073684692383, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556243.3994532, "event": "eval_step", "step": 510, "epoch": 3, "metrics": {"eval/loss": 0.7176493221724576, "eval/duration_sec": 14.395424627000466}}
{"timestamp": 1774556264.4375181, "event": "train_step", "step": 515, "epoch": 3, "metrics": {"train/step_loss": 0.7091023027896881, "train/step_real_loss": 0.7091023027896881, "train/lr": 1.610922129307137e-05, "perf/step_duration_sec": 4.053495781030506, "perf/samples_per_sec": 7.894420453020616, "perf/tokens_per_sec": 6595.541587859568, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26735.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556284.8626652, "event": "train_step", "step": 520, "epoch": 3, "metrics": {"train/step_loss": 0.693090558052063, "train/step_real_loss": 0.693090558052063, "train/lr": 1.5245614146968727e-05, "perf/step_duration_sec": 4.05639239307493, "perf/samples_per_sec": 7.888783159792523, "perf/tokens_per_sec": 6549.908742891486, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26569.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556299.2575321, "event": "eval_step", "step": 520, "epoch": 3, "metrics": {"eval/loss": 0.7175734629007903, "eval/duration_sec": 14.388575801858678}}
{"timestamp": 1774556320.00824, "event": "train_step", "step": 525, "epoch": 3, "metrics": {"train/step_loss": 0.8165147751569748, "train/step_real_loss": 0.8165147751569748, "train/lr": 1.4419702997582553e-05, "perf/step_duration_sec": 4.174595175078139, "perf/samples_per_sec": 7.665413928286121, "perf/tokens_per_sec": 5831.224101758407, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24343.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556340.4206285, "event": "train_step", "step": 530, "epoch": 3, "metrics": {"train/step_loss": 0.7550336122512817, "train/step_real_loss": 0.7550336122512817, "train/lr": 1.3632075221664314e-05, "perf/step_duration_sec": 3.912369176046923, "perf/samples_per_sec": 8.179187229036744, "perf/tokens_per_sec": 6380.021638249567, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24961.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556354.8610296, "event": "eval_step", "step": 530, "epoch": 3, "metrics": {"eval/loss": 0.7174779500981624, "eval/duration_sec": 14.427679382031783}}
{"timestamp": 1774556374.7170012, "event": "train_step", "step": 535, "epoch": 3, "metrics": {"train/step_loss": 0.6727401167154312, "train/step_real_loss": 0.6727401167154312, "train/lr": 1.2883290969352076e-05, "perf/step_duration_sec": 3.9014252100605518, "perf/samples_per_sec": 8.202130830928667, "perf/tokens_per_sec": 7615.678476517267, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29712.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556395.282418, "event": "train_step", "step": 540, "epoch": 3, "metrics": {"train/step_loss": 0.6862972974777222, "train/step_real_loss": 0.6862972974777222, "train/lr": 1.2173882765799416e-05, "perf/step_duration_sec": 3.9299339859280735, "perf/samples_per_sec": 8.142630414297669, "perf/tokens_per_sec": 6189.925858068909, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24326.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556409.6665437, "event": "eval_step", "step": 540, "epoch": 3, "metrics": {"eval/loss": 0.7178084114566444, "eval/duration_sec": 14.371931984787807}}
{"timestamp": 1774556430.352163, "event": "train_step", "step": 545, "epoch": 3, "metrics": {"train/step_loss": 0.7814168930053711, "train/step_real_loss": 0.7814168930053711, "train/lr": 1.1504355132450789e-05, "perf/step_duration_sec": 4.323989515192807, "perf/samples_per_sec": 7.400572986489566, "perf/tokens_per_sec": 6434.566943846849, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27823.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556450.4184394, "event": "train_step", "step": 550, "epoch": 3, "metrics": {"train/step_loss": 0.7106742560863495, "train/step_real_loss": 0.7106742560863495, "train/lr": 1.0875184228232696e-05, "perf/step_duration_sec": 4.166487062117085, "perf/samples_per_sec": 7.68033106137622, "perf/tokens_per_sec": 6551.082385008248, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27295.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556464.7908075, "event": "eval_step", "step": 550, "epoch": 3, "metrics": {"eval/loss": 0.7176614910872146, "eval/duration_sec": 14.354724811855704}}
{"timestamp": 1774556485.5714064, "event": "train_step", "step": 555, "epoch": 3, "metrics": {"train/step_loss": 0.7228728085756302, "train/step_real_loss": 0.7228728085756302, "train/lr": 1.0286817510915984e-05, "perf/step_duration_sec": 4.180581979919225, "perf/samples_per_sec": 7.654436667838837, "perf/tokens_per_sec": 6166.127138235923, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25778.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556506.1085727, "event": "train_step", "step": 560, "epoch": 3, "metrics": {"train/step_loss": 0.7989834994077682, "train/step_real_loss": 0.7989834994077682, "train/lr": 9.739673418890007e-06, "perf/step_duration_sec": 3.905877294950187, "perf/samples_per_sec": 8.192781693723973, "perf/tokens_per_sec": 6335.836517955848, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24747.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556520.5357444, "event": "eval_step", "step": 560, "epoch": 3, "metrics": {"eval/loss": 0.7174440165134994, "eval/duration_sec": 14.415798556990921}}
{"timestamp": 1774556540.7831445, "event": "train_step", "step": 565, "epoch": 3, "metrics": {"train/step_loss": 0.7437160909175873, "train/step_real_loss": 0.7437160909175873, "train/lr": 9.234141073574882e-06, "perf/step_duration_sec": 3.9135348049458116, "perf/samples_per_sec": 8.176751094575504, "perf/tokens_per_sec": 6378.63242418401, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24963.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556561.3175313, "event": "train_step", "step": 570, "epoch": 3, "metrics": {"train/step_loss": 0.8033476173877716, "train/step_real_loss": 0.8033476173877716, "train/lr": 8.770580002683704e-06, "perf/step_duration_sec": 3.906573449028656, "perf/samples_per_sec": 8.19132173438505, "perf/tokens_per_sec": 6257.40186865758, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24445.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556575.6935048, "event": "eval_step", "step": 570, "epoch": 3, "metrics": {"eval/loss": 0.7172568004409021, "eval/duration_sec": 14.364397315075621}}
{"timestamp": 1774556596.3656428, "event": "train_step", "step": 575, "epoch": 3, "metrics": {"train/step_loss": 0.7547231316566467, "train/step_real_loss": 0.7547231316566467, "train/lr": 8.349319884531289e-06, "perf/step_duration_sec": 4.209742725128308, "perf/samples_per_sec": 7.601414644412665, "perf/tokens_per_sec": 5992.290181873559, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25226.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556616.7898061, "event": "train_step", "step": 580, "epoch": 3, "metrics": {"train/step_loss": 0.7410679310560226, "train/step_real_loss": 0.7410679310560226, "train/lr": 7.970660313571474e-06, "perf/step_duration_sec": 4.182044423883781, "perf/samples_per_sec": 7.651759942397322, "perf/tokens_per_sec": 6591.512955378893, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27566.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556631.2305882, "event": "eval_step", "step": 580, "epoch": 3, "metrics": {"eval/loss": 0.7171879657967524, "eval/duration_sec": 14.414467524038628}}
{"timestamp": 1774556651.8706462, "event": "train_step", "step": 585, "epoch": 3, "metrics": {"train/step_loss": 0.7519723176956177, "train/step_real_loss": 0.7519723176956177, "train/lr": 7.63487058732963e-06, "perf/step_duration_sec": 4.188516626134515, "perf/samples_per_sec": 7.639936248631311, "perf/tokens_per_sec": 7093.919554861942, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29713.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556672.433407, "event": "train_step", "step": 590, "epoch": 3, "metrics": {"train/step_loss": 0.7938568443059921, "train/step_real_loss": 0.7938568443059921, "train/lr": 7.3421895148819015e-06, "perf/step_duration_sec": 4.055578886065632, "perf/samples_per_sec": 7.890365567773138, "perf/tokens_per_sec": 6769.193935377373, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27453.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556686.765385, "event": "eval_step", "step": 590, "epoch": 3, "metrics": {"eval/loss": 0.7172485385090113, "eval/duration_sec": 14.316535921068862}}
{"timestamp": 1774556706.8602645, "event": "train_step", "step": 595, "epoch": 3, "metrics": {"train/step_loss": 0.7107481211423874, "train/step_real_loss": 0.7107481211423874, "train/lr": 7.092825247017485e-06, "perf/step_duration_sec": 4.0743051478639245, "perf/samples_per_sec": 7.85409998482243, "perf/tokens_per_sec": 6842.639171152017, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27879.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556727.6577299, "event": "train_step", "step": 600, "epoch": 3, "metrics": {"train/step_loss": 0.8000398129224777, "train/step_real_loss": 0.8000398129224777, "train/lr": 6.886955128204604e-06, "perf/step_duration_sec": 4.040225179865956, "perf/samples_per_sec": 7.920350617947902, "perf/tokens_per_sec": 6641.461504106127, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26833.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556742.0445197, "event": "eval_step", "step": 600, "epoch": 3, "metrics": {"eval/loss": 0.7173968511210247, "eval/duration_sec": 14.372699263971299}}
{"timestamp": 1774556762.268666, "event": "train_step", "step": 605, "epoch": 3, "metrics": {"train/step_loss": 0.8259331583976746, "train/step_real_loss": 0.8259331583976746, "train/lr": 6.724725570465559e-06, "perf/step_duration_sec": 4.035685364855453, "perf/samples_per_sec": 7.92926036273052, "perf/tokens_per_sec": 6393.709535610487, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25803.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556783.3787, "event": "train_step", "step": 610, "epoch": 3, "metrics": {"train/step_loss": 0.6746743538162925, "train/step_real_loss": 0.692965567111969, "train/lr": 6.606251949250442e-06, "train/step_canary_loss": 0.08935546875, "perf/step_duration_sec": 4.200363246025518, "perf/samples_per_sec": 7.856463374024943, "perf/tokens_per_sec": 6404.446097716515, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26901.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556797.7666376, "event": "eval_step", "step": 610, "epoch": 3, "metrics": {"eval/loss": 0.7172234126112677, "eval/duration_sec": 14.376520012039691}}
{"timestamp": 1774556818.2980213, "event": "train_step", "step": 615, "epoch": 3, "metrics": {"train/step_loss": 0.8075433671474457, "train/step_real_loss": 0.8075433671474457, "train/lr": 6.531618521383758e-06, "perf/step_duration_sec": 4.17410640604794, "perf/samples_per_sec": 7.66631151367742, "perf/tokens_per_sec": 6564.518805821093, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27401.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556838.564295, "event": "train_step", "step": 620, "epoch": 3, "metrics": {"train/step_loss": 0.691263422369957, "train/step_real_loss": 0.691263422369957, "train/lr": 6.50087836514208e-06, "perf/step_duration_sec": 4.189996084896848, "perf/samples_per_sec": 7.637238639755865, "perf/tokens_per_sec": 6137.475901873807, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25716.0, "perf/gradient_accumulation_steps": 4.0, "system/cuda_memory_allocated_gb": 17.816345691680908, "system/cuda_max_memory_allocated_gb": 81.2615852355957}}
{"timestamp": 1774556852.9742088, "event": "eval_step", "step": 620, "epoch": 3, "metrics": {"eval/loss": 0.7171307263726538, "eval/duration_sec": 14.387907515047118}}
{"timestamp": 1774556871.8652368, "event": "train_epoch", "step": 621, "epoch": 3, "metrics": {"train/epoch_loss": 0.7704904898906849, "train/epoch_real_loss": 0.762895334188057, "train/epoch_canary_loss": 1.9868655627132747, "perf/epoch_duration_sec": 1156.3163079482038, "perf/epoch_samples_per_sec": 46.12146316143534, "perf/epoch_tokens_per_sec": 37915.522507673464, "perf/epoch_samples": 53331.0, "perf/epoch_tokens": 43842337.0, "system/cuda_epoch_peak_memory_gb": 81.2615852355957, "eval/loss": 0.7171374095434493, "eval/duration_sec": 14.38155033509247}}
{"timestamp": 1774556882.959924, "event": "audit_epoch", "step": 621, "epoch": 3, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.997368, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.52, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 7.9593279850669205}}
{"timestamp": 1774556893.0941348, "event": "audit_final", "step": 621, "epoch": 3, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.997368, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.52, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0}}
{"timestamp": 1774556893.649601, "event": "energy_final", "step": 621, "epoch": null, "metrics": {"energy/codecarbon/duration": 3685.56630371185, "energy/codecarbon/emissions": 0.16952444726420043, "energy/codecarbon/emissions_rate": 4.599685185244585e-05, "energy/codecarbon/cpu_power": 80.40813034674923, "energy/codecarbon/gpu_power": 4660.729357242909, "energy/codecarbon/ram_power": 38.0, "energy/codecarbon/cpu_energy": 0.07906645068697907, "energy/codecarbon/gpu_energy": 4.748948588600271, "energy/codecarbon/ram_energy": 0.03736530293931913, "energy/codecarbon/energy_consumed": 4.865380342226571, "energy/codecarbon/water_consumed": 0.0, "energy/codecarbon/cpu_count": 8.0, "energy/codecarbon/gpu_count": 8.0, "energy/codecarbon/longitude": 8.212, "energy/codecarbon/latitude": 47.4843, "energy/codecarbon/ram_total_size": 256.0, "energy/codecarbon/cpu_utilization_percent": 8.855084745762712, "energy/codecarbon/gpu_utilization_percent": 93.662247129579, "energy/codecarbon/ram_utilization_percent": 26.020803717878625, "energy/codecarbon/ram_used_gb": 515.4519262068776, "energy/codecarbon/pue": 1.0, "energy/codecarbon/wue": 0.0}}