File size: 19,778 Bytes
903307f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | {"timestamp": 1774096500.8353336, "event": "train_step", "step": 10, "epoch": 1, "metrics": {"train/step_loss": 1.6323321043555417, "train/step_real_loss": 1.330917976796627, "train/lr": 0.00018181818181818183, "train/step_canary_loss": 8.062500158945719, "perf/step_duration_sec": 17.560116425156593, "perf/samples_per_sec": 7.573981674145647, "perf/tokens_per_sec": 6185.665138551686, "perf/logical_batch_size": 133.0, "perf/logical_token_count": 108621.0, "perf/physical_batches": 18.0, "privacy/epsilon": 0.7131647471248268, "system/cuda_memory_allocated_gb": 16.63217782974243, "system/cuda_max_memory_allocated_gb": 74.69534254074097}}
{"timestamp": 1774096676.393492, "event": "train_step", "step": 20, "epoch": 1, "metrics": {"train/step_loss": 1.826857232699429, "train/step_real_loss": 1.3395854756236076, "train/lr": 0.00019897180218885507, "train/step_canary_loss": 8.756944444444445, "perf/step_duration_sec": 18.843991773203015, "perf/samples_per_sec": 7.217154498729827, "perf/tokens_per_sec": 5434.1989336686165, "perf/logical_batch_size": 136.0, "perf/logical_token_count": 102402.0, "perf/physical_batches": 18.0, "privacy/epsilon": 0.9542480237478311, "system/cuda_memory_allocated_gb": 16.916476249694824, "system/cuda_max_memory_allocated_gb": 74.69534397125244}}
{"timestamp": 1774096852.7017646, "event": "train_step", "step": 30, "epoch": 1, "metrics": {"train/step_loss": 1.5258880311792546, "train/step_real_loss": 1.3621462360024452, "train/lr": 0.00019544467510209388, "train/step_canary_loss": 6.765625476837158, "perf/step_duration_sec": 18.006671600043774, "perf/samples_per_sec": 7.275081309289916, "perf/tokens_per_sec": 5874.1561099910805, "perf/logical_batch_size": 131.0, "perf/logical_token_count": 105774.0, "perf/physical_batches": 18.0, "privacy/epsilon": 1.145336977893831, "system/cuda_memory_allocated_gb": 16.442769050598145, "system/cuda_max_memory_allocated_gb": 74.69534397125244}}
{"timestamp": 1774097029.0227332, "event": "train_step", "step": 40, "epoch": 1, "metrics": {"train/step_loss": 1.6870967794347693, "train/step_real_loss": 1.2007466033101082, "train/lr": 0.00018949541262593762, "train/step_canary_loss": 10.580357142857142, "perf/step_duration_sec": 17.532358843833208, "perf/samples_per_sec": 7.643010344106256, "perf/tokens_per_sec": 6436.156195815627, "perf/logical_batch_size": 134.0, "perf/logical_token_count": 112841.0, "perf/physical_batches": 18.0, "privacy/epsilon": 1.311193108872294, "system/cuda_memory_allocated_gb": 16.726882934570312, "system/cuda_max_memory_allocated_gb": 74.69534683227539}}
{"timestamp": 1774097204.3534977, "event": "train_step", "step": 50, "epoch": 1, "metrics": {"train/step_loss": 1.5818433902881763, "train/step_real_loss": 1.2425691708922386, "train/lr": 0.00018127499143005268, "train/step_canary_loss": 7.785714830671038, "perf/step_duration_sec": 17.701514894142747, "perf/samples_per_sec": 7.569973575783576, "perf/tokens_per_sec": 6216.078152520672, "perf/logical_batch_size": 134.0, "perf/logical_token_count": 110034.0, "perf/physical_batches": 18.0, "privacy/epsilon": 1.4583641061524852, "system/cuda_memory_allocated_gb": 16.726882934570312, "system/cuda_max_memory_allocated_gb": 74.69534683227539}}
{"timestamp": 1774097231.3920577, "event": "eval_step", "step": 50, "epoch": 1, "metrics": {"eval/loss": 0.8423196817266531, "eval/duration_sec": 27.033981669694185}}
{"timestamp": 1774097408.5970793, "event": "train_step", "step": 60, "epoch": 1, "metrics": {"train/step_loss": 1.6164713336112804, "train/step_real_loss": 1.3143803551793098, "train/lr": 0.0001709920242324663, "train/step_canary_loss": 9.350000381469727, "perf/step_duration_sec": 17.616566620767117, "perf/samples_per_sec": 7.549711749349289, "perf/tokens_per_sec": 5881.736335493052, "perf/logical_batch_size": 133.0, "perf/logical_token_count": 103616.0, "perf/physical_batches": 17.0, "privacy/epsilon": 1.5929146459722179, "system/cuda_memory_allocated_gb": 16.63217782974243, "system/cuda_max_memory_allocated_gb": 74.69534969329834}}
{"timestamp": 1774097584.0989442, "event": "train_step", "step": 70, "epoch": 1, "metrics": {"train/step_loss": 1.5537053346633911, "train/step_real_loss": 1.274133626371622, "train/lr": 0.00015890746575622231, "train/step_canary_loss": 10.5, "perf/step_duration_sec": 17.34652972780168, "perf/samples_per_sec": 7.609591201889827, "perf/tokens_per_sec": 6165.613622913036, "perf/logical_batch_size": 132.0, "perf/logical_token_count": 106952.0, "perf/physical_batches": 17.0, "privacy/epsilon": 1.7182200620091768, "system/cuda_memory_allocated_gb": 16.537474155426025, "system/cuda_max_memory_allocated_gb": 74.69534969329834}}
{"timestamp": 1774097758.6071105, "event": "train_step", "step": 80, "epoch": 1, "metrics": {"train/step_loss": 1.6813924115403254, "train/step_real_loss": 1.3261733427643776, "train/lr": 0.00014532799038330385, "train/step_canary_loss": 10.77500057220459, "perf/step_duration_sec": 16.975525552406907, "perf/samples_per_sec": 7.834808977748694, "perf/tokens_per_sec": 6001.228043602786, "perf/logical_batch_size": 133.0, "perf/logical_token_count": 101874.0, "perf/physical_batches": 17.0, "privacy/epsilon": 1.8360751937320303, "system/cuda_memory_allocated_gb": 16.63217782974243, "system/cuda_max_memory_allocated_gb": 74.69534969329834}}
{"timestamp": 1774097934.2494926, "event": "train_step", "step": 90, "epoch": 1, "metrics": {"train/step_loss": 1.8558794460157408, "train/step_real_loss": 1.2407655008137226, "train/lr": 0.00013059820956358998, "train/step_canary_loss": 10.604166666666666, "perf/step_duration_sec": 17.994612561538815, "perf/samples_per_sec": 7.557817626520207, "perf/tokens_per_sec": 6203.967971981345, "perf/logical_batch_size": 136.0, "perf/logical_token_count": 111638.0, "perf/physical_batches": 18.0, "privacy/epsilon": 1.9476934830264792, "system/cuda_memory_allocated_gb": 16.916476249694824, "system/cuda_max_memory_allocated_gb": 74.69534969329834}}
{"timestamp": 1774098110.4647262, "event": "train_step", "step": 100, "epoch": 1, "metrics": {"train/step_loss": 1.86213939777319, "train/step_real_loss": 1.2256366163492203, "train/lr": 0.00011509192648058249, "train/step_canary_loss": 10.009375, "perf/step_duration_sec": 17.502219082787633, "perf/samples_per_sec": 7.884714466619527, "perf/tokens_per_sec": 6282.689039593839, "perf/logical_batch_size": 138.0, "perf/logical_token_count": 109961.0, "perf/physical_batches": 18.0, "privacy/epsilon": 2.054057754637441, "system/cuda_memory_allocated_gb": 16.34807252883911, "system/cuda_max_memory_allocated_gb": 74.69534969329834}}
{"timestamp": 1774098137.4743512, "event": "eval_step", "step": 100, "epoch": 1, "metrics": {"eval/loss": 0.8425039929905157, "eval/duration_sec": 27.006801065057516}}
{"timestamp": 1774098228.5487797, "event": "train_epoch", "step": 104, "epoch": 1, "metrics": {"train/epoch_loss": 1.7094427731604633, "train/epoch_real_loss": 1.285309421313279, "train/epoch_canary_loss": 9.719154772066274, "perf/epoch_duration_sec": 1878.0538540109992, "perf/epoch_samples_per_sec": 29.64824458099586, "perf/epoch_tokens_per_sec": 23429.532069074678, "perf/epoch_samples": 55681.0, "perf/epoch_tokens": 44001923.0, "system/cuda_epoch_peak_memory_gb": 74.69534969329834, "eval/loss": 0.8425441932404178, "eval/duration_sec": 26.999734753742814, "privacy/epsilon": 2.0952814257505974}}
{"timestamp": 1774098238.4572744, "event": "audit_epoch", "step": 104, "epoch": 1, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.527168, "audit/loss/empirical_epsilon/0.05": 0.09125839080661535, "audit/loss/empirical_epsilon/0.01": 0.0, "audit/loss/empirical_epsilon_details/0.05/epsilon": 0.09125839080661535, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 61.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 0.0, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 0.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 0.0, "audit/embedding/auc": 0.514792, "audit/embedding/empirical_epsilon/0.05": 0.0, "audit/embedding/empirical_epsilon/0.01": 0.0, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 0.0, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 0.0, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 0.0, "perf/audit_duration_sec": 5.7471471559256315}}
{"timestamp": 1774098344.630848, "event": "train_step", "step": 110, "epoch": 2, "metrics": {"train/step_loss": 1.570878519940732, "train/step_real_loss": 1.2690251618623734, "train/lr": 9.920264990753837e-05, "train/step_canary_loss": 8.010416825612387, "perf/step_duration_sec": 17.30536743812263, "perf/samples_per_sec": 7.685476802243991, "perf/tokens_per_sec": 6186.173184867881, "perf/logical_batch_size": 133.0, "perf/logical_token_count": 107054.0, "perf/physical_batches": 18.0, "privacy/epsilon": 2.155895236118489, "system/cuda_memory_allocated_gb": 16.63217782974243, "system/cuda_max_memory_allocated_gb": 74.69534254074097}}
{"timestamp": 1774098520.7483177, "event": "train_step", "step": 120, "epoch": 2, "metrics": {"train/step_loss": 1.733701615437974, "train/step_real_loss": 1.2730829790234566, "train/lr": 8.333360798744496e-05, "train/step_canary_loss": 8.284722222222221, "perf/step_duration_sec": 18.148367216810584, "perf/samples_per_sec": 7.493787092539381, "perf/tokens_per_sec": 5505.233545608104, "perf/logical_batch_size": 136.0, "perf/logical_token_count": 99911.0, "perf/physical_batches": 18.0, "privacy/epsilon": 2.2538737991591966, "system/cuda_memory_allocated_gb": 16.916476249694824, "system/cuda_max_memory_allocated_gb": 74.69534254074097}}
{"timestamp": 1774098695.815589, "event": "train_step", "step": 130, "epoch": 2, "metrics": {"train/step_loss": 1.4695805564071194, "train/step_real_loss": 1.2108174487948418, "train/lr": 6.788751536089739e-05, "train/step_canary_loss": 9.75, "perf/step_duration_sec": 16.7934517133981, "perf/samples_per_sec": 7.860206600331495, "perf/tokens_per_sec": 6584.88807942771, "perf/logical_batch_size": 132.0, "perf/logical_token_count": 110583.0, "perf/physical_batches": 17.0, "privacy/epsilon": 2.34829265801524, "system/cuda_memory_allocated_gb": 16.537474155426025, "system/cuda_max_memory_allocated_gb": 74.69534254074097}}
{"timestamp": 1774098871.6479974, "event": "train_step", "step": 140, "epoch": 2, "metrics": {"train/step_loss": 1.409767813354958, "train/step_real_loss": 1.280211590230465, "train/lr": 5.325635332531864e-05, "train/step_canary_loss": 6.9375, "perf/step_duration_sec": 17.296851823106408, "perf/samples_per_sec": 7.515818562215838, "perf/tokens_per_sec": 5869.218343212228, "perf/logical_batch_size": 130.0, "perf/logical_token_count": 101519.0, "perf/physical_batches": 18.0, "privacy/epsilon": 2.4397183333948855, "system/cuda_memory_allocated_gb": 16.348063945770264, "system/cuda_max_memory_allocated_gb": 74.69534254074097}}
{"timestamp": 1774099047.329602, "event": "train_step", "step": 150, "epoch": 2, "metrics": {"train/step_loss": 1.470641034250041, "train/step_real_loss": 1.2892888560891151, "train/lr": 3.981142237826332e-05, "train/step_canary_loss": 9.208333969116211, "perf/step_duration_sec": 17.535365080460906, "perf/samples_per_sec": 7.4706172012334715, "perf/tokens_per_sec": 5992.347437184814, "perf/logical_batch_size": 131.0, "perf/logical_token_count": 105078.0, "perf/physical_batches": 17.0, "privacy/epsilon": 2.5283910517887054, "system/cuda_memory_allocated_gb": 16.442769050598145, "system/cuda_max_memory_allocated_gb": 74.69534254074097}}
{"timestamp": 1774099074.3730319, "event": "eval_step", "step": 150, "epoch": 2, "metrics": {"eval/loss": 0.842570746081999, "eval/duration_sec": 27.040043698623776}}
{"timestamp": 1774099249.8653634, "event": "train_step", "step": 160, "epoch": 2, "metrics": {"train/step_loss": 1.536541504646415, "train/step_real_loss": 1.2477270364761353, "train/lr": 2.789391958515183e-05, "train/step_canary_loss": 7.697916825612386, "perf/step_duration_sec": 17.38564824871719, "perf/samples_per_sec": 7.649987972683935, "perf/tokens_per_sec": 6056.662282222899, "perf/logical_batch_size": 133.0, "perf/logical_token_count": 105299.0, "perf/physical_batches": 18.0, "privacy/epsilon": 2.6145698431381854, "system/cuda_memory_allocated_gb": 16.63217782974243, "system/cuda_max_memory_allocated_gb": 74.69534540176392}}
{"timestamp": 1774099424.502899, "event": "train_step", "step": 170, "epoch": 2, "metrics": {"train/step_loss": 1.4997789441174223, "train/step_real_loss": 1.2844417318701744, "train/lr": 1.7806279893114875e-05, "train/step_canary_loss": 10.6875, "perf/step_duration_sec": 17.294984631240368, "perf/samples_per_sec": 7.574450211616343, "perf/tokens_per_sec": 5730.794337970551, "perf/logical_batch_size": 131.0, "perf/logical_token_count": 99114.0, "perf/physical_batches": 17.0, "privacy/epsilon": 2.6985357679843074, "system/cuda_memory_allocated_gb": 16.442769050598145, "system/cuda_max_memory_allocated_gb": 74.69534540176392}}
{"timestamp": 1774099602.7806635, "event": "train_step", "step": 180, "epoch": 2, "metrics": {"train/step_loss": 1.5966231734664351, "train/step_real_loss": 1.2403349727392197, "train/lr": 9.804501125681243e-06, "train/step_canary_loss": 8.111607415335518, "perf/step_duration_sec": 17.99831911176443, "perf/samples_per_sec": 7.445139691539982, "perf/tokens_per_sec": 5983.892125215344, "perf/logical_batch_size": 134.0, "perf/logical_token_count": 107700.0, "perf/physical_batches": 18.0, "privacy/epsilon": 2.780402267783889, "system/cuda_memory_allocated_gb": 16.726882934570312, "system/cuda_max_memory_allocated_gb": 74.69534540176392}}
{"timestamp": 1774099786.8860722, "event": "train_step", "step": 190, "epoch": 2, "metrics": {"train/step_loss": 1.301622830904447, "train/step_real_loss": 1.246277093887329, "train/lr": 4.091647429802869e-06, "train/step_canary_loss": 4.84375, "perf/step_duration_sec": 17.514903953298926, "perf/samples_per_sec": 7.3651560033649455, "perf/tokens_per_sec": 6243.596898480441, "perf/logical_batch_size": 129.0, "perf/logical_token_count": 109356.0, "perf/physical_batches": 18.0, "privacy/epsilon": 2.860377969759561, "system/cuda_memory_allocated_gb": 16.253358840942383, "system/cuda_max_memory_allocated_gb": 74.69534540176392}}
{"timestamp": 1774099963.4199688, "event": "train_step", "step": 200, "epoch": 2, "metrics": {"train/step_loss": 1.5310632604541201, "train/step_real_loss": 1.2371121123433113, "train/lr": 8.126960406835249e-07, "train/step_canary_loss": 10.9375, "perf/step_duration_sec": 16.793452875688672, "perf/samples_per_sec": 7.860206056319249, "perf/tokens_per_sec": 6343.96039865215, "perf/logical_batch_size": 132.0, "perf/logical_token_count": 106537.0, "perf/physical_batches": 17.0, "privacy/epsilon": 2.938565800812133, "system/cuda_memory_allocated_gb": 16.537474155426025, "system/cuda_max_memory_allocated_gb": 74.69534540176392}}
{"timestamp": 1774099990.6645026, "event": "eval_step", "step": 200, "epoch": 2, "metrics": {"eval/loss": 0.8425621900750303, "eval/duration_sec": 27.24077794700861}}
{"timestamp": 1774100151.6417675, "event": "train_epoch", "step": 208, "epoch": 2, "metrics": {"train/epoch_loss": 1.6631105757675884, "train/epoch_real_loss": 1.2492633356516678, "train/epoch_canary_loss": 9.74998009501868, "perf/epoch_duration_sec": 1886.0955602359027, "perf/epoch_samples_per_sec": 29.488961838732855, "perf/epoch_tokens_per_sec": 23327.959583605018, "perf/epoch_samples": 55619.0, "perf/epoch_tokens": 43998761.0, "system/cuda_epoch_peak_memory_gb": 74.69534540176392, "eval/loss": 0.8425623961221212, "eval/duration_sec": 27.067450830712914, "privacy/epsilon": 2.9999680995370417}}
{"timestamp": 1774100161.8989484, "event": "audit_epoch", "step": 208, "epoch": 2, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.525808, "audit/loss/empirical_epsilon/0.05": 0.05073561053723097, "audit/loss/empirical_epsilon/0.01": 0.0, "audit/loss/empirical_epsilon_details/0.05/epsilon": 0.05073561053723097, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 60.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 0.0, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 0.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 0.0, "audit/embedding/auc": 0.520928, "audit/embedding/empirical_epsilon/0.05": 0.0, "audit/embedding/empirical_epsilon/0.01": 0.0, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 0.0, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 0.0, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 0.0, "perf/audit_duration_sec": 5.841585950925946}}
{"timestamp": 1774100172.4001799, "event": "audit_final", "step": 208, "epoch": 2, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.525808, "audit/loss/empirical_epsilon/0.05": 0.05073561053723097, "audit/loss/empirical_epsilon/0.01": 0.0, "audit/loss/empirical_epsilon_details/0.05/epsilon": 0.05073561053723097, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 60.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 0.0, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 0.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 0.0, "audit/embedding/auc": 0.520928, "audit/embedding/empirical_epsilon/0.05": 0.0, "audit/embedding/empirical_epsilon/0.01": 0.0, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 0.0, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 0.0, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 0.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 0.0}}
{"timestamp": 1774100172.924808, "event": "energy_final", "step": 208, "epoch": null, "metrics": {"energy/codecarbon/duration": 3938.0607158355415, "energy/codecarbon/emissions": 0.5702959606126596, "energy/codecarbon/emissions_rate": 0.00014481644691749236, "energy/codecarbon/cpu_power": 179.92904019353898, "energy/codecarbon/gpu_power": 2298.550082608512, "energy/codecarbon/ram_power": 70.0, "energy/codecarbon/cpu_energy": 0.18975838183590477, "energy/codecarbon/gpu_energy": 2.514705143984827, "energy/codecarbon/ram_energy": 0.07382037755078118, "energy/codecarbon/energy_consumed": 2.778283903371513, "energy/codecarbon/water_consumed": 0.0, "energy/codecarbon/cpu_count": 224.0, "energy/codecarbon/gpu_count": 4.0, "energy/codecarbon/longitude": -121.9552, "energy/codecarbon/latitude": 37.3541, "energy/codecarbon/ram_total_size": 2015.5625190734863, "energy/codecarbon/cpu_utilization_percent": 2.4580389144905275, "energy/codecarbon/gpu_utilization_percent": 95.87682458386683, "energy/codecarbon/ram_utilization_percent": 2.36410650281618, "energy/codecarbon/ram_used_gb": 47.93522234837092, "energy/codecarbon/pue": 1.0, "energy/codecarbon/wue": 0.0}}
|