kajuma commited on
Commit
a7ee9cc
·
verified ·
1 Parent(s): 71ec238

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -2192,3 +2192,4 @@ checkpoints/iter_0012700/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
2192
  checkpoints/iter_0012700/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
2193
  checkpoints/iter_0012700/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
2194
  checkpoints/iter_0012700/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
 
 
2192
  checkpoints/iter_0012700/__6_1.distcp filter=lfs diff=lfs merge=lfs -text
2193
  checkpoints/iter_0012700/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
2194
  checkpoints/iter_0012700/__7_1.distcp filter=lfs diff=lfs merge=lfs -text
2195
+ wandb/wandb/run-20251224_123436-jnwx1i3g/run-jnwx1i3g.wandb filter=lfs diff=lfs merge=lfs -text
args.json ADDED
@@ -0,0 +1,654 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "use_ray": false,
3
+ "ray_exp_name": null,
4
+ "device_groups": null,
5
+ "model": "Qwen/Qwen3-0.6B-Base",
6
+ "model_type": "qwen3",
7
+ "model_revision": null,
8
+ "task_type": "causal_lm",
9
+ "torch_dtype": "bfloat16",
10
+ "attn_impl": null,
11
+ "new_special_tokens": [],
12
+ "num_labels": null,
13
+ "problem_type": null,
14
+ "rope_scaling": null,
15
+ "device_map": null,
16
+ "max_memory": {},
17
+ "max_model_len": null,
18
+ "local_repo_path": null,
19
+ "init_strategy": null,
20
+ "template": "qwen3",
21
+ "system": null,
22
+ "max_length": 4096,
23
+ "truncation_strategy": "right",
24
+ "max_pixels": null,
25
+ "agent_template": null,
26
+ "norm_bbox": null,
27
+ "use_chat_template": false,
28
+ "padding_free": true,
29
+ "padding_side": "right",
30
+ "loss_scale": "all",
31
+ "sequence_parallel_size": 1,
32
+ "response_prefix": null,
33
+ "template_backend": "swift",
34
+ "dataset": [],
35
+ "val_dataset": [],
36
+ "cached_dataset": [
37
+ "/workspace/1of3"
38
+ ],
39
+ "cached_val_dataset": [],
40
+ "split_dataset_ratio": 0.0,
41
+ "data_seed": 42,
42
+ "dataset_num_proc": 32,
43
+ "load_from_cache_file": false,
44
+ "dataset_shuffle": true,
45
+ "val_dataset_shuffle": false,
46
+ "streaming": false,
47
+ "interleave_prob": null,
48
+ "stopping_strategy": "first_exhausted",
49
+ "shuffle_buffer_size": 1000,
50
+ "download_mode": "reuse_dataset_if_exists",
51
+ "columns": {},
52
+ "strict": false,
53
+ "remove_unused_columns": true,
54
+ "model_name": null,
55
+ "model_author": null,
56
+ "custom_dataset_info": [],
57
+ "quant_method": null,
58
+ "quant_bits": null,
59
+ "hqq_axis": null,
60
+ "bnb_4bit_compute_dtype": "bfloat16",
61
+ "bnb_4bit_quant_type": "nf4",
62
+ "bnb_4bit_use_double_quant": true,
63
+ "bnb_4bit_quant_storage": null,
64
+ "max_new_tokens": null,
65
+ "temperature": null,
66
+ "top_k": 50,
67
+ "top_p": 0.9,
68
+ "repetition_penalty": 1.0,
69
+ "num_beams": 1,
70
+ "stream": false,
71
+ "stop_words": [],
72
+ "logprobs": false,
73
+ "top_logprobs": null,
74
+ "ckpt_dir": "/workspace/halcyon-recipe2/patch",
75
+ "lora_modules": [],
76
+ "tuner_backend": "peft",
77
+ "train_type": "full",
78
+ "adapters": [],
79
+ "external_plugins": [],
80
+ "seed": 42,
81
+ "model_kwargs": {},
82
+ "load_args": false,
83
+ "load_data_args": false,
84
+ "packing": true,
85
+ "packing_length": 4096,
86
+ "packing_num_proc": 1,
87
+ "lazy_tokenize": false,
88
+ "custom_register_path": [],
89
+ "use_hf": true,
90
+ "hub_token": null,
91
+ "ddp_timeout": 18000000,
92
+ "ddp_backend": null,
93
+ "ignore_args_error": false,
94
+ "use_swift_lora": false,
95
+ "freeze_llm": false,
96
+ "freeze_vit": true,
97
+ "freeze_aligner": true,
98
+ "freeze_parameters": [],
99
+ "freeze_parameters_regex": null,
100
+ "freeze_parameters_ratio": 0.0,
101
+ "trainable_parameters": [],
102
+ "trainable_parameters_regex": null,
103
+ "adapter_load": null,
104
+ "target_modules": [
105
+ "all-linear"
106
+ ],
107
+ "target_regex": null,
108
+ "modules_to_save": [],
109
+ "lora_rank": 8,
110
+ "lora_alpha": 32,
111
+ "lora_dropout": 0.05,
112
+ "lora_bias": "none",
113
+ "lora_dtype": null,
114
+ "use_rslora": false,
115
+ "rlhf_type": null,
116
+ "ref_load": null,
117
+ "ref_adapter_load": null,
118
+ "beta": 0.1,
119
+ "rpo_alpha": null,
120
+ "reference_free": false,
121
+ "label_smoothing": 0.0,
122
+ "f_divergence_type": "reverse_kl",
123
+ "loss_type": null,
124
+ "desirable_weight": 1.0,
125
+ "undesirable_weight": 1.0,
126
+ "calculate_KL": null,
127
+ "center_rewards_coefficient": null,
128
+ "generation_batch_size": null,
129
+ "steps_per_generation": null,
130
+ "num_generations": 8,
131
+ "max_completion_length": 512,
132
+ "importance_sampling_level": "token",
133
+ "tau_pos": 1.0,
134
+ "tau_neg": 1.05,
135
+ "epsilon": 0.2,
136
+ "epsilon_high": null,
137
+ "delta": null,
138
+ "use_vllm": true,
139
+ "vllm_mode": null,
140
+ "vllm_enable_prefix_caching": true,
141
+ "vllm_gpu_memory_utilization": 0.9,
142
+ "vllm_tensor_parallel_size": 1,
143
+ "vllm_max_model_len": null,
144
+ "vllm_enforce_eager": false,
145
+ "vllm_limit_mm_per_prompt": null,
146
+ "vllm_disable_cascade_attn": false,
147
+ "vllm_max_num_seqs": null,
148
+ "vllm_mm_processor_cache_gb": null,
149
+ "vllm_engine_kwargs": null,
150
+ "sleep_level": 0,
151
+ "offload_optimizer": false,
152
+ "offload_model": false,
153
+ "offload_bridge": false,
154
+ "vllm_server_base_url": null,
155
+ "vllm_server_host": null,
156
+ "vllm_server_port": [
157
+ 8000
158
+ ],
159
+ "vllm_server_timeout": 240.0,
160
+ "vllm_server_group_port": null,
161
+ "reward_funcs": [],
162
+ "reward_weights": null,
163
+ "cosine_min_len_value_wrong": -0.5,
164
+ "cosine_max_len_value_wrong": 0.0,
165
+ "cosine_min_len_value_correct": 1.0,
166
+ "cosine_max_len_value_correct": 0.5,
167
+ "cosine_max_len": null,
168
+ "repetition_n_grams": 3,
169
+ "repetition_max_penalty": -1.0,
170
+ "soft_max_length": null,
171
+ "soft_cache_length": null,
172
+ "dynamic_sample": false,
173
+ "max_resample_times": 3,
174
+ "overlong_filter": false,
175
+ "scale_rewards": "group",
176
+ "advantage_estimator": "grpo",
177
+ "kl_in_reward": false,
178
+ "wandb_log_unique_prompts": null,
179
+ "log_completions": false,
180
+ "rollout_importance_sampling_mode": null,
181
+ "rollout_importance_sampling_threshold": 2.0,
182
+ "log_rollout_offpolicy_metrics": false,
183
+ "off_policy_sequence_mask_delta": null,
184
+ "reward_model": null,
185
+ "reward_model_plugin": null,
186
+ "sync_ref_model": false,
187
+ "ref_model_sync_steps": 512,
188
+ "ref_model_mixup_alpha": 0.6,
189
+ "async_generate": false,
190
+ "move_model_batches": null,
191
+ "multi_turn_scheduler": null,
192
+ "max_turns": null,
193
+ "completion_length_limit_scope": "per_round",
194
+ "vllm_server_pass_dataset": false,
195
+ "log_entropy": false,
196
+ "top_entropy_quantile": 1.0,
197
+ "num_iterations": 1,
198
+ "check_model": true,
199
+ "padded_vocab_size": 151936,
200
+ "initialize_embedding": false,
201
+ "mlp_padding_free": false,
202
+ "load_safetensors": false,
203
+ "save_safetensors": false,
204
+ "ref_model": null,
205
+ "ref_adapters": [],
206
+ "merge_lora": false,
207
+ "max_shard_size": "5GB",
208
+ "train_dataloader_shuffle": true,
209
+ "dataloader_pin_memory": true,
210
+ "dataloader_persistent_workers": true,
211
+ "dataloader_prefetch_factor": 10,
212
+ "architectures": "Qwen3ForCausalLM",
213
+ "llm_architectures": "Qwen3ForCausalLM",
214
+ "max_epochs": null,
215
+ "enable_dft_loss": false,
216
+ "enable_channel_loss": false,
217
+ "patch_size": 1,
218
+ "save_strategy": "steps",
219
+ "original_max_position_embeddings": null,
220
+ "partial_rotary_factor": null,
221
+ "use_shared_expert_gate": false,
222
+ "vit_gradient_checkpointing": true,
223
+ "vit_lr": null,
224
+ "aligner_lr": null,
225
+ "gradient_checkpointing_kwargs": null,
226
+ "linear_num_value_heads": null,
227
+ "linear_num_key_heads": null,
228
+ "linear_key_head_dim": null,
229
+ "linear_value_head_dim": null,
230
+ "linear_conv_kernel_dim": null,
231
+ "layer_types": null,
232
+ "mrope_interleaved": false,
233
+ "micro_batch_size": 4,
234
+ "global_batch_size": 256,
235
+ "recompute_granularity": "full",
236
+ "recompute_method": "uniform",
237
+ "recompute_num_layers": 1,
238
+ "recompute_modules": [
239
+ "core_attn"
240
+ ],
241
+ "use_cpu_initialization": false,
242
+ "deterministic_mode": false,
243
+ "train_iters": 12700,
244
+ "log_interval": 1,
245
+ "tensorboard_dir": "/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/runs",
246
+ "no_masked_softmax_fusion": false,
247
+ "no_bias_dropout_fusion": false,
248
+ "no_bias_swiglu_fusion": false,
249
+ "no_rope_fusion": false,
250
+ "no_gradient_accumulation_fusion": false,
251
+ "cross_entropy_loss_fusion": true,
252
+ "cross_entropy_fusion_impl": "native",
253
+ "calculate_per_token_loss": true,
254
+ "use_flash_attn": false,
255
+ "attention_backend": "flash",
256
+ "optimizer": "adam",
257
+ "optimizer_cpu_offload": false,
258
+ "optimizer_offload_fraction": 1.0,
259
+ "use_precision_aware_optimizer": true,
260
+ "main_grads_dtype": "fp32",
261
+ "main_params_dtype": "fp32",
262
+ "exp_avg_dtype": "fp32",
263
+ "exp_avg_sq_dtype": "fp32",
264
+ "dataloader_type": "cyclic",
265
+ "manual_gc": false,
266
+ "manual_gc_interval": 0,
267
+ "lr": 0.0001,
268
+ "lr_decay_style": "cosine",
269
+ "lr_decay_iters": null,
270
+ "lr_warmup_iters": 0,
271
+ "lr_warmup_fraction": 0.05,
272
+ "min_lr": 3e-06,
273
+ "weight_decay": 0.1,
274
+ "clip_grad": 1.0,
275
+ "adam_beta1": 0.9,
276
+ "adam_beta2": 0.95,
277
+ "adam_eps": 1e-08,
278
+ "sgd_momentum": 0.9,
279
+ "save": "/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139",
280
+ "save_interval": 100,
281
+ "save_retain_interval": null,
282
+ "no_save_optim": false,
283
+ "no_save_rng": false,
284
+ "load": "/workspace/halcyon-recipe2/patch",
285
+ "no_load_optim": false,
286
+ "no_load_rng": false,
287
+ "finetune": true,
288
+ "ckpt_format": "torch_dist",
289
+ "no_initialization": true,
290
+ "auto_detect_ckpt_format": true,
291
+ "exit_on_missing_checkpoint": true,
292
+ "async_save": false,
293
+ "use_persistent_ckpt_worker": false,
294
+ "ckpt_fully_parallel_load": false,
295
+ "ckpt_assume_constant_structure": false,
296
+ "distributed_backend": "nccl",
297
+ "local_rank": 0,
298
+ "use_distributed_optimizer": true,
299
+ "tensor_model_parallel_size": 1,
300
+ "pipeline_model_parallel_size": 1,
301
+ "decoder_first_pipeline_num_layers": null,
302
+ "decoder_last_pipeline_num_layers": null,
303
+ "account_for_embedding_in_pipeline_split": false,
304
+ "account_for_loss_in_pipeline_split": false,
305
+ "sequence_parallel": false,
306
+ "context_parallel_size": 1,
307
+ "tp_comm_overlap": false,
308
+ "overlap_grad_reduce": true,
309
+ "overlap_param_gather": true,
310
+ "distributed_timeout_minutes": 300000,
311
+ "num_layers_per_virtual_pipeline_stage": null,
312
+ "num_virtual_stages_per_pipeline_rank": null,
313
+ "microbatch_group_size_per_virtual_pipeline_stage": null,
314
+ "pipeline_model_parallel_layout": null,
315
+ "num_layers": 28,
316
+ "hidden_size": 1024,
317
+ "ffn_hidden_size": 3072,
318
+ "num_attention_heads": 16,
319
+ "group_query_attention": true,
320
+ "num_query_groups": 8,
321
+ "softmax_type": null,
322
+ "window_size": null,
323
+ "window_attn_skip_freq": null,
324
+ "max_position_embeddings": 32768,
325
+ "position_embedding_type": "rope",
326
+ "mrope_section": null,
327
+ "rotary_base": 1000000,
328
+ "rotary_percent": 1.0,
329
+ "rotary_interleaved": false,
330
+ "normalization": "RMSNorm",
331
+ "norm_epsilon": 1e-06,
332
+ "swiglu": true,
333
+ "quick_geglu": false,
334
+ "activation_func_clamp_value": null,
335
+ "glu_linear_offset": null,
336
+ "untie_embeddings_and_output_weights": false,
337
+ "disable_bias_linear": true,
338
+ "add_qkv_bias": false,
339
+ "attention_dropout": 0.0,
340
+ "hidden_dropout": 0.0,
341
+ "kv_channels": 128,
342
+ "qk_layernorm": true,
343
+ "qk_l2_norm": null,
344
+ "no_rope_freq": null,
345
+ "moe_apply_probs_on_input": null,
346
+ "transformer_impl": "transformer_engine",
347
+ "num_experts": null,
348
+ "moe_layer_freq": "1",
349
+ "moe_ffn_hidden_size": null,
350
+ "moe_shared_expert_intermediate_size": null,
351
+ "moe_router_topk": 2,
352
+ "moe_router_num_groups": null,
353
+ "moe_router_group_topk": null,
354
+ "moe_router_pre_softmax": false,
355
+ "moe_router_dtype": "fp32",
356
+ "moe_router_score_function": "softmax",
357
+ "moe_router_bias_update_rate": null,
358
+ "moe_router_enable_expert_bias": false,
359
+ "moe_router_topk_scaling_factor": null,
360
+ "moe_router_load_balancing_type": "aux_loss",
361
+ "expert_model_parallel_size": 1,
362
+ "expert_tensor_parallel_size": 1,
363
+ "moe_token_dispatcher_type": null,
364
+ "moe_enable_deepep": false,
365
+ "moe_grouped_gemm": true,
366
+ "moe_permute_fusion": false,
367
+ "moe_aux_loss_coeff": 0.0,
368
+ "moe_z_loss_coeff": null,
369
+ "moe_shared_expert_overlap": false,
370
+ "moe_layer_recompute": false,
371
+ "moe_expert_capacity_factor": null,
372
+ "moe_pad_expert_input_to_capacity": false,
373
+ "moe_token_drop_policy": null,
374
+ "multi_latent_attention": false,
375
+ "q_lora_rank": null,
376
+ "kv_lora_rank": 32,
377
+ "qk_head_dim": 128,
378
+ "qk_pos_emb_head_dim": 64,
379
+ "mtp_num_layers": null,
380
+ "mtp_loss_scaling_factor": 0.1,
381
+ "fp8_format": null,
382
+ "fp8_recipe": "delayed",
383
+ "fp8_amax_history_len": 1024,
384
+ "fp8_amax_compute_algo": "max",
385
+ "fp8_param_gather": false,
386
+ "fp16": false,
387
+ "bf16": true,
388
+ "apply_query_key_layer_scaling": false,
389
+ "attention_softmax_in_fp32": true,
390
+ "log_params_norm": false,
391
+ "log_throughput": false,
392
+ "tensorboard_log_interval": 1,
393
+ "tensorboard_queue_size": 50,
394
+ "log_timers_to_tensorboard": true,
395
+ "no_log_learning_rate_to_tensorboard": false,
396
+ "log_validation_ppl_to_tensorboard": true,
397
+ "log_memory_to_tensorboard": true,
398
+ "logging_level": "20",
399
+ "wandb_project": "plt",
400
+ "wandb_exp_name": "tlt",
401
+ "wandb_save_dir": null,
402
+ "eval_iters": -1,
403
+ "eval_interval": 100,
404
+ "seq_length": 4096,
405
+ "num_workers": 32,
406
+ "no_data_sharding": false,
407
+ "megatron_extra_kwargs": {},
408
+ "add_version": true,
409
+ "rank": 0,
410
+ "global_world_size": 8,
411
+ "local_world_size": 8,
412
+ "model_suffix": "Qwen3-0.6B-Base",
413
+ "model_info": "ModelInfo(model_type='qwen3', model_dir='/workspace/.hf_home/hub/models--Qwen--Qwen3-0.6B-Base/snapshots/da87bfb608c14b7cf20ba1ce41287e8de496c0cd', torch_dtype=torch.bfloat16, max_model_len=32768, quant_method=None, quant_bits=None, rope_scaling=None, is_moe_model=False, is_multimodal=False, config=None, task_type='causal_lm', num_labels=None)",
414
+ "model_meta": "ModelMeta(model_type='qwen3', model_groups=[ModelGroup(models=[Model(ms_model_id='Qwen/Qwen3-0.6B-Base', hf_model_id='Qwen/Qwen3-0.6B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B-Base', hf_model_id='Qwen/Qwen3-1.7B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-Base', hf_model_id='Qwen/Qwen3-4B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-Base', hf_model_id='Qwen/Qwen3-8B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-Base', hf_model_id='Qwen/Qwen3-14B-Base', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-0.6B', hf_model_id='Qwen/Qwen3-0.6B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B', hf_model_id='Qwen/Qwen3-1.7B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B', hf_model_id='Qwen/Qwen3-4B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B', hf_model_id='Qwen/Qwen3-8B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B', hf_model_id='Qwen/Qwen3-14B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B', hf_model_id='Qwen/Qwen3-32B', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-0.6B-FP8', hf_model_id='Qwen/Qwen3-0.6B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-1.7B-FP8', hf_model_id='Qwen/Qwen3-1.7B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-FP8', hf_model_id='Qwen/Qwen3-4B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-FP8', hf_model_id='Qwen/Qwen3-8B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-FP8', hf_model_id='Qwen/Qwen3-14B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B-FP8', hf_model_id='Qwen/Qwen3-32B-FP8', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-4B-AWQ', hf_model_id='Qwen/Qwen3-4B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-8B-AWQ', hf_model_id='Qwen/Qwen3-8B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-14B-AWQ', hf_model_id='Qwen/Qwen3-14B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='Qwen/Qwen3-32B-AWQ', hf_model_id='Qwen/Qwen3-32B-AWQ', model_path=None, ms_revision=None, hf_revision=None), Model(ms_model_id='swift/Qwen3-32B-AWQ', hf_model_id=None, model_path=None, ms_revision=None, hf_revision=None)], ignore_patterns=None, requires=None, tags=[])], template='qwen3', get_function=<function get_model_tokenizer_with_flash_attn at 0x7cde0db1be20>, model_arch=ModelKeys(arch_name='llama', embedding='model.embed_tokens', module_list='model.layers', lm_head='lm_head', q_proj='model.layers.{}.self_attn.q_proj', k_proj='model.layers.{}.self_attn.k_proj', v_proj='model.layers.{}.self_attn.v_proj', o_proj='model.layers.{}.self_attn.o_proj', attention='model.layers.{}.self_attn', mlp='model.layers.{}.mlp', down_proj='model.layers.{}.mlp.down_proj', qkv_proj=None, qk_proj=None, qa_proj=None, qb_proj=None, kv_proj=None, kva_proj=None, kvb_proj=None), architectures=['Qwen3ForCausalLM'], additional_saved_files=[], torch_dtype=None, is_multimodal=False, is_reward=False, is_reranker=False, task_type=None, ignore_patterns=None, requires=['transformers>=4.51'], tags=[])",
415
+ "model_dir": "/workspace/.hf_home/hub/models--Qwen--Qwen3-0.6B-Base/snapshots/da87bfb608c14b7cf20ba1ce41287e8de496c0cd",
416
+ "_val_dataset_exists": [],
417
+ "hub": "<class 'swift.hub.hub.HFHub'>",
418
+ "megatron_model_meta": "MegatronModelMeta(megatron_model_type='gpt', model_types=['qwen2', 'qwen2_5', 'qwq', 'qwq_preview', 'qwen2_5_math', 'llama', 'llama3', 'llama3_1', 'llama3_2', 'longwriter_llama3_1', 'codefuse_codellama', 'marco_o1', 'deepseek', 'deepseek_r1_distill', 'yi', 'yi_coder', 'sus', 'skywork_o1', 'openbuddy_llama', 'openbuddy_llama3', 'megrez', 'reflection', 'numina', 'ziya', 'mengzi3', 'qwen3', 'qwen3_thinking', 'qwen3_nothinking', 'qwen2_moe', 'qwen3_moe', 'qwen3_moe_thinking', 'qwen3_coder', 'internlm3', 'mimo', 'mimo_rl', 'moonlight', 'kimi_k2', 'deepseek_moe', 'deepseek_v2', 'deepseek_v2_5', 'deepseek_r1', 'dots1', 'ernie', 'glm4_5', 'deepseek_v3_1', 'ernie_thinking', 'gpt_oss'], is_multimodal=False, bridge_cls=<class 'swift.megatron.model.gpt_bridge.GPTBridge'>, model_cls=<class 'swift.megatron.model.gpt_model.GPTModel'>, get_transformer_layer_spec=None, model_provider=<function model_provider at 0x7cdd9129dc60>, visual_cls=None, extra_args_provider=None)",
419
+ "extra_args": {
420
+ "model_dir": "/workspace/.hf_home/hub/models--Qwen--Qwen3-0.6B-Base/snapshots/da87bfb608c14b7cf20ba1ce41287e8de496c0cd",
421
+ "is_multimodal": false,
422
+ "hf_model_type": "qwen3",
423
+ "use_ray": false,
424
+ "ray_exp_name": null,
425
+ "device_groups": null,
426
+ "model": "Qwen/Qwen3-0.6B-Base",
427
+ "model_type": "qwen3",
428
+ "model_revision": null,
429
+ "task_type": "causal_lm",
430
+ "torch_dtype": "bfloat16",
431
+ "attn_impl": null,
432
+ "new_special_tokens": [],
433
+ "num_labels": null,
434
+ "problem_type": null,
435
+ "rope_scaling": null,
436
+ "device_map": null,
437
+ "max_memory": {},
438
+ "max_model_len": null,
439
+ "local_repo_path": null,
440
+ "init_strategy": null,
441
+ "template": "qwen3",
442
+ "system": null,
443
+ "max_length": 4096,
444
+ "truncation_strategy": "right",
445
+ "max_pixels": null,
446
+ "agent_template": null,
447
+ "norm_bbox": null,
448
+ "use_chat_template": false,
449
+ "padding_free": true,
450
+ "padding_side": "right",
451
+ "sequence_parallel_size": 1,
452
+ "response_prefix": null,
453
+ "template_backend": "swift",
454
+ "dataset": [],
455
+ "val_dataset": [],
456
+ "cached_dataset": [
457
+ "/workspace/1of3"
458
+ ],
459
+ "cached_val_dataset": [],
460
+ "split_dataset_ratio": 0.0,
461
+ "data_seed": 42,
462
+ "dataset_num_proc": 32,
463
+ "load_from_cache_file": false,
464
+ "dataset_shuffle": true,
465
+ "val_dataset_shuffle": false,
466
+ "streaming": false,
467
+ "interleave_prob": null,
468
+ "stopping_strategy": "first_exhausted",
469
+ "shuffle_buffer_size": 1000,
470
+ "download_mode": "reuse_dataset_if_exists",
471
+ "columns": {},
472
+ "strict": false,
473
+ "remove_unused_columns": true,
474
+ "model_name": null,
475
+ "model_author": null,
476
+ "custom_dataset_info": [],
477
+ "quant_method": null,
478
+ "quant_bits": null,
479
+ "hqq_axis": null,
480
+ "bnb_4bit_compute_dtype": "bfloat16",
481
+ "bnb_4bit_quant_type": "nf4",
482
+ "bnb_4bit_use_double_quant": true,
483
+ "bnb_4bit_quant_storage": null,
484
+ "max_new_tokens": null,
485
+ "temperature": null,
486
+ "top_k": 50,
487
+ "top_p": 0.9,
488
+ "repetition_penalty": 1.0,
489
+ "num_beams": 1,
490
+ "stream": false,
491
+ "stop_words": [],
492
+ "logprobs": false,
493
+ "top_logprobs": null,
494
+ "ckpt_dir": "/workspace/halcyon-recipe2/patch",
495
+ "lora_modules": [],
496
+ "tuner_backend": "peft",
497
+ "train_type": "full",
498
+ "adapters": [],
499
+ "external_plugins": [],
500
+ "model_kwargs": {},
501
+ "load_args": false,
502
+ "load_data_args": false,
503
+ "packing": true,
504
+ "packing_length": 4096,
505
+ "packing_num_proc": 1,
506
+ "lazy_tokenize": false,
507
+ "custom_register_path": [],
508
+ "use_hf": true,
509
+ "hub_token": null,
510
+ "ddp_timeout": 18000000,
511
+ "ddp_backend": null,
512
+ "ignore_args_error": false,
513
+ "use_swift_lora": false,
514
+ "freeze_llm": false,
515
+ "freeze_vit": true,
516
+ "freeze_aligner": true,
517
+ "freeze_parameters": [],
518
+ "freeze_parameters_regex": null,
519
+ "freeze_parameters_ratio": 0.0,
520
+ "trainable_parameters": [],
521
+ "trainable_parameters_regex": null,
522
+ "adapter_load": null,
523
+ "target_modules": [
524
+ "all-linear"
525
+ ],
526
+ "target_regex": null,
527
+ "modules_to_save": [],
528
+ "lora_rank": 8,
529
+ "lora_alpha": 32,
530
+ "lora_dropout": 0.05,
531
+ "lora_bias": "none",
532
+ "lora_dtype": null,
533
+ "use_rslora": false,
534
+ "rlhf_type": null,
535
+ "ref_load": null,
536
+ "ref_adapter_load": null,
537
+ "beta": 0.1,
538
+ "rpo_alpha": null,
539
+ "reference_free": false,
540
+ "label_smoothing": 0.0,
541
+ "f_divergence_type": "reverse_kl",
542
+ "loss_type": null,
543
+ "desirable_weight": 1.0,
544
+ "undesirable_weight": 1.0,
545
+ "calculate_KL": null,
546
+ "center_rewards_coefficient": null,
547
+ "generation_batch_size": null,
548
+ "steps_per_generation": null,
549
+ "num_generations": 8,
550
+ "max_completion_length": 512,
551
+ "importance_sampling_level": "token",
552
+ "tau_pos": 1.0,
553
+ "tau_neg": 1.05,
554
+ "epsilon": 0.2,
555
+ "epsilon_high": null,
556
+ "delta": null,
557
+ "use_vllm": true,
558
+ "vllm_mode": null,
559
+ "vllm_enable_prefix_caching": true,
560
+ "vllm_gpu_memory_utilization": 0.9,
561
+ "vllm_tensor_parallel_size": 1,
562
+ "vllm_max_model_len": null,
563
+ "vllm_enforce_eager": false,
564
+ "vllm_limit_mm_per_prompt": null,
565
+ "vllm_disable_cascade_attn": false,
566
+ "vllm_max_num_seqs": null,
567
+ "vllm_mm_processor_cache_gb": null,
568
+ "vllm_engine_kwargs": null,
569
+ "sleep_level": 0,
570
+ "offload_optimizer": false,
571
+ "offload_model": false,
572
+ "offload_bridge": false,
573
+ "vllm_server_base_url": null,
574
+ "vllm_server_host": null,
575
+ "vllm_server_port": [
576
+ 8000
577
+ ],
578
+ "vllm_server_timeout": 240.0,
579
+ "vllm_server_group_port": null,
580
+ "reward_funcs": [],
581
+ "reward_weights": null,
582
+ "cosine_min_len_value_wrong": -0.5,
583
+ "cosine_max_len_value_wrong": 0.0,
584
+ "cosine_min_len_value_correct": 1.0,
585
+ "cosine_max_len_value_correct": 0.5,
586
+ "cosine_max_len": null,
587
+ "repetition_n_grams": 3,
588
+ "repetition_max_penalty": -1.0,
589
+ "soft_max_length": null,
590
+ "soft_cache_length": null,
591
+ "dynamic_sample": false,
592
+ "max_resample_times": 3,
593
+ "overlong_filter": false,
594
+ "scale_rewards": "group",
595
+ "advantage_estimator": "grpo",
596
+ "kl_in_reward": false,
597
+ "wandb_log_unique_prompts": null,
598
+ "log_completions": false,
599
+ "rollout_importance_sampling_mode": null,
600
+ "rollout_importance_sampling_threshold": 2.0,
601
+ "log_rollout_offpolicy_metrics": false,
602
+ "off_policy_sequence_mask_delta": null,
603
+ "reward_model": null,
604
+ "reward_model_plugin": null,
605
+ "sync_ref_model": false,
606
+ "ref_model_sync_steps": 512,
607
+ "ref_model_mixup_alpha": 0.6,
608
+ "async_generate": false,
609
+ "move_model_batches": null,
610
+ "multi_turn_scheduler": null,
611
+ "max_turns": null,
612
+ "completion_length_limit_scope": "per_round",
613
+ "vllm_server_pass_dataset": false,
614
+ "log_entropy": false,
615
+ "top_entropy_quantile": 1.0,
616
+ "num_iterations": 1,
617
+ "check_model": true,
618
+ "padded_vocab_size": 151936,
619
+ "initialize_embedding": false,
620
+ "mlp_padding_free": false,
621
+ "load_safetensors": false,
622
+ "save_safetensors": false,
623
+ "ref_model": null,
624
+ "ref_adapters": [],
625
+ "merge_lora": false,
626
+ "max_shard_size": "5GB",
627
+ "train_dataloader_shuffle": true,
628
+ "dataloader_pin_memory": true,
629
+ "dataloader_persistent_workers": true,
630
+ "dataloader_prefetch_factor": 10,
631
+ "architectures": "Qwen3ForCausalLM",
632
+ "llm_architectures": "Qwen3ForCausalLM",
633
+ "max_epochs": null,
634
+ "enable_dft_loss": false,
635
+ "enable_channel_loss": false,
636
+ "patch_size": 1,
637
+ "save_strategy": "steps",
638
+ "original_max_position_embeddings": null,
639
+ "partial_rotary_factor": null,
640
+ "use_shared_expert_gate": false,
641
+ "vit_gradient_checkpointing": true,
642
+ "vit_lr": null,
643
+ "aligner_lr": null,
644
+ "gradient_checkpointing_kwargs": null,
645
+ "linear_num_value_heads": null,
646
+ "linear_num_key_heads": null,
647
+ "linear_key_head_dim": null,
648
+ "linear_value_head_dim": null,
649
+ "linear_conv_kernel_dim": null,
650
+ "layer_types": null,
651
+ "mrope_interleaved": false,
652
+ "add_version": true
653
+ }
654
+ }
images/batch-size vs samples.png ADDED
images/batch-size.png ADDED
images/grad-norm vs samples.png ADDED
images/grad-norm.png ADDED
images/iteration-time.png ADDED
images/learning-rate vs samples.png ADDED
images/learning-rate.png ADDED
images/lm loss vs samples.png ADDED
images/lm loss.png ADDED
images/loss-scale vs samples.png ADDED
images/loss-scale.png ADDED
images/mem-allocated-bytes.png ADDED
images/mem-allocated-count.png ADDED
images/mem-max-allocated-bytes.png ADDED
images/mem-reserved-bytes.png ADDED
latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 12700
latest_wandb_artifact_path.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tepic/plt
logging.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
runs/events.out.tfevents.1766579674.13f078589dd5.21342.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eccc4da7b38e65e2ba7e25c5ca1e2c56c3c4619d4ac2cef56099751e21f6a923
3
+ size 10705212
wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-24T12:34:36.254351955Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-24T12:34:36.629472532Z","level":"INFO","msg":"stream: created new stream","id":"jnwx1i3g"}
3
+ {"time":"2025-12-24T12:34:36.629612876Z","level":"INFO","msg":"stream: started","id":"jnwx1i3g"}
4
+ {"time":"2025-12-24T12:34:36.629619447Z","level":"INFO","msg":"handler: started","stream_id":"jnwx1i3g"}
5
+ {"time":"2025-12-24T12:34:36.629720929Z","level":"INFO","msg":"sender: started","stream_id":"jnwx1i3g"}
6
+ {"time":"2025-12-24T12:34:36.629753337Z","level":"INFO","msg":"writer: started","stream_id":"jnwx1i3g"}
7
+ {"time":"2025-12-24T12:43:42.584434646Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
8
+ {"time":"2025-12-24T12:43:42.585040454Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
9
+ {"time":"2025-12-24T12:51:53.304049519Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
10
+ {"time":"2025-12-24T12:51:53.304246963Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
11
+ {"time":"2025-12-24T13:00:03.564144396Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
12
+ {"time":"2025-12-24T13:00:03.56447829Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
13
+ {"time":"2025-12-24T13:08:14.558022794Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
14
+ {"time":"2025-12-24T13:08:14.5581774Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
15
+ {"time":"2025-12-24T13:16:25.335922211Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
16
+ {"time":"2025-12-24T13:16:25.336311204Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
17
+ {"time":"2025-12-24T13:24:35.625138091Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
18
+ {"time":"2025-12-24T13:24:35.625500792Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
19
+ {"time":"2025-12-24T13:32:44.717934227Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
20
+ {"time":"2025-12-24T13:32:44.718329206Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
21
+ {"time":"2025-12-24T13:40:54.863924529Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
22
+ {"time":"2025-12-24T13:40:54.864133024Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
23
+ {"time":"2025-12-24T13:49:06.109993778Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
24
+ {"time":"2025-12-24T13:49:06.110370855Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
25
+ {"time":"2025-12-24T13:57:16.591602528Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
26
+ {"time":"2025-12-24T13:57:16.591806752Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
27
+ {"time":"2025-12-24T14:05:26.943714374Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
28
+ {"time":"2025-12-24T14:05:26.944085153Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
29
+ {"time":"2025-12-24T14:13:37.420574607Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
30
+ {"time":"2025-12-24T14:13:37.420932956Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
31
+ {"time":"2025-12-24T14:21:46.523528811Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
32
+ {"time":"2025-12-24T14:21:46.523877521Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
33
+ {"time":"2025-12-24T14:29:57.069808445Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
34
+ {"time":"2025-12-24T14:29:57.070179761Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
35
+ {"time":"2025-12-24T14:38:07.967571682Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
36
+ {"time":"2025-12-24T14:38:07.967803509Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
37
+ {"time":"2025-12-24T14:46:20.208911528Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
38
+ {"time":"2025-12-24T14:46:20.209263601Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
39
+ {"time":"2025-12-24T14:54:30.621968556Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
40
+ {"time":"2025-12-24T14:54:30.622280257Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
41
+ {"time":"2025-12-24T15:02:42.550303767Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
42
+ {"time":"2025-12-24T15:02:42.550629852Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
43
+ {"time":"2025-12-24T15:10:52.245924491Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
44
+ {"time":"2025-12-24T15:10:52.246280808Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
45
+ {"time":"2025-12-24T15:19:00.372310237Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
46
+ {"time":"2025-12-24T15:19:00.374238029Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
47
+ {"time":"2025-12-24T15:27:10.911178895Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
48
+ {"time":"2025-12-24T15:27:10.911505559Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
49
+ {"time":"2025-12-24T15:35:19.936356498Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
50
+ {"time":"2025-12-24T15:35:19.936680052Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
51
+ {"time":"2025-12-24T15:43:29.284803428Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
52
+ {"time":"2025-12-24T15:43:29.285168114Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
53
+ {"time":"2025-12-24T15:51:39.188922955Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
54
+ {"time":"2025-12-24T15:51:39.189302898Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
55
+ {"time":"2025-12-24T15:59:47.388163574Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
56
+ {"time":"2025-12-24T15:59:47.388480368Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
57
+ {"time":"2025-12-24T16:07:56.763401597Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
58
+ {"time":"2025-12-24T16:07:56.76373407Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
59
+ {"time":"2025-12-24T16:16:07.65283364Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
60
+ {"time":"2025-12-24T16:16:07.653055175Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
61
+ {"time":"2025-12-24T16:24:17.518829539Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
62
+ {"time":"2025-12-24T16:24:17.519219117Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
63
+ {"time":"2025-12-24T16:32:27.874143459Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
64
+ {"time":"2025-12-24T16:32:27.874487207Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
65
+ {"time":"2025-12-24T16:40:37.983683733Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
66
+ {"time":"2025-12-24T16:40:37.984023837Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
67
+ {"time":"2025-12-24T16:48:48.345448271Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
68
+ {"time":"2025-12-24T16:48:48.345778297Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
69
+ {"time":"2025-12-24T16:56:59.153705518Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
70
+ {"time":"2025-12-24T16:56:59.154038502Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
71
+ {"time":"2025-12-24T17:05:11.162594185Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
72
+ {"time":"2025-12-24T17:05:11.162915092Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
73
+ {"time":"2025-12-24T17:13:20.786866394Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
74
+ {"time":"2025-12-24T17:13:20.787245898Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
75
+ {"time":"2025-12-24T17:21:30.700064755Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
76
+ {"time":"2025-12-24T17:21:30.70039016Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
77
+ {"time":"2025-12-24T17:29:40.974243157Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
78
+ {"time":"2025-12-24T17:29:40.974568866Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
79
+ {"time":"2025-12-24T17:37:51.016380953Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
80
+ {"time":"2025-12-24T17:37:51.016705232Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
81
+ {"time":"2025-12-24T17:46:01.693995869Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
82
+ {"time":"2025-12-24T17:46:01.694332853Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
83
+ {"time":"2025-12-24T17:54:12.669825778Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
84
+ {"time":"2025-12-24T17:54:12.670236873Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
85
+ {"time":"2025-12-24T18:02:23.894617076Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
86
+ {"time":"2025-12-24T18:02:23.894867356Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
87
+ {"time":"2025-12-24T18:10:34.692364906Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
88
+ {"time":"2025-12-24T18:10:34.692517453Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
89
+ {"time":"2025-12-24T18:18:45.299371338Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
90
+ {"time":"2025-12-24T18:18:45.299686654Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
91
+ {"time":"2025-12-24T18:26:55.63456397Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
92
+ {"time":"2025-12-24T18:26:55.634882583Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
93
+ {"time":"2025-12-24T18:35:05.874459884Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
94
+ {"time":"2025-12-24T18:35:05.875666521Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
95
+ {"time":"2025-12-24T18:43:16.877827268Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
96
+ {"time":"2025-12-24T18:43:16.878202101Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
97
+ {"time":"2025-12-24T18:51:26.582238369Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
98
+ {"time":"2025-12-24T18:51:26.582564827Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
99
+ {"time":"2025-12-24T18:59:36.695031564Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
100
+ {"time":"2025-12-24T18:59:36.695392522Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
101
+ {"time":"2025-12-24T19:07:46.806583963Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
102
+ {"time":"2025-12-24T19:07:46.806971962Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
103
+ {"time":"2025-12-24T19:15:57.691287153Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
104
+ {"time":"2025-12-24T19:15:57.691615356Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
105
+ {"time":"2025-12-24T19:24:06.859357828Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
106
+ {"time":"2025-12-24T19:24:06.859682526Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
107
+ {"time":"2025-12-24T19:32:17.073718182Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
108
+ {"time":"2025-12-24T19:32:17.074732163Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
109
+ {"time":"2025-12-24T19:40:28.693074445Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
110
+ {"time":"2025-12-24T19:40:28.693416512Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
111
+ {"time":"2025-12-24T19:48:40.093961976Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
112
+ {"time":"2025-12-24T19:48:40.094302557Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
113
+ {"time":"2025-12-24T19:56:50.657707896Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
114
+ {"time":"2025-12-24T19:56:50.658063296Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
115
+ {"time":"2025-12-24T20:04:59.961877315Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
116
+ {"time":"2025-12-24T20:04:59.962242367Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
117
+ {"time":"2025-12-24T20:13:09.787644404Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
118
+ {"time":"2025-12-24T20:13:09.787999328Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
119
+ {"time":"2025-12-24T20:21:20.333968946Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
120
+ {"time":"2025-12-24T20:21:20.334269312Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
121
+ {"time":"2025-12-24T20:29:30.053711709Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
122
+ {"time":"2025-12-24T20:29:30.054100568Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
123
+ {"time":"2025-12-24T20:37:39.665343253Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
124
+ {"time":"2025-12-24T20:37:39.66633448Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
125
+ {"time":"2025-12-24T20:45:50.003211645Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
126
+ {"time":"2025-12-24T20:45:50.003534079Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
127
+ {"time":"2025-12-24T20:54:01.715666664Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
128
+ {"time":"2025-12-24T20:54:01.716017645Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
129
+ {"time":"2025-12-24T21:02:11.738223974Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
130
+ {"time":"2025-12-24T21:02:11.738547408Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
131
+ {"time":"2025-12-24T21:10:23.891058434Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
132
+ {"time":"2025-12-24T21:10:23.891412255Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
133
+ {"time":"2025-12-24T21:18:36.292452235Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
134
+ {"time":"2025-12-24T21:18:36.292823337Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
135
+ {"time":"2025-12-24T21:26:49.034015246Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
136
+ {"time":"2025-12-24T21:26:49.034419061Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
137
+ {"time":"2025-12-24T21:35:01.603575566Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
138
+ {"time":"2025-12-24T21:35:01.603971538Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
139
+ {"time":"2025-12-24T21:43:13.316216606Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
140
+ {"time":"2025-12-24T21:43:13.316541769Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
141
+ {"time":"2025-12-24T21:51:25.997399812Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
142
+ {"time":"2025-12-24T21:51:25.997667925Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
143
+ {"time":"2025-12-24T21:59:39.789298033Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
144
+ {"time":"2025-12-24T21:59:39.790207643Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
145
+ {"time":"2025-12-24T22:07:54.143020496Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
146
+ {"time":"2025-12-24T22:07:54.143366049Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
147
+ {"time":"2025-12-24T22:16:08.452133657Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
148
+ {"time":"2025-12-24T22:16:08.452476455Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
149
+ {"time":"2025-12-24T22:24:23.379531769Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
150
+ {"time":"2025-12-24T22:24:23.379848632Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
151
+ {"time":"2025-12-24T22:32:38.276795958Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
152
+ {"time":"2025-12-24T22:32:38.277168288Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
153
+ {"time":"2025-12-24T22:40:52.266886392Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
154
+ {"time":"2025-12-24T22:40:52.267278516Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
155
+ {"time":"2025-12-24T22:49:09.028682461Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
156
+ {"time":"2025-12-24T22:49:09.029040894Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
157
+ {"time":"2025-12-24T22:57:25.755586067Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
158
+ {"time":"2025-12-24T22:57:25.755904861Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
159
+ {"time":"2025-12-24T23:05:40.737128657Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
160
+ {"time":"2025-12-24T23:05:40.737310219Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
161
+ {"time":"2025-12-24T23:13:56.773269428Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
162
+ {"time":"2025-12-24T23:13:56.773691076Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
163
+ {"time":"2025-12-24T23:22:15.672018155Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
164
+ {"time":"2025-12-24T23:22:15.6723556Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
165
+ {"time":"2025-12-24T23:30:35.439905149Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
166
+ {"time":"2025-12-24T23:30:35.440310014Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
167
+ {"time":"2025-12-24T23:38:59.310113201Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
168
+ {"time":"2025-12-24T23:38:59.310284434Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
169
+ {"time":"2025-12-24T23:47:20.026480138Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
170
+ {"time":"2025-12-24T23:47:20.026806646Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
171
+ {"time":"2025-12-24T23:55:32.645270845Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
172
+ {"time":"2025-12-24T23:55:32.645481056Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
173
+ {"time":"2025-12-25T00:03:45.918216315Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
174
+ {"time":"2025-12-25T00:03:45.918431055Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
175
+ {"time":"2025-12-25T00:11:57.5112067Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
176
+ {"time":"2025-12-25T00:11:57.511559149Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
177
+ {"time":"2025-12-25T00:20:09.569694025Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
178
+ {"time":"2025-12-25T00:20:09.570080729Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
179
+ {"time":"2025-12-25T00:28:20.955104121Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
180
+ {"time":"2025-12-25T00:28:20.956071728Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
181
+ {"time":"2025-12-25T00:36:33.563783853Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
182
+ {"time":"2025-12-25T00:36:33.564115691Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
183
+ {"time":"2025-12-25T00:44:43.757782469Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
184
+ {"time":"2025-12-25T00:44:43.758006362Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
185
+ {"time":"2025-12-25T00:52:58.268283552Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
186
+ {"time":"2025-12-25T00:52:58.268608464Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
187
+ {"time":"2025-12-25T01:01:09.687704628Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
188
+ {"time":"2025-12-25T01:01:09.688073676Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
189
+ {"time":"2025-12-25T01:09:20.318697053Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
190
+ {"time":"2025-12-25T01:09:20.319079623Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
191
+ {"time":"2025-12-25T01:17:32.301193277Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
192
+ {"time":"2025-12-25T01:17:32.301449848Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
193
+ {"time":"2025-12-25T01:25:44.359347604Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
194
+ {"time":"2025-12-25T01:25:44.359728332Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
195
+ {"time":"2025-12-25T01:33:57.086186112Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
196
+ {"time":"2025-12-25T01:33:57.086520137Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
197
+ {"time":"2025-12-25T01:42:08.920919181Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
198
+ {"time":"2025-12-25T01:42:08.921261155Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
199
+ {"time":"2025-12-25T01:50:20.750970195Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
200
+ {"time":"2025-12-25T01:50:20.751300379Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
201
+ {"time":"2025-12-25T01:58:32.458173785Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
202
+ {"time":"2025-12-25T01:58:32.458529561Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
203
+ {"time":"2025-12-25T02:06:43.381564296Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
204
+ {"time":"2025-12-25T02:06:43.381928657Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
205
+ {"time":"2025-12-25T02:14:54.992304689Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
206
+ {"time":"2025-12-25T02:14:54.992635156Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
207
+ {"time":"2025-12-25T02:23:05.286275247Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
208
+ {"time":"2025-12-25T02:23:05.28660512Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
209
+ {"time":"2025-12-25T02:31:17.016762344Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
210
+ {"time":"2025-12-25T02:31:17.017127063Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
211
+ {"time":"2025-12-25T02:39:29.007341384Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
212
+ {"time":"2025-12-25T02:39:29.007700568Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
213
+ {"time":"2025-12-25T02:47:40.47371979Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
214
+ {"time":"2025-12-25T02:47:40.474061876Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
215
+ {"time":"2025-12-25T02:55:51.070757563Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
216
+ {"time":"2025-12-25T02:55:51.070933152Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
217
+ {"time":"2025-12-25T03:04:02.308727067Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
218
+ {"time":"2025-12-25T03:04:02.309098675Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
219
+ {"time":"2025-12-25T03:12:15.598454409Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
220
+ {"time":"2025-12-25T03:12:15.598766388Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
221
+ {"time":"2025-12-25T03:20:27.576979046Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
222
+ {"time":"2025-12-25T03:20:27.577315736Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
223
+ {"time":"2025-12-25T03:28:40.335273131Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
224
+ {"time":"2025-12-25T03:28:40.335469564Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
225
+ {"time":"2025-12-25T03:36:52.589010731Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
226
+ {"time":"2025-12-25T03:36:52.589329744Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
227
+ {"time":"2025-12-25T03:45:05.27775084Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
228
+ {"time":"2025-12-25T03:45:05.278112785Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
229
+ {"time":"2025-12-25T03:53:17.114516703Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
230
+ {"time":"2025-12-25T03:53:17.114671442Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
231
+ {"time":"2025-12-25T04:01:29.804012319Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
232
+ {"time":"2025-12-25T04:01:29.804331445Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
233
+ {"time":"2025-12-25T04:09:43.395724296Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
234
+ {"time":"2025-12-25T04:09:43.396068292Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
235
+ {"time":"2025-12-25T04:17:55.706513202Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
236
+ {"time":"2025-12-25T04:17:55.706830938Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
237
+ {"time":"2025-12-25T04:26:07.589828131Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
238
+ {"time":"2025-12-25T04:26:07.590209964Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
239
+ {"time":"2025-12-25T04:34:18.693182664Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
240
+ {"time":"2025-12-25T04:34:18.693523762Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
241
+ {"time":"2025-12-25T04:42:28.816766346Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
242
+ {"time":"2025-12-25T04:42:28.817136062Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
243
+ {"time":"2025-12-25T04:50:39.929873641Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
244
+ {"time":"2025-12-25T04:50:39.930245151Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
245
+ {"time":"2025-12-25T04:58:52.756800842Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
246
+ {"time":"2025-12-25T04:58:52.757138458Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
247
+ {"time":"2025-12-25T05:07:05.0386095Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
248
+ {"time":"2025-12-25T05:07:05.038933569Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
249
+ {"time":"2025-12-25T05:15:18.105198463Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
250
+ {"time":"2025-12-25T05:15:18.105515364Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
251
+ {"time":"2025-12-25T05:23:30.021612042Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
252
+ {"time":"2025-12-25T05:23:30.021974929Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
253
+ {"time":"2025-12-25T05:31:41.352144216Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
254
+ {"time":"2025-12-25T05:31:41.352480289Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
255
+ {"time":"2025-12-25T05:39:52.438308201Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
256
+ {"time":"2025-12-25T05:39:52.43863069Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
257
+ {"time":"2025-12-25T05:48:02.463179774Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
258
+ {"time":"2025-12-25T05:48:02.46350624Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
259
+ {"time":"2025-12-25T05:56:13.686377322Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
260
+ {"time":"2025-12-25T05:56:13.686580128Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
261
+ {"time":"2025-12-25T05:56:14.243412654Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
262
+ {"time":"2025-12-25T05:56:14.458949469Z","level":"INFO","msg":"handler: operation stats","stats":{}}
263
+ {"time":"2025-12-25T05:56:14.464678677Z","level":"INFO","msg":"stream: closing","id":"jnwx1i3g"}
264
+ {"time":"2025-12-25T05:56:14.464692292Z","level":"INFO","msg":"handler: closed","stream_id":"jnwx1i3g"}
265
+ {"time":"2025-12-25T05:56:14.46481017Z","level":"INFO","msg":"sender: closed","stream_id":"jnwx1i3g"}
266
+ {"time":"2025-12-25T05:56:14.464828776Z","level":"INFO","msg":"stream: closed","id":"jnwx1i3g"}
wandb/wandb/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Configure stats pid to 21342
3
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Loading settings from /workspace/halcyon-recipe2/wandb/settings
5
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/wandb/wandb/run-20251224_123436-jnwx1i3g/logs/debug.log
7
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/wandb/wandb/run-20251224_123436-jnwx1i3g/logs/debug-internal.log
8
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'num_layers': 28, 'encoder_num_layers': 28, 'decoder_num_layers': None, 'hidden_size': 1024, 'ffn_hidden_size': 3072, 'num_attention_heads': 16, 'attention_backend': <AttnBackend.flash: 1>, 'kv_channels': 128, 'group_query_attention': True, 'num_query_groups': 8, 'max_position_embeddings': 32768, 'position_embedding_type': 'rope', 'relative_attention_num_buckets': 32, 'relative_attention_max_distance': 128, 'use_rotary_position_embeddings': False, 'rotary_base': 1000000, 'rotary_percent': 1.0, 'rotary_interleaved': False, 'rotary_seq_len_interpolation_factor': None, 'use_rope_scaling': False, 'rope_scaling_factor': 8.0, 'no_rope_freq': None, 'add_position_embedding': True, 'mrope_section': None, 'make_vocab_size_divisible_by': 128, 'normalization': 'RMSNorm', 'norm_epsilon': 1e-06, 'apply_layernorm_1p': False, 'apply_residual_connection_post_layernorm': False, 'openai_gelu': False, 'squared_relu': False, 'swiglu': True, 'onnx_safe': None, 'bert_binary_head': True, 'untie_embeddings_and_output_weights': False, 'multi_latent_attention': False, 'mtp_num_layers': None, 'mtp_loss_scaling_factor': 0.1, 'attention_dropout': 0.0, 'hidden_dropout': 0.0, 'weight_decay': 0.1, 'start_weight_decay': 0.1, 'end_weight_decay': 0.1, 'weight_decay_incr_style': 'constant', 'clip_grad': 1.0, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-08, 'sgd_momentum': 0.9, 'micro_batch_size': 4, 'global_batch_size': 256, 'rampup_batch_size': None, 'decrease_batch_size_if_needed': False, 'recompute_granularity': 'full', 'check_for_nan_in_loss_and_grad': True, 'check_for_spiky_loss': False, 'check_for_large_grads': False, 'distribute_saved_activations': False, 'recompute_method': 'uniform', 'recompute_num_layers': 1, 'recompute_modules': ['core_attn'], 'clone_scatter_output_in_embedding': True, 'profile': False, 'profile_step_start': 10, 'profile_step_end': 12, 'iterations_to_skip': [], 'result_rejected_tracker_filename': None, 'enable_gloo_process_groups': True, 'use_pytorch_profiler': False, 'profile_ranks': [0], 'record_memory_history': False, 'memory_snapshot_path': 'snapshot.pickle', 'tp_comm_overlap': False, 'tp_comm_overlap_cfg': None, 'tp_comm_overlap_ag': True, 'tp_comm_overlap_rs': True, 'tp_comm_overlap_rs_dgrad': False, 'tp_comm_bulk_dgrad': True, 'tp_comm_bulk_wgrad': True, 'tp_comm_bootstrap_backend': 'nccl', 'use_cpu_initialization': None, 'empty_unused_memory_level': 0, 'deterministic_mode': False, 'check_weight_hash_across_dp_replicas_interval': None, 'calculate_per_token_loss': True, 'train_sync_interval': None, 'train_iters': 12700, 'train_samples': None, 'log_interval': 1, 'exit_interval': None, 'exit_duration_in_mins': None, 'exit_signal_handler': False, 'tensorboard_dir': '/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/runs', 'masked_softmax_fusion': True, 'bias_gelu_fusion': False, 'bias_swiglu_fusion': True, 'use_fused_weighted_squared_relu': False, 'bias_dropout_fusion': True, 'apply_rope_fusion': True, 'rope_type': None, 'cross_entropy_loss_fusion': True, 'cross_entropy_fusion_impl': 'native', 'use_flash_attn': False, 'add_bias_linear': False, 'add_qkv_bias': False, 'optimizer': 'adam', 'optimizer_cpu_offload': False, 'optimizer_offload_fraction': 1.0, 'use_torch_optimizer_for_cpu_offload': False, 'overlap_cpu_optimizer_d2h_h2d': False, 'pin_cpu_grads': True, 'pin_cpu_params': True, 'dataloader_type': 'cyclic', 'async_tensor_model_parallel_allreduce': True, 'no_persist_layer_norm': False, 'sequence_parallel': False, 'gradient_accumulation_fusion': True, 'deprecated_use_mcore_models': False, 'use_legacy_models': False, 'manual_gc': False, 'manual_gc_interval': 0, 'manual_gc_eval': True, 'tp_comm_split_ag': True, 'tp_comm_split_rs': True, 'pipeline_model_parallel_comm_backend': None, 'high_priority_stream_groups': [], 'seed': 42, 'data_parallel_random_init': False, 'init_method_std': 0.02, 'embedding_init_method_std': None, 'init_method_xavier_uniform': False, 'lr': 0.0001, 'lr_decay_style': 'cosine', 'lr_wsd_decay_style': 'exponential', 'lr_decay_iters': None, 'lr_decay_samples': None, 'lr_wsd_decay_samples': None, 'lr_wsd_decay_iters': None, 'lr_warmup_fraction': 0.05, 'lr_warmup_iters': 0, 'lr_warmup_samples': 0, 'lr_warmup_init': 0.0, 'min_lr': 3e-06, 'override_opt_param_scheduler': False, 'use_checkpoint_opt_param_scheduler': False, 'decoupled_lr': None, 'decoupled_min_lr': None, 'save': '/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139', 'save_interval': 100, 'save_retain_interval': None, 'no_save_optim': None, 'no_save_rng': None, 'load': '/workspace/halcyon-recipe2/patch', 'no_load_optim': None, 'load_main_params_from_ckpt': None, 'no_load_rng': None, 'strict_fsdp_dtensor_load': True, 'non_persistent_save_interval': None, 'non_persistent_ckpt_type': None, 'non_persistent_global_ckpt_dir': None, 'non_persistent_local_ckpt_dir': None, 'non_persistent_local_ckpt_algo': 'fully_parallel', 'finetune': True, 'pretrained_checkpoint': None, 'ckpt_step': None, 'perform_initialization': False, 'use_checkpoint_args': False, 'use_mp_args_from_checkpoint_args': False, 'use_tokenizer_model_from_checkpoint_args': True, 'exit_on_missing_checkpoint': True, 'use_dist_ckpt_deprecated': False, 'use_persistent_ckpt_worker': False, 'auto_detect_ckpt_format': True, 'dist_ckpt_format_deprecated': None, 'ckpt_format': 'torch_dist', 'ckpt_convert_format': None, 'ckpt_convert_save': None, 'ckpt_convert_update_legacy_dist_opt_format': False, 'ckpt_fully_parallel_save_deprecated': False, 'ckpt_fully_parallel_save': True, 'async_save': None, 'ckpt_fully_parallel_load': False, 'ckpt_assume_constant_structure': False, 'dist_ckpt_strictness': 'assume_ok_unexpected', 'load_model_opt_format': False, 'fp16': False, 'bf16': True, 'grad_reduce_in_bf16': False, 'loss_scale': None, 'initial_loss_scale': 4294967296, 'min_loss_scale': 1.0, 'loss_scale_window': 1000, 'hysteresis': 2, 'fp32_residual_connection': False, 'apply_query_key_layer_scaling': False, 'attention_softmax_in_fp32': True, 'accumulate_allreduce_grads_in_fp32': True, 'fp16_lm_cross_entropy': False, 'disable_bf16_reduced_precision_matmul': False, 'reuse_grad_buf_for_mxfp8_param_ag': False, 'tensor_model_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'decoder_first_pipeline_num_layers': None, 'decoder_last_pipeline_num_layers': None, 'pipeline_model_parallel_layout': None, 'num_layers_per_virtual_pipeline_stage': None, 'num_virtual_stages_per_pipeline_rank': None, 'microbatch_group_size_per_vp_stage': None, 'overlap_p2p_comm': False, 'overlap_p2p_comm_warmup_flush': False, 'distributed_backend': 'nccl', 'distributed_timeout_minutes': 300000, 'overlap_grad_reduce': True, 'defer_embedding_wgrad_compute': False, 'wgrad_deferral_limit': 0, 'align_grad_reduce': True, 'ddp_num_buckets': None, 'ddp_bucket_size': None, 'ddp_pad_buckets_for_high_nccl_busbw': False, 'ddp_average_in_collective': False, 'overlap_param_gather': True, 'overlap_param_gather_with_optimizer_step': False, 'align_param_gather': False, 'scatter_gather_tensors_in_pipeline': True, 'use_ring_exchange_p2p': False, 'local_rank': 7, 'lazy_mpu_init': None, 'account_for_embedding_in_pipeline_split': False, 'account_for_loss_in_pipeline_split': False, 'use_distributed_optimizer': True, 'nccl_ub': False, 'use_sharp': False, 'sharp_enabled_group': None, 'use_megatron_fsdp': False, 'init_model_with_meta_device': False, 'data_parallel_sharding_strategy': 'no_shard', 'gradient_reduce_div_fusion': True, 'fsdp_double_buffer': False, 'suggested_communication_unit_size': None, 'keep_fp8_transpose_cache': False, 'enable_full_sharding_in_hsdp': False, 'num_distributed_optimizer_instances': 1, 'use_torch_fsdp2': False, 'torch_fsdp2_reshard_after_forward': True, 'context_parallel_size': 1, 'cp_comm_type': ['p2p'], 'hierarchical_context_parallel_sizes': None, 'nccl_communicator_config_path': None, 'use_tp_pp_dp_mapping': False, 'replication': False, 'replication_jump': None, 'replication_factor': 2, 'full_validation': False, 'multiple_validation_sets': False, 'eval_iters': -1, 'eval_interval': 100, 'test_mode': False, 'skip_train': False, 'data_path': None, 'split': None, 'train_data_path': None, 'valid_data_path': None, 'test_data_path': None, 'data_args_path': None, 'per_split_data_args_path': None, 'data_cache_path': None, 'mmap_bin_files': True, 'mock_data': False, 'seq_length': 4096, 'encoder_seq_length': 4096, 'decoder_seq_length': None, 'retriever_seq_length': 256, 'sample_rate': 1.0, 'mask_prob': 0.15, 'short_seq_prob': 0.1, 'num_workers': 32, 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'create_attention_mask_in_dataloader': True, 'num_dataset_builder_threads': 1, 'object_storage_cache_path': None, 'mid_level_dataset_surplus': 0.005, 'vocab_size': None, 'padded_vocab_size': 151936, 'vocab_file': None, 'merge_file': None, 'vocab_extra_ids': 0, 'tokenizer_type': None, 'tokenizer_model': None, 'tiktoken_pattern': None, 'tiktoken_num_special_tokens': 1000, 'tiktoken_special_tokens': None, 'adlr_autoresume': False, 'adlr_autoresume_interval': 1000, 'ict_head_size': None, 'biencoder_projection_dim': 0, 'biencoder_shared_query_context_model': False, 'ict_load': None, 'bert_load': None, 'titles_data_path': None, 'query_in_block_prob': 0.1, 'use_one_sent_docs': False, 'evidence_data_path': None, 'retriever_report_topk_accuracies': [], 'retriever_score_scaling': False, 'block_data_path': None, 'embedding_path': None, 'indexer_batch_size': 128, 'indexer_log_interval': 1000, 'num_classes': 1000, 'img_h': 224, 'img_w': 224, 'num_channels': 3, 'patch_dim': 16, 'classes_fraction': 1.0, 'data_per_class_fraction': 1.0, 'data_sharding': True, 'head_lr_mult': 1.0, 'vision_pretraining': False, 'vision_pretraining_type': 'classify', 'vision_backbone_type': 'vit', 'swin_backbone_type': 'tiny', 'mask_type': 'random', 'mask_factor': 1.0, 'iter_per_epoch': 1250, 'dino_local_img_size': 96, 'dino_local_crops_number': 10, 'dino_head_hidden_size': 2048, 'dino_bottleneck_size': 256, 'dino_freeze_last_layer': 1, 'dino_norm_last_layer': False, 'dino_warmup_teacher_temp': 0.04, 'dino_teacher_temp': 0.07, 'dino_warmup_teacher_temp_epochs': 30, 'qk_layernorm': True, 'qk_l2_norm': False, 'expert_model_parallel_size': 1, 'expert_tensor_parallel_size': 1, 'num_experts': None, 'moe_layer_freq': 1, 'moe_ffn_hidden_size': None, 'moe_shared_expert_intermediate_size': None, 'moe_shared_expert_overlap': False, 'moe_grouped_gemm': True, 'moe_use_legacy_grouped_gemm': False, 'moe_layer_recompute': False, 'moe_extended_tp': False, 'moe_use_upcycling': False, 'moe_router_load_balancing_type': 'aux_loss', 'moe_router_dtype': 'fp32', 'moe_router_fusion': False, 'moe_router_score_function': 'softmax', 'moe_router_topk': 2, 'moe_router_pre_softmax': False, 'moe_router_num_groups': None, 'moe_router_group_topk': None, 'moe_router_topk_scaling_factor': None, 'moe_router_enable_expert_bias': False, 'moe_router_bias_update_rate': 0.001, 'moe_router_force_load_balancing': False, 'moe_router_padding_for_fp8': False, 'moe_aux_loss_coeff': 0.0, 'moe_z_loss_coeff': None, 'moe_input_jitter_eps': None, 'moe_per_layer_logging': False, 'moe_token_dispatcher_type': 'alltoall', 'moe_enable_deepep': False, 'moe_deepep_num_sms': 20, 'moe_permute_fusion': False, 'moe_expert_capacity_factor': None, 'moe_pad_expert_input_to_capacity': False, 'moe_token_drop_policy': 'probs', 'moe_apply_probs_on_input': False, 'overlap_moe_expert_parallel_comm': False, 'delay_wgrad_compute': False, 'moe_upcycling_granularity': 1, 'q_lora_rank': None, 'kv_lora_rank': 32, 'qk_head_dim': 128, 'qk_pos_emb_head_dim': 64, 'v_head_dim': 128, 'rotary_scaling_factor': 1.0, 'mscale': 1.0, 'mscale_all_dim': 0.0, 'cache_mla_latents': False, 'heterogeneous_layers_config_path': None, 'heterogeneous_layers_config_encoded_json': None, 'log_params_norm': False, 'log_num_zeros_in_grad': False, 'log_throughput': False, 'log_progress': False, 'timing_log_level': 0, 'log_energy': False, 'barrier_with_L1_time': True, 'timing_log_option': 'minmax', 'tensorboard_log_interval': 1, 'tensorboard_queue_size': 50, 'log_timers_to_tensorboard': True, 'log_loss_scale_to_tensorboard': True, 'log_validation_ppl_to_tensorboard': True, 'log_memory_to_tensorboard': True, 'log_world_size_to_tensorboard': False, 'wandb_project': 'plt', 'wandb_exp_name': 'tlt', 'wandb_save_dir': '', 'logging_level': 20, 'log_straggler': False, 'disable_straggler_on_startup': False, 'straggler_ctrlr_port': 65535, 'straggler_minmax_count': 1, 'run_workload_inspector_server': False, 'inference_batch_times_seqlen_threshold': -1, 'max_tokens_to_oom': 12000, 'output_bert_embeddings': False, 'bert_embedder_type': 'megatron', 'flash_decode': False, 'enable_cuda_graph': False, 'cuda_graph_warmup_steps': 3, 'external_cuda_graph': False, 'cuda_graph_scope': 'full', 'inference_max_batch_size': 8, 'inference_max_seq_length': 2560, 'inference_dynamic_batching': False, 'inference_dynamic_batching_buffer_size_gb': 40.0, 'inference_dynamic_batching_chunk_size': 256, 'inference_dynamic_batching_buffer_guaranteed_fraction': 0.2, 'inference_dynamic_batching_buffer_overflow_factor': None, 'inference_dynamic_batching_max_requests_override': None, 'inference_dynamic_batching_max_tokens_override': None, 'inference_dynamic_batching_num_cuda_graphs': 16, 'symmetric_ar_type': None, 'nccl_all_reduce_for_prefill': False, 'mlp_chunks_for_prefill': 1, 'fp8': None, 'fp8_recipe': 'delayed', 'fp8_margin': 0, 'fp8_interval': 1, 'fp8_amax_history_len': 1024, 'fp8_amax_compute_algo': 'max', 'fp8_wgrad': True, 'transformer_impl': 'transformer_engine', 'fp8_param_gather': False, 'first_last_layers_bf16': False, 'num_layers_at_start_in_bf16': 1, 'num_layers_at_end_in_bf16': 1, 'te_rng_tracker': False, 'inference_rng_tracker': False, 'retro_project_dir': None, 'retro_add_retriever': False, 'retro_cyclic_train_iters': None, 'retro_encoder_layers': 2, 'retro_encoder_hidden_dropout': 0.1, 'retro_encoder_attention_dropout': 0.1, 'retro_num_neighbors': 2, 'retro_num_retrieved_chunks': 2, 'retro_attention_gate': 1, 'retro_verify_neighbor_count': True, 'enable_experimental': False, 'spec': None, 'hybrid_attention_ratio': 0.0, 'hybrid_mlp_ratio': 0.0, 'hybrid_override_pattern': None, 'mamba_state_dim': 128, 'mamba_head_dim': 64, 'mamba_num_groups': 8, 'mamba_num_heads': None, 'is_hybrid_model': False, 'disable_mamba_mem_eff_path': False, 'yaml_cfg': None, 'use_precision_aware_optimizer': True, 'main_grads_dtype': torch.float32, 'main_params_dtype': torch.float32, 'exp_avg_dtype': torch.float32, 'exp_avg_sq_dtype': torch.float32, 'enable_one_logger': True, 'one_logger_project': 'megatron-lm', 'one_logger_run_name': None, 'one_logger_async': False, 'app_tag_run_name': None, 'app_tag_run_version': '0.0.0', 'inprocess_restart': False, 'inprocess_max_iterations': None, 'inprocess_monitor_thread_interval': 1.0, 'inprocess_monitor_process_interval': 1.0, 'inprocess_progress_watchdog_interval': 1.0, 'inprocess_heartbeat_interval': 30, 'inprocess_soft_timeout': 60, 'inprocess_hard_timeout': 90, 'inprocess_heartbeat_timeout': 60, 'inprocess_barrier_timeout': 120, 'inprocess_completion_timeout': 120, 'inprocess_last_call_wait': 1, 'inprocess_termination_grace_time': 1, 'inprocess_granularity': 'node', 'inprocess_active_world_size': 8, 'inprocess_empty_cuda_cache': False, 'enable_ft_package': False, 'calc_ft_timeouts': False, 'config_logger_dir': '', 'error_injection_rate': 0, 'error_injection_type': 'transient_error', 'rerun_mode': 'validate_results', 'enable_msc': True, 'kitchen_config_file': None, 'kitchen_recipe_number': None, 'sft': False, 'sft_tokenizer_prompt_format': 'nemotron-h-aligned', 'rank': 7, 'world_size': 8, 'use_dist_ckpt': True, 'transformer_pipeline_model_parallel_size': 1, 'data_parallel_size': 8, 'model_dir': '/workspace/.hf_home/hub/models--Qwen--Qwen3-0.6B-Base/snapshots/da87bfb608c14b7cf20ba1ce41287e8de496c0cd', 'is_multimodal': False, 'hf_model_type': 'qwen3', 'use_ray': False, 'ray_exp_name': None, 'device_groups': None, 'model': 'Qwen/Qwen3-0.6B-Base', 'model_type': 'qwen3', 'model_revision': None, 'task_type': 'causal_lm', 'torch_dtype': torch.bfloat16, 'attn_impl': None, 'new_special_tokens': [], 'num_labels': None, 'problem_type': None, 'rope_scaling': None, 'device_map': None, 'max_memory': {}, 'max_model_len': None, 'local_repo_path': None, 'init_strategy': None, 'template': 'qwen3', 'system': None, 'max_length': 4096, 'truncation_strategy': 'right', 'max_pixels': None, 'agent_template': None, 'norm_bbox': None, 'use_chat_template': False, 'padding_free': True, 'padding_side': 'right', 'sequence_parallel_size': 1, 'response_prefix': None, 'template_backend': 'swift', 'dataset': [], 'val_dataset': [], 'cached_dataset': ['/workspace/1of3'], 'cached_val_dataset': [], 'split_dataset_ratio': 0.0, 'data_seed': 42, 'dataset_num_proc': 32, 'load_from_cache_file': False, 'dataset_shuffle': True, 'val_dataset_shuffle': False, 'streaming': False, 'interleave_prob': None, 'stopping_strategy': 'first_exhausted', 'shuffle_buffer_size': 1000, 'download_mode': 'reuse_dataset_if_exists', 'columns': {}, 'strict': False, 'remove_unused_columns': True, 'model_name': None, 'model_author': None, 'custom_dataset_info': [], 'quant_method': None, 'quant_bits': None, 'hqq_axis': None, 'bnb_4bit_compute_dtype': torch.bfloat16, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_quant_storage': None, 'max_new_tokens': None, 'temperature': None, 'top_k': 50, 'top_p': 0.9, 'repetition_penalty': 1.0, 'num_beams': 1, 'stream': False, 'stop_words': [], 'logprobs': False, 'top_logprobs': None, 'ckpt_dir': '/workspace/halcyon-recipe2/patch', 'lora_modules': [], 'tuner_backend': 'peft', 'train_type': 'full', 'adapters': [], 'external_plugins': [], 'model_kwargs': {}, 'load_args': False, 'load_data_args': False, 'packing': True, 'packing_length': 4096, 'packing_num_proc': 1, 'lazy_tokenize': False, 'custom_register_path': [], 'use_hf': True, 'hub_token': None, 'ddp_timeout': 18000000, 'ddp_backend': None, 'ignore_args_error': False, 'use_swift_lora': False, 'freeze_llm': False, 'freeze_vit': True, 'freeze_aligner': True, 'freeze_parameters': [], 'freeze_parameters_regex': None, 'freeze_parameters_ratio': 0.0, 'trainable_parameters': [], 'trainable_parameters_regex': None, 'adapter_load': None, 'target_modules': ['all-linear'], 'target_regex': None, 'modules_to_save': [], 'lora_rank': 8, 'lora_alpha': 32, 'lora_dropout': 0.05, 'lora_bias': 'none', 'lora_dtype': None, 'use_rslora': False, 'rlhf_type': None, 'ref_load': None, 'ref_adapter_load': None, 'beta': 0.1, 'rpo_alpha': None, 'reference_free': False, 'label_smoothing': 0.0, 'f_divergence_type': 'reverse_kl', 'loss_type': None, 'desirable_weight': 1.0, 'undesirable_weight': 1.0, 'calculate_KL': None, 'center_rewards_coefficient': None, 'generation_batch_size': None, 'steps_per_generation': None, 'num_generations': 8, 'max_completion_length': 512, 'importance_sampling_level': 'token', 'tau_pos': 1.0, 'tau_neg': 1.05, 'epsilon': 0.2, 'epsilon_high': None, 'delta': None, 'use_vllm': True, 'vllm_mode': None, 'vllm_enable_prefix_caching': True, 'vllm_gpu_memory_utilization': 0.9, 'vllm_tensor_parallel_size': 1, 'vllm_max_model_len': None, 'vllm_enforce_eager': False, 'vllm_limit_mm_per_prompt': None, 'vllm_disable_cascade_attn': False, 'vllm_max_num_seqs': None, 'vllm_mm_processor_cache_gb': None, 'vllm_engine_kwargs': None, 'sleep_level': 0, 'offload_optimizer': False, 'offload_model': False, 'offload_bridge': False, 'vllm_server_base_url': None, 'vllm_server_host': None, 'vllm_server_port': [8000], 'vllm_server_timeout': 240.0, 'vllm_server_group_port': None, 'reward_funcs': [], 'reward_weights': None, 'cosine_min_len_value_wrong': -0.5, 'cosine_max_len_value_wrong': 0.0, 'cosine_min_len_value_correct': 1.0, 'cosine_max_len_value_correct': 0.5, 'cosine_max_len': None, 'repetition_n_grams': 3, 'repetition_max_penalty': -1.0, 'soft_max_length': None, 'soft_cache_length': None, 'dynamic_sample': False, 'max_resample_times': 3, 'overlong_filter': False, 'scale_rewards': 'group', 'advantage_estimator': 'grpo', 'kl_in_reward': False, 'wandb_log_unique_prompts': None, 'log_completions': False, 'rollout_importance_sampling_mode': None, 'rollout_importance_sampling_threshold': 2.0, 'log_rollout_offpolicy_metrics': False, 'off_policy_sequence_mask_delta': None, 'reward_model': None, 'reward_model_plugin': None, 'sync_ref_model': False, 'ref_model_sync_steps': 512, 'ref_model_mixup_alpha': 0.6, 'async_generate': False, 'move_model_batches': None, 'multi_turn_scheduler': None, 'max_turns': None, 'completion_length_limit_scope': 'per_round', 'vllm_server_pass_dataset': False, 'log_entropy': False, 'top_entropy_quantile': 1.0, 'num_iterations': 1, 'check_model': True, 'initialize_embedding': False, 'mlp_padding_free': False, 'load_safetensors': False, 'save_safetensors': False, 'ref_model': None, 'ref_adapters': [], 'merge_lora': False, 'max_shard_size': '5GB', 'train_dataloader_shuffle': True, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': True, 'dataloader_prefetch_factor': 10, 'architectures': 'Qwen3ForCausalLM', 'llm_architectures': 'Qwen3ForCausalLM', 'max_epochs': None, 'enable_dft_loss': False, 'enable_channel_loss': False, 'patch_size': 1, 'save_strategy': 'steps', 'original_max_position_embeddings': None, 'partial_rotary_factor': None, 'use_shared_expert_gate': False, 'vit_gradient_checkpointing': True, 'vit_lr': None, 'aligner_lr': None, 'gradient_checkpointing_kwargs': None, 'linear_num_value_heads': None, 'linear_num_key_heads': None, 'linear_key_head_dim': None, 'linear_value_head_dim': None, 'linear_conv_kernel_dim': None, 'layer_types': None, 'mrope_interleaved': False, 'add_version': True, 'virtual_pipeline_model_parallel_size': None, 'params_dtype': torch.bfloat16, 'consumed_train_samples': 0, 'skipped_train_samples': 0, 'consumed_valid_samples': 0, 'variable_seq_lengths': False, '_wandb': {}}
11
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:init():889] starting backend
12
+ 2025-12-24 12:34:36,246 INFO MainThread:21342 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-24 12:34:36,249 INFO MainThread:21342 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-24 12:34:36,252 INFO MainThread:21342 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-24 12:34:36,258 INFO MainThread:21342 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-24 12:34:36,960 INFO MainThread:21342 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-24 12:34:37,056 INFO MainThread:21342 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-25 05:56:13,622 INFO MainThread:21342 [wandb_run.py:_finish():2287] finishing run tepic/plt/jnwx1i3g
23
+ 2025-12-25 05:56:13,623 INFO MainThread:21342 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 0
24
+ 2025-12-25 05:56:13,624 INFO MainThread:21342 [wandb_run.py:_restore():2468] restore
25
+ 2025-12-25 05:56:13,625 INFO MainThread:21342 [wandb_run.py:_restore():2474] restore done
26
+ 2025-12-25 05:56:14,463 INFO MainThread:21342 [wandb_run.py:_footer_sync_info():3862] logging synced files
wandb/wandb/run-20251224_123436-jnwx1i3g/files/config.yaml ADDED
@@ -0,0 +1,1779 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.23.1
4
+ e:
5
+ bpm0umriqwkux3bqnxj3du3nf1ugtqjj:
6
+ args:
7
+ - --seed
8
+ - "42"
9
+ - --micro-batch-size
10
+ - "4"
11
+ - --global-batch-size
12
+ - "256"
13
+ - --recompute-granularity
14
+ - full
15
+ - --recompute-method
16
+ - uniform
17
+ - --recompute-num-layers
18
+ - "1"
19
+ - --recompute-modules
20
+ - core_attn
21
+ - --train-iters
22
+ - "12700"
23
+ - --log-interval
24
+ - "1"
25
+ - --tensorboard-dir
26
+ - /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/runs
27
+ - --cross-entropy-loss-fusion
28
+ - --cross-entropy-fusion-impl
29
+ - native
30
+ - --calculate-per-token-loss
31
+ - --attention-backend
32
+ - flash
33
+ - --optimizer
34
+ - adam
35
+ - --optimizer-offload-fraction
36
+ - "1.0"
37
+ - --use-precision-aware-optimizer
38
+ - --main-grads-dtype
39
+ - fp32
40
+ - --main-params-dtype
41
+ - fp32
42
+ - --exp-avg-dtype
43
+ - fp32
44
+ - --exp-avg-sq-dtype
45
+ - fp32
46
+ - --dataloader-type
47
+ - cyclic
48
+ - --manual-gc-interval
49
+ - "0"
50
+ - --lr
51
+ - "0.0001"
52
+ - --lr-decay-style
53
+ - cosine
54
+ - --lr-warmup-iters
55
+ - "0"
56
+ - --lr-warmup-fraction
57
+ - "0.05"
58
+ - --min-lr
59
+ - "3e-06"
60
+ - --weight-decay
61
+ - "0.1"
62
+ - --clip-grad
63
+ - "1.0"
64
+ - --adam-beta1
65
+ - "0.9"
66
+ - --adam-beta2
67
+ - "0.95"
68
+ - --adam-eps
69
+ - "1e-08"
70
+ - --sgd-momentum
71
+ - "0.9"
72
+ - --save
73
+ - /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139
74
+ - --save-interval
75
+ - "100"
76
+ - --load
77
+ - /workspace/halcyon-recipe2/patch
78
+ - --finetune
79
+ - --ckpt-format
80
+ - torch_dist
81
+ - --no-initialization
82
+ - --auto-detect-ckpt-format
83
+ - --exit-on-missing-checkpoint
84
+ - --distributed-backend
85
+ - nccl
86
+ - --local-rank
87
+ - "7"
88
+ - --use-distributed-optimizer
89
+ - --tensor-model-parallel-size
90
+ - "1"
91
+ - --pipeline-model-parallel-size
92
+ - "1"
93
+ - --context-parallel-size
94
+ - "1"
95
+ - --overlap-grad-reduce
96
+ - --overlap-param-gather
97
+ - --distributed-timeout-minutes
98
+ - "300000"
99
+ - --num-layers
100
+ - "28"
101
+ - --hidden-size
102
+ - "1024"
103
+ - --ffn-hidden-size
104
+ - "3072"
105
+ - --num-attention-heads
106
+ - "16"
107
+ - --group-query-attention
108
+ - --num-query-groups
109
+ - "8"
110
+ - --max-position-embeddings
111
+ - "32768"
112
+ - --position-embedding-type
113
+ - rope
114
+ - --rotary-base
115
+ - "1000000"
116
+ - --rotary-percent
117
+ - "1.0"
118
+ - --normalization
119
+ - RMSNorm
120
+ - --norm-epsilon
121
+ - "1e-06"
122
+ - --swiglu
123
+ - --disable-bias-linear
124
+ - --attention-dropout
125
+ - "0.0"
126
+ - --hidden-dropout
127
+ - "0.0"
128
+ - --kv-channels
129
+ - "128"
130
+ - --qk-layernorm
131
+ - --transformer-impl
132
+ - transformer_engine
133
+ - --moe-layer-freq
134
+ - "1"
135
+ - --moe-router-topk
136
+ - "2"
137
+ - --moe-router-dtype
138
+ - fp32
139
+ - --moe-router-score-function
140
+ - softmax
141
+ - --moe-router-load-balancing-type
142
+ - aux_loss
143
+ - --expert-model-parallel-size
144
+ - "1"
145
+ - --expert-tensor-parallel-size
146
+ - "1"
147
+ - --moe-token-dispatcher-type
148
+ - alltoall
149
+ - --moe-grouped-gemm
150
+ - --moe-aux-loss-coeff
151
+ - "0.0"
152
+ - --moe-token-drop-policy
153
+ - probs
154
+ - --kv-lora-rank
155
+ - "32"
156
+ - --qk-head-dim
157
+ - "128"
158
+ - --qk-pos-emb-head-dim
159
+ - "64"
160
+ - --mtp-loss-scaling-factor
161
+ - "0.1"
162
+ - --fp8-recipe
163
+ - delayed
164
+ - --fp8-amax-history-len
165
+ - "1024"
166
+ - --fp8-amax-compute-algo
167
+ - max
168
+ - --bf16
169
+ - --attention-softmax-in-fp32
170
+ - --tensorboard-log-interval
171
+ - "1"
172
+ - --tensorboard-queue-size
173
+ - "50"
174
+ - --log-timers-to-tensorboard
175
+ - --log-validation-ppl-to-tensorboard
176
+ - --log-memory-to-tensorboard
177
+ - --logging-level
178
+ - "20"
179
+ - --wandb-project
180
+ - plt
181
+ - --wandb-exp-name
182
+ - tlt
183
+ - --eval-iters
184
+ - "-1"
185
+ - --eval-interval
186
+ - "100"
187
+ - --seq-length
188
+ - "4096"
189
+ - --num-workers
190
+ - "32"
191
+ codePath: swift/cli/_megatron/pt.py
192
+ codePathLocal: swift/cli/_megatron/pt.py
193
+ cpu_count: 72
194
+ cpu_count_logical: 144
195
+ cudaVersion: "13.0"
196
+ disk:
197
+ /:
198
+ total: "7669363507200"
199
+ used: "1006439596032"
200
+ email: kazuma826826@gmail.com
201
+ executable: /venv/main/bin/python3.12
202
+ git:
203
+ commit: ea7cc214b68fb511dd83bff83a504b7f43053577
204
+ remote: https://github.com/weak-kajuma/halcyon-recipe2.git
205
+ gpu: NVIDIA GeForce RTX 5090
206
+ gpu_count: 8
207
+ gpu_nvidia:
208
+ - architecture: Blackwell
209
+ cudaCores: 21760
210
+ memoryTotal: "34190917632"
211
+ name: NVIDIA GeForce RTX 5090
212
+ uuid: GPU-32487176-4d38-3e1d-696d-ce9cd9f7e666
213
+ - architecture: Blackwell
214
+ cudaCores: 21760
215
+ memoryTotal: "34190917632"
216
+ name: NVIDIA GeForce RTX 5090
217
+ uuid: GPU-19c921f5-05b1-51fa-fb5f-e08deed52308
218
+ - architecture: Blackwell
219
+ cudaCores: 21760
220
+ memoryTotal: "34190917632"
221
+ name: NVIDIA GeForce RTX 5090
222
+ uuid: GPU-dd59a530-3e2a-2e22-24e1-54e3ff1082b7
223
+ - architecture: Blackwell
224
+ cudaCores: 21760
225
+ memoryTotal: "34190917632"
226
+ name: NVIDIA GeForce RTX 5090
227
+ uuid: GPU-5ce39532-f9c0-ffd8-19f0-7bb854ee835e
228
+ - architecture: Blackwell
229
+ cudaCores: 21760
230
+ memoryTotal: "34190917632"
231
+ name: NVIDIA GeForce RTX 5090
232
+ uuid: GPU-6a7a11ff-b8b4-6532-d873-b1003d6fe3f0
233
+ - architecture: Blackwell
234
+ cudaCores: 21760
235
+ memoryTotal: "34190917632"
236
+ name: NVIDIA GeForce RTX 5090
237
+ uuid: GPU-4daa8877-6665-3cb8-e22f-1f0bf8189c80
238
+ - architecture: Blackwell
239
+ cudaCores: 21760
240
+ memoryTotal: "34190917632"
241
+ name: NVIDIA GeForce RTX 5090
242
+ uuid: GPU-859daa9f-b13c-2da4-1dc4-271ee55b347c
243
+ - architecture: Blackwell
244
+ cudaCores: 21760
245
+ memoryTotal: "34190917632"
246
+ name: NVIDIA GeForce RTX 5090
247
+ uuid: GPU-63d123a0-a5fe-1583-9ab7-42bba078df53
248
+ host: 13f078589dd5
249
+ memory:
250
+ total: "540643295232"
251
+ os: Linux-6.8.0-58-generic-x86_64-with-glibc2.39
252
+ program: /workspace/halcyon-recipe2/swift/cli/_megatron/pt.py
253
+ python: CPython 3.12.12
254
+ root: /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/wandb
255
+ startedAt: "2025-12-24T12:34:36.002087Z"
256
+ writerId: bpm0umriqwkux3bqnxj3du3nf1ugtqjj
257
+ m: []
258
+ python_version: 3.12.12
259
+ t:
260
+ "1":
261
+ - 1
262
+ - 11
263
+ - 41
264
+ - 49
265
+ - 51
266
+ - 71
267
+ - 84
268
+ - 98
269
+ - 105
270
+ "2":
271
+ - 1
272
+ - 11
273
+ - 41
274
+ - 49
275
+ - 51
276
+ - 71
277
+ - 84
278
+ - 98
279
+ - 105
280
+ "3":
281
+ - 2
282
+ - 13
283
+ - 16
284
+ - 61
285
+ "4": 3.12.12
286
+ "5": 0.23.1
287
+ "6": 4.57.3
288
+ "12": 0.23.1
289
+ "13": linux-x86_64
290
+ account_for_embedding_in_pipeline_split:
291
+ value: false
292
+ account_for_loss_in_pipeline_split:
293
+ value: false
294
+ accumulate_allreduce_grads_in_fp32:
295
+ value: true
296
+ adam_beta1:
297
+ value: 0.9
298
+ adam_beta2:
299
+ value: 0.95
300
+ adam_eps:
301
+ value: 1e-08
302
+ adapter_load:
303
+ value: null
304
+ adapters:
305
+ value: []
306
+ add_bias_linear:
307
+ value: false
308
+ add_position_embedding:
309
+ value: true
310
+ add_qkv_bias:
311
+ value: false
312
+ add_version:
313
+ value: true
314
+ adlr_autoresume:
315
+ value: false
316
+ adlr_autoresume_interval:
317
+ value: 1000
318
+ advantage_estimator:
319
+ value: grpo
320
+ agent_template:
321
+ value: null
322
+ align_grad_reduce:
323
+ value: true
324
+ align_param_gather:
325
+ value: false
326
+ aligner_lr:
327
+ value: null
328
+ app_tag_run_name:
329
+ value: null
330
+ app_tag_run_version:
331
+ value: 0.0.0
332
+ apply_layernorm_1p:
333
+ value: false
334
+ apply_query_key_layer_scaling:
335
+ value: false
336
+ apply_residual_connection_post_layernorm:
337
+ value: false
338
+ apply_rope_fusion:
339
+ value: true
340
+ architectures:
341
+ value: Qwen3ForCausalLM
342
+ async_generate:
343
+ value: false
344
+ async_save:
345
+ value: null
346
+ async_tensor_model_parallel_allreduce:
347
+ value: true
348
+ attention_backend:
349
+ value: flash
350
+ attention_dropout:
351
+ value: 0
352
+ attention_softmax_in_fp32:
353
+ value: true
354
+ attn_impl:
355
+ value: null
356
+ auto_detect_ckpt_format:
357
+ value: true
358
+ barrier_with_L1_time:
359
+ value: true
360
+ bert_binary_head:
361
+ value: true
362
+ bert_embedder_type:
363
+ value: megatron
364
+ bert_load:
365
+ value: null
366
+ beta:
367
+ value: 0.1
368
+ bf16:
369
+ value: true
370
+ bias_dropout_fusion:
371
+ value: true
372
+ bias_gelu_fusion:
373
+ value: false
374
+ bias_swiglu_fusion:
375
+ value: true
376
+ biencoder_projection_dim:
377
+ value: 0
378
+ biencoder_shared_query_context_model:
379
+ value: false
380
+ block_data_path:
381
+ value: null
382
+ bnb_4bit_compute_dtype:
383
+ value: torch.bfloat16
384
+ bnb_4bit_quant_storage:
385
+ value: null
386
+ bnb_4bit_quant_type:
387
+ value: nf4
388
+ bnb_4bit_use_double_quant:
389
+ value: true
390
+ cache_mla_latents:
391
+ value: false
392
+ cached_dataset:
393
+ value:
394
+ - /workspace/1of3
395
+ cached_val_dataset:
396
+ value: []
397
+ calc_ft_timeouts:
398
+ value: false
399
+ calculate_KL:
400
+ value: null
401
+ calculate_per_token_loss:
402
+ value: true
403
+ center_rewards_coefficient:
404
+ value: null
405
+ check_for_large_grads:
406
+ value: false
407
+ check_for_nan_in_loss_and_grad:
408
+ value: true
409
+ check_for_spiky_loss:
410
+ value: false
411
+ check_model:
412
+ value: true
413
+ check_weight_hash_across_dp_replicas_interval:
414
+ value: null
415
+ ckpt_assume_constant_structure:
416
+ value: false
417
+ ckpt_convert_format:
418
+ value: null
419
+ ckpt_convert_save:
420
+ value: null
421
+ ckpt_convert_update_legacy_dist_opt_format:
422
+ value: false
423
+ ckpt_dir:
424
+ value: /workspace/halcyon-recipe2/patch
425
+ ckpt_format:
426
+ value: torch_dist
427
+ ckpt_fully_parallel_load:
428
+ value: false
429
+ ckpt_fully_parallel_save:
430
+ value: true
431
+ ckpt_fully_parallel_save_deprecated:
432
+ value: false
433
+ ckpt_step:
434
+ value: null
435
+ classes_fraction:
436
+ value: 1
437
+ clip_grad:
438
+ value: 1
439
+ clone_scatter_output_in_embedding:
440
+ value: true
441
+ completion_length_limit_scope:
442
+ value: per_round
443
+ config_logger_dir:
444
+ value: ""
445
+ consumed_train_samples:
446
+ value: 0
447
+ consumed_valid_samples:
448
+ value: 0
449
+ context_parallel_size:
450
+ value: 1
451
+ cosine_max_len:
452
+ value: null
453
+ cosine_max_len_value_correct:
454
+ value: 0.5
455
+ cosine_max_len_value_wrong:
456
+ value: 0
457
+ cosine_min_len_value_correct:
458
+ value: 1
459
+ cosine_min_len_value_wrong:
460
+ value: -0.5
461
+ cp_comm_type:
462
+ value:
463
+ - p2p
464
+ create_attention_mask_in_dataloader:
465
+ value: true
466
+ cross_entropy_fusion_impl:
467
+ value: native
468
+ cross_entropy_loss_fusion:
469
+ value: true
470
+ cuda_graph_scope:
471
+ value: full
472
+ cuda_graph_warmup_steps:
473
+ value: 3
474
+ custom_dataset_info:
475
+ value: []
476
+ custom_register_path:
477
+ value: []
478
+ data_args_path:
479
+ value: null
480
+ data_cache_path:
481
+ value: null
482
+ data_parallel_random_init:
483
+ value: false
484
+ data_parallel_sharding_strategy:
485
+ value: no_shard
486
+ data_parallel_size:
487
+ value: 8
488
+ data_path:
489
+ value: null
490
+ data_per_class_fraction:
491
+ value: 1
492
+ data_seed:
493
+ value: 42
494
+ data_sharding:
495
+ value: true
496
+ dataloader_persistent_workers:
497
+ value: true
498
+ dataloader_pin_memory:
499
+ value: true
500
+ dataloader_prefetch_factor:
501
+ value: 10
502
+ dataloader_type:
503
+ value: cyclic
504
+ dataset:
505
+ value: []
506
+ dataset_num_proc:
507
+ value: 32
508
+ dataset_shuffle:
509
+ value: true
510
+ ddp_average_in_collective:
511
+ value: false
512
+ ddp_backend:
513
+ value: null
514
+ ddp_bucket_size:
515
+ value: null
516
+ ddp_num_buckets:
517
+ value: null
518
+ ddp_pad_buckets_for_high_nccl_busbw:
519
+ value: false
520
+ ddp_timeout:
521
+ value: 18000000
522
+ decoder_first_pipeline_num_layers:
523
+ value: null
524
+ decoder_last_pipeline_num_layers:
525
+ value: null
526
+ decoder_num_layers:
527
+ value: null
528
+ decoder_seq_length:
529
+ value: null
530
+ decoupled_lr:
531
+ value: null
532
+ decoupled_min_lr:
533
+ value: null
534
+ decrease_batch_size_if_needed:
535
+ value: false
536
+ defer_embedding_wgrad_compute:
537
+ value: false
538
+ delay_wgrad_compute:
539
+ value: false
540
+ delta:
541
+ value: null
542
+ deprecated_use_mcore_models:
543
+ value: false
544
+ desirable_weight:
545
+ value: 1
546
+ deterministic_mode:
547
+ value: false
548
+ device_groups:
549
+ value: null
550
+ device_map:
551
+ value: null
552
+ dino_bottleneck_size:
553
+ value: 256
554
+ dino_freeze_last_layer:
555
+ value: 1
556
+ dino_head_hidden_size:
557
+ value: 2048
558
+ dino_local_crops_number:
559
+ value: 10
560
+ dino_local_img_size:
561
+ value: 96
562
+ dino_norm_last_layer:
563
+ value: false
564
+ dino_teacher_temp:
565
+ value: 0.07
566
+ dino_warmup_teacher_temp:
567
+ value: 0.04
568
+ dino_warmup_teacher_temp_epochs:
569
+ value: 30
570
+ disable_bf16_reduced_precision_matmul:
571
+ value: false
572
+ disable_mamba_mem_eff_path:
573
+ value: false
574
+ disable_straggler_on_startup:
575
+ value: false
576
+ dist_ckpt_format_deprecated:
577
+ value: null
578
+ dist_ckpt_strictness:
579
+ value: assume_ok_unexpected
580
+ distribute_saved_activations:
581
+ value: false
582
+ distributed_backend:
583
+ value: nccl
584
+ distributed_timeout_minutes:
585
+ value: 300000
586
+ download_mode:
587
+ value: reuse_dataset_if_exists
588
+ dynamic_sample:
589
+ value: false
590
+ embedding_init_method_std:
591
+ value: null
592
+ embedding_path:
593
+ value: null
594
+ empty_unused_memory_level:
595
+ value: 0
596
+ enable_channel_loss:
597
+ value: false
598
+ enable_cuda_graph:
599
+ value: false
600
+ enable_dft_loss:
601
+ value: false
602
+ enable_experimental:
603
+ value: false
604
+ enable_ft_package:
605
+ value: false
606
+ enable_full_sharding_in_hsdp:
607
+ value: false
608
+ enable_gloo_process_groups:
609
+ value: true
610
+ enable_msc:
611
+ value: true
612
+ enable_one_logger:
613
+ value: true
614
+ encoder_num_layers:
615
+ value: 28
616
+ encoder_seq_length:
617
+ value: 4096
618
+ end_weight_decay:
619
+ value: 0.1
620
+ eod_mask_loss:
621
+ value: false
622
+ epsilon:
623
+ value: 0.2
624
+ epsilon_high:
625
+ value: null
626
+ error_injection_rate:
627
+ value: 0
628
+ error_injection_type:
629
+ value: transient_error
630
+ eval_interval:
631
+ value: 100
632
+ eval_iters:
633
+ value: -1
634
+ evidence_data_path:
635
+ value: null
636
+ exit_duration_in_mins:
637
+ value: null
638
+ exit_interval:
639
+ value: null
640
+ exit_on_missing_checkpoint:
641
+ value: true
642
+ exit_signal_handler:
643
+ value: false
644
+ exp_avg_dtype:
645
+ value: torch.float32
646
+ exp_avg_sq_dtype:
647
+ value: torch.float32
648
+ expert_model_parallel_size:
649
+ value: 1
650
+ expert_tensor_parallel_size:
651
+ value: 1
652
+ external_cuda_graph:
653
+ value: false
654
+ external_plugins:
655
+ value: []
656
+ f_divergence_type:
657
+ value: reverse_kl
658
+ ffn_hidden_size:
659
+ value: 3072
660
+ finetune:
661
+ value: true
662
+ first_last_layers_bf16:
663
+ value: false
664
+ flash_decode:
665
+ value: false
666
+ fp8:
667
+ value: null
668
+ fp8_amax_compute_algo:
669
+ value: max
670
+ fp8_amax_history_len:
671
+ value: 1024
672
+ fp8_interval:
673
+ value: 1
674
+ fp8_margin:
675
+ value: 0
676
+ fp8_param_gather:
677
+ value: false
678
+ fp8_recipe:
679
+ value: delayed
680
+ fp8_wgrad:
681
+ value: true
682
+ fp16:
683
+ value: false
684
+ fp16_lm_cross_entropy:
685
+ value: false
686
+ fp32_residual_connection:
687
+ value: false
688
+ freeze_aligner:
689
+ value: true
690
+ freeze_llm:
691
+ value: false
692
+ freeze_parameters:
693
+ value: []
694
+ freeze_parameters_ratio:
695
+ value: 0
696
+ freeze_parameters_regex:
697
+ value: null
698
+ freeze_vit:
699
+ value: true
700
+ fsdp_double_buffer:
701
+ value: false
702
+ full_validation:
703
+ value: false
704
+ generation_batch_size:
705
+ value: null
706
+ global_batch_size:
707
+ value: 256
708
+ grad_reduce_in_bf16:
709
+ value: false
710
+ gradient_accumulation_fusion:
711
+ value: true
712
+ gradient_checkpointing_kwargs:
713
+ value: null
714
+ gradient_reduce_div_fusion:
715
+ value: true
716
+ group_query_attention:
717
+ value: true
718
+ head_lr_mult:
719
+ value: 1
720
+ heterogeneous_layers_config_encoded_json:
721
+ value: null
722
+ heterogeneous_layers_config_path:
723
+ value: null
724
+ hf_model_type:
725
+ value: qwen3
726
+ hidden_dropout:
727
+ value: 0
728
+ hidden_size:
729
+ value: 1024
730
+ hierarchical_context_parallel_sizes:
731
+ value: null
732
+ high_priority_stream_groups:
733
+ value: []
734
+ hqq_axis:
735
+ value: null
736
+ hub_token:
737
+ value: null
738
+ hybrid_attention_ratio:
739
+ value: 0
740
+ hybrid_mlp_ratio:
741
+ value: 0
742
+ hybrid_override_pattern:
743
+ value: null
744
+ hysteresis:
745
+ value: 2
746
+ ict_head_size:
747
+ value: null
748
+ ict_load:
749
+ value: null
750
+ ignore_args_error:
751
+ value: false
752
+ img_h:
753
+ value: 224
754
+ img_w:
755
+ value: 224
756
+ importance_sampling_level:
757
+ value: token
758
+ indexer_batch_size:
759
+ value: 128
760
+ indexer_log_interval:
761
+ value: 1000
762
+ inference_batch_times_seqlen_threshold:
763
+ value: -1
764
+ inference_dynamic_batching:
765
+ value: false
766
+ inference_dynamic_batching_buffer_guaranteed_fraction:
767
+ value: 0.2
768
+ inference_dynamic_batching_buffer_overflow_factor:
769
+ value: null
770
+ inference_dynamic_batching_buffer_size_gb:
771
+ value: 40
772
+ inference_dynamic_batching_chunk_size:
773
+ value: 256
774
+ inference_dynamic_batching_max_requests_override:
775
+ value: null
776
+ inference_dynamic_batching_max_tokens_override:
777
+ value: null
778
+ inference_dynamic_batching_num_cuda_graphs:
779
+ value: 16
780
+ inference_max_batch_size:
781
+ value: 8
782
+ inference_max_seq_length:
783
+ value: 2560
784
+ inference_rng_tracker:
785
+ value: false
786
+ init_method_std:
787
+ value: 0.02
788
+ init_method_xavier_uniform:
789
+ value: false
790
+ init_model_with_meta_device:
791
+ value: false
792
+ init_strategy:
793
+ value: null
794
+ initial_loss_scale:
795
+ value: 4294967296
796
+ initialize_embedding:
797
+ value: false
798
+ inprocess_active_world_size:
799
+ value: 8
800
+ inprocess_barrier_timeout:
801
+ value: 120
802
+ inprocess_completion_timeout:
803
+ value: 120
804
+ inprocess_empty_cuda_cache:
805
+ value: false
806
+ inprocess_granularity:
807
+ value: node
808
+ inprocess_hard_timeout:
809
+ value: 90
810
+ inprocess_heartbeat_interval:
811
+ value: 30
812
+ inprocess_heartbeat_timeout:
813
+ value: 60
814
+ inprocess_last_call_wait:
815
+ value: 1
816
+ inprocess_max_iterations:
817
+ value: null
818
+ inprocess_monitor_process_interval:
819
+ value: 1
820
+ inprocess_monitor_thread_interval:
821
+ value: 1
822
+ inprocess_progress_watchdog_interval:
823
+ value: 1
824
+ inprocess_restart:
825
+ value: false
826
+ inprocess_soft_timeout:
827
+ value: 60
828
+ inprocess_termination_grace_time:
829
+ value: 1
830
+ interleave_prob:
831
+ value: null
832
+ is_hybrid_model:
833
+ value: false
834
+ is_multimodal:
835
+ value: false
836
+ iter_per_epoch:
837
+ value: 1250
838
+ iterations_to_skip:
839
+ value: []
840
+ keep_fp8_transpose_cache:
841
+ value: false
842
+ kitchen_config_file:
843
+ value: null
844
+ kitchen_recipe_number:
845
+ value: null
846
+ kl_in_reward:
847
+ value: false
848
+ kv_channels:
849
+ value: 128
850
+ kv_lora_rank:
851
+ value: 32
852
+ label_smoothing:
853
+ value: 0
854
+ layer_types:
855
+ value: null
856
+ lazy_mpu_init:
857
+ value: null
858
+ lazy_tokenize:
859
+ value: false
860
+ linear_conv_kernel_dim:
861
+ value: null
862
+ linear_key_head_dim:
863
+ value: null
864
+ linear_num_key_heads:
865
+ value: null
866
+ linear_num_value_heads:
867
+ value: null
868
+ linear_value_head_dim:
869
+ value: null
870
+ llm_architectures:
871
+ value: Qwen3ForCausalLM
872
+ load:
873
+ value: /workspace/halcyon-recipe2/patch
874
+ load_args:
875
+ value: false
876
+ load_data_args:
877
+ value: false
878
+ load_from_cache_file:
879
+ value: false
880
+ load_main_params_from_ckpt:
881
+ value: null
882
+ load_model_opt_format:
883
+ value: false
884
+ load_safetensors:
885
+ value: false
886
+ local_rank:
887
+ value: 7
888
+ local_repo_path:
889
+ value: null
890
+ log_completions:
891
+ value: false
892
+ log_energy:
893
+ value: false
894
+ log_entropy:
895
+ value: false
896
+ log_interval:
897
+ value: 1
898
+ log_loss_scale_to_tensorboard:
899
+ value: true
900
+ log_memory_to_tensorboard:
901
+ value: true
902
+ log_num_zeros_in_grad:
903
+ value: false
904
+ log_params_norm:
905
+ value: false
906
+ log_progress:
907
+ value: false
908
+ log_rollout_offpolicy_metrics:
909
+ value: false
910
+ log_straggler:
911
+ value: false
912
+ log_throughput:
913
+ value: false
914
+ log_timers_to_tensorboard:
915
+ value: true
916
+ log_validation_ppl_to_tensorboard:
917
+ value: true
918
+ log_world_size_to_tensorboard:
919
+ value: false
920
+ logging_level:
921
+ value: 20
922
+ logprobs:
923
+ value: false
924
+ lora_alpha:
925
+ value: 32
926
+ lora_bias:
927
+ value: none
928
+ lora_dropout:
929
+ value: 0.05
930
+ lora_dtype:
931
+ value: null
932
+ lora_modules:
933
+ value: []
934
+ lora_rank:
935
+ value: 8
936
+ loss_scale:
937
+ value: null
938
+ loss_scale_window:
939
+ value: 1000
940
+ loss_type:
941
+ value: null
942
+ lr:
943
+ value: 0.0001
944
+ lr_decay_iters:
945
+ value: null
946
+ lr_decay_samples:
947
+ value: null
948
+ lr_decay_style:
949
+ value: cosine
950
+ lr_warmup_fraction:
951
+ value: 0.05
952
+ lr_warmup_init:
953
+ value: 0
954
+ lr_warmup_iters:
955
+ value: 0
956
+ lr_warmup_samples:
957
+ value: 0
958
+ lr_wsd_decay_iters:
959
+ value: null
960
+ lr_wsd_decay_samples:
961
+ value: null
962
+ lr_wsd_decay_style:
963
+ value: exponential
964
+ main_grads_dtype:
965
+ value: torch.float32
966
+ main_params_dtype:
967
+ value: torch.float32
968
+ make_vocab_size_divisible_by:
969
+ value: 128
970
+ mamba_head_dim:
971
+ value: 64
972
+ mamba_num_groups:
973
+ value: 8
974
+ mamba_num_heads:
975
+ value: null
976
+ mamba_state_dim:
977
+ value: 128
978
+ manual_gc:
979
+ value: false
980
+ manual_gc_eval:
981
+ value: true
982
+ manual_gc_interval:
983
+ value: 0
984
+ mask_factor:
985
+ value: 1
986
+ mask_prob:
987
+ value: 0.15
988
+ mask_type:
989
+ value: random
990
+ masked_softmax_fusion:
991
+ value: true
992
+ max_completion_length:
993
+ value: 512
994
+ max_epochs:
995
+ value: null
996
+ max_length:
997
+ value: 4096
998
+ max_model_len:
999
+ value: null
1000
+ max_new_tokens:
1001
+ value: null
1002
+ max_pixels:
1003
+ value: null
1004
+ max_position_embeddings:
1005
+ value: 32768
1006
+ max_resample_times:
1007
+ value: 3
1008
+ max_shard_size:
1009
+ value: 5GB
1010
+ max_tokens_to_oom:
1011
+ value: 12000
1012
+ max_turns:
1013
+ value: null
1014
+ memory_snapshot_path:
1015
+ value: snapshot.pickle
1016
+ merge_file:
1017
+ value: null
1018
+ merge_lora:
1019
+ value: false
1020
+ micro_batch_size:
1021
+ value: 4
1022
+ microbatch_group_size_per_vp_stage:
1023
+ value: null
1024
+ mid_level_dataset_surplus:
1025
+ value: 0.005
1026
+ min_loss_scale:
1027
+ value: 1
1028
+ min_lr:
1029
+ value: 3e-06
1030
+ mlp_chunks_for_prefill:
1031
+ value: 1
1032
+ mlp_padding_free:
1033
+ value: false
1034
+ mmap_bin_files:
1035
+ value: true
1036
+ mock_data:
1037
+ value: false
1038
+ model:
1039
+ value: Qwen/Qwen3-0.6B-Base
1040
+ model_author:
1041
+ value: null
1042
+ model_dir:
1043
+ value: /workspace/.hf_home/hub/models--Qwen--Qwen3-0.6B-Base/snapshots/da87bfb608c14b7cf20ba1ce41287e8de496c0cd
1044
+ model_name:
1045
+ value: null
1046
+ model_revision:
1047
+ value: null
1048
+ model_type:
1049
+ value: qwen3
1050
+ modules_to_save:
1051
+ value: []
1052
+ moe_apply_probs_on_input:
1053
+ value: false
1054
+ moe_aux_loss_coeff:
1055
+ value: 0
1056
+ moe_deepep_num_sms:
1057
+ value: 20
1058
+ moe_enable_deepep:
1059
+ value: false
1060
+ moe_expert_capacity_factor:
1061
+ value: null
1062
+ moe_extended_tp:
1063
+ value: false
1064
+ moe_ffn_hidden_size:
1065
+ value: null
1066
+ moe_grouped_gemm:
1067
+ value: true
1068
+ moe_input_jitter_eps:
1069
+ value: null
1070
+ moe_layer_freq:
1071
+ value: 1
1072
+ moe_layer_recompute:
1073
+ value: false
1074
+ moe_pad_expert_input_to_capacity:
1075
+ value: false
1076
+ moe_per_layer_logging:
1077
+ value: false
1078
+ moe_permute_fusion:
1079
+ value: false
1080
+ moe_router_bias_update_rate:
1081
+ value: 0.001
1082
+ moe_router_dtype:
1083
+ value: fp32
1084
+ moe_router_enable_expert_bias:
1085
+ value: false
1086
+ moe_router_force_load_balancing:
1087
+ value: false
1088
+ moe_router_fusion:
1089
+ value: false
1090
+ moe_router_group_topk:
1091
+ value: null
1092
+ moe_router_load_balancing_type:
1093
+ value: aux_loss
1094
+ moe_router_num_groups:
1095
+ value: null
1096
+ moe_router_padding_for_fp8:
1097
+ value: false
1098
+ moe_router_pre_softmax:
1099
+ value: false
1100
+ moe_router_score_function:
1101
+ value: softmax
1102
+ moe_router_topk:
1103
+ value: 2
1104
+ moe_router_topk_scaling_factor:
1105
+ value: null
1106
+ moe_shared_expert_intermediate_size:
1107
+ value: null
1108
+ moe_shared_expert_overlap:
1109
+ value: false
1110
+ moe_token_dispatcher_type:
1111
+ value: alltoall
1112
+ moe_token_drop_policy:
1113
+ value: probs
1114
+ moe_upcycling_granularity:
1115
+ value: 1
1116
+ moe_use_legacy_grouped_gemm:
1117
+ value: false
1118
+ moe_use_upcycling:
1119
+ value: false
1120
+ moe_z_loss_coeff:
1121
+ value: null
1122
+ move_model_batches:
1123
+ value: null
1124
+ mrope_interleaved:
1125
+ value: false
1126
+ mrope_section:
1127
+ value: null
1128
+ mscale:
1129
+ value: 1
1130
+ mscale_all_dim:
1131
+ value: 0
1132
+ mtp_loss_scaling_factor:
1133
+ value: 0.1
1134
+ mtp_num_layers:
1135
+ value: null
1136
+ multi_latent_attention:
1137
+ value: false
1138
+ multi_turn_scheduler:
1139
+ value: null
1140
+ multiple_validation_sets:
1141
+ value: false
1142
+ nccl_all_reduce_for_prefill:
1143
+ value: false
1144
+ nccl_communicator_config_path:
1145
+ value: null
1146
+ nccl_ub:
1147
+ value: false
1148
+ new_special_tokens:
1149
+ value: []
1150
+ no_load_optim:
1151
+ value: null
1152
+ no_load_rng:
1153
+ value: null
1154
+ no_persist_layer_norm:
1155
+ value: false
1156
+ no_rope_freq:
1157
+ value: null
1158
+ no_save_optim:
1159
+ value: null
1160
+ no_save_rng:
1161
+ value: null
1162
+ non_persistent_ckpt_type:
1163
+ value: null
1164
+ non_persistent_global_ckpt_dir:
1165
+ value: null
1166
+ non_persistent_local_ckpt_algo:
1167
+ value: fully_parallel
1168
+ non_persistent_local_ckpt_dir:
1169
+ value: null
1170
+ non_persistent_save_interval:
1171
+ value: null
1172
+ norm_bbox:
1173
+ value: null
1174
+ norm_epsilon:
1175
+ value: 1e-06
1176
+ normalization:
1177
+ value: RMSNorm
1178
+ num_attention_heads:
1179
+ value: 16
1180
+ num_beams:
1181
+ value: 1
1182
+ num_channels:
1183
+ value: 3
1184
+ num_classes:
1185
+ value: 1000
1186
+ num_dataset_builder_threads:
1187
+ value: 1
1188
+ num_distributed_optimizer_instances:
1189
+ value: 1
1190
+ num_experts:
1191
+ value: null
1192
+ num_generations:
1193
+ value: 8
1194
+ num_iterations:
1195
+ value: 1
1196
+ num_labels:
1197
+ value: null
1198
+ num_layers:
1199
+ value: 28
1200
+ num_layers_at_end_in_bf16:
1201
+ value: 1
1202
+ num_layers_at_start_in_bf16:
1203
+ value: 1
1204
+ num_layers_per_virtual_pipeline_stage:
1205
+ value: null
1206
+ num_query_groups:
1207
+ value: 8
1208
+ num_virtual_stages_per_pipeline_rank:
1209
+ value: null
1210
+ num_workers:
1211
+ value: 32
1212
+ object_storage_cache_path:
1213
+ value: null
1214
+ off_policy_sequence_mask_delta:
1215
+ value: null
1216
+ offload_bridge:
1217
+ value: false
1218
+ offload_model:
1219
+ value: false
1220
+ offload_optimizer:
1221
+ value: false
1222
+ one_logger_async:
1223
+ value: false
1224
+ one_logger_project:
1225
+ value: megatron-lm
1226
+ one_logger_run_name:
1227
+ value: null
1228
+ onnx_safe:
1229
+ value: null
1230
+ openai_gelu:
1231
+ value: false
1232
+ optimizer:
1233
+ value: adam
1234
+ optimizer_cpu_offload:
1235
+ value: false
1236
+ optimizer_offload_fraction:
1237
+ value: 1
1238
+ original_max_position_embeddings:
1239
+ value: null
1240
+ output_bert_embeddings:
1241
+ value: false
1242
+ overlap_cpu_optimizer_d2h_h2d:
1243
+ value: false
1244
+ overlap_grad_reduce:
1245
+ value: true
1246
+ overlap_moe_expert_parallel_comm:
1247
+ value: false
1248
+ overlap_p2p_comm:
1249
+ value: false
1250
+ overlap_p2p_comm_warmup_flush:
1251
+ value: false
1252
+ overlap_param_gather:
1253
+ value: true
1254
+ overlap_param_gather_with_optimizer_step:
1255
+ value: false
1256
+ overlong_filter:
1257
+ value: false
1258
+ override_opt_param_scheduler:
1259
+ value: false
1260
+ packing:
1261
+ value: true
1262
+ packing_length:
1263
+ value: 4096
1264
+ packing_num_proc:
1265
+ value: 1
1266
+ padded_vocab_size:
1267
+ value: 151936
1268
+ padding_free:
1269
+ value: true
1270
+ padding_side:
1271
+ value: right
1272
+ params_dtype:
1273
+ value: torch.bfloat16
1274
+ partial_rotary_factor:
1275
+ value: null
1276
+ patch_dim:
1277
+ value: 16
1278
+ patch_size:
1279
+ value: 1
1280
+ per_split_data_args_path:
1281
+ value: null
1282
+ perform_initialization:
1283
+ value: false
1284
+ pin_cpu_grads:
1285
+ value: true
1286
+ pin_cpu_params:
1287
+ value: true
1288
+ pipeline_model_parallel_comm_backend:
1289
+ value: null
1290
+ pipeline_model_parallel_layout:
1291
+ value: null
1292
+ pipeline_model_parallel_size:
1293
+ value: 1
1294
+ position_embedding_type:
1295
+ value: rope
1296
+ pretrained_checkpoint:
1297
+ value: null
1298
+ problem_type:
1299
+ value: null
1300
+ profile:
1301
+ value: false
1302
+ profile_ranks:
1303
+ value:
1304
+ - 0
1305
+ profile_step_end:
1306
+ value: 12
1307
+ profile_step_start:
1308
+ value: 10
1309
+ q_lora_rank:
1310
+ value: null
1311
+ qk_head_dim:
1312
+ value: 128
1313
+ qk_l2_norm:
1314
+ value: false
1315
+ qk_layernorm:
1316
+ value: true
1317
+ qk_pos_emb_head_dim:
1318
+ value: 64
1319
+ quant_bits:
1320
+ value: null
1321
+ quant_method:
1322
+ value: null
1323
+ query_in_block_prob:
1324
+ value: 0.1
1325
+ rampup_batch_size:
1326
+ value: null
1327
+ rank:
1328
+ value: 7
1329
+ ray_exp_name:
1330
+ value: null
1331
+ recompute_granularity:
1332
+ value: full
1333
+ recompute_method:
1334
+ value: uniform
1335
+ recompute_modules:
1336
+ value:
1337
+ - core_attn
1338
+ recompute_num_layers:
1339
+ value: 1
1340
+ record_memory_history:
1341
+ value: false
1342
+ ref_adapter_load:
1343
+ value: null
1344
+ ref_adapters:
1345
+ value: []
1346
+ ref_load:
1347
+ value: null
1348
+ ref_model:
1349
+ value: null
1350
+ ref_model_mixup_alpha:
1351
+ value: 0.6
1352
+ ref_model_sync_steps:
1353
+ value: 512
1354
+ reference_free:
1355
+ value: false
1356
+ relative_attention_max_distance:
1357
+ value: 128
1358
+ relative_attention_num_buckets:
1359
+ value: 32
1360
+ remove_unused_columns:
1361
+ value: true
1362
+ repetition_max_penalty:
1363
+ value: -1
1364
+ repetition_n_grams:
1365
+ value: 3
1366
+ repetition_penalty:
1367
+ value: 1
1368
+ replication:
1369
+ value: false
1370
+ replication_factor:
1371
+ value: 2
1372
+ replication_jump:
1373
+ value: null
1374
+ rerun_mode:
1375
+ value: validate_results
1376
+ reset_attention_mask:
1377
+ value: false
1378
+ reset_position_ids:
1379
+ value: false
1380
+ response_prefix:
1381
+ value: null
1382
+ result_rejected_tracker_filename:
1383
+ value: null
1384
+ retriever_report_topk_accuracies:
1385
+ value: []
1386
+ retriever_score_scaling:
1387
+ value: false
1388
+ retriever_seq_length:
1389
+ value: 256
1390
+ retro_add_retriever:
1391
+ value: false
1392
+ retro_attention_gate:
1393
+ value: 1
1394
+ retro_cyclic_train_iters:
1395
+ value: null
1396
+ retro_encoder_attention_dropout:
1397
+ value: 0.1
1398
+ retro_encoder_hidden_dropout:
1399
+ value: 0.1
1400
+ retro_encoder_layers:
1401
+ value: 2
1402
+ retro_num_neighbors:
1403
+ value: 2
1404
+ retro_num_retrieved_chunks:
1405
+ value: 2
1406
+ retro_project_dir:
1407
+ value: null
1408
+ retro_verify_neighbor_count:
1409
+ value: true
1410
+ reuse_grad_buf_for_mxfp8_param_ag:
1411
+ value: false
1412
+ reward_funcs:
1413
+ value: []
1414
+ reward_model:
1415
+ value: null
1416
+ reward_model_plugin:
1417
+ value: null
1418
+ reward_weights:
1419
+ value: null
1420
+ rlhf_type:
1421
+ value: null
1422
+ rollout_importance_sampling_mode:
1423
+ value: null
1424
+ rollout_importance_sampling_threshold:
1425
+ value: 2
1426
+ rope_scaling:
1427
+ value: null
1428
+ rope_scaling_factor:
1429
+ value: 8
1430
+ rope_type:
1431
+ value: null
1432
+ rotary_base:
1433
+ value: 1000000
1434
+ rotary_interleaved:
1435
+ value: false
1436
+ rotary_percent:
1437
+ value: 1
1438
+ rotary_scaling_factor:
1439
+ value: 1
1440
+ rotary_seq_len_interpolation_factor:
1441
+ value: null
1442
+ rpo_alpha:
1443
+ value: null
1444
+ run_workload_inspector_server:
1445
+ value: false
1446
+ sample_rate:
1447
+ value: 1
1448
+ save:
1449
+ value: /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139
1450
+ save_interval:
1451
+ value: 100
1452
+ save_retain_interval:
1453
+ value: null
1454
+ save_safetensors:
1455
+ value: false
1456
+ save_strategy:
1457
+ value: steps
1458
+ scale_rewards:
1459
+ value: group
1460
+ scatter_gather_tensors_in_pipeline:
1461
+ value: true
1462
+ seed:
1463
+ value: 42
1464
+ seq_length:
1465
+ value: 4096
1466
+ sequence_parallel:
1467
+ value: false
1468
+ sequence_parallel_size:
1469
+ value: 1
1470
+ sft:
1471
+ value: false
1472
+ sft_tokenizer_prompt_format:
1473
+ value: nemotron-h-aligned
1474
+ sgd_momentum:
1475
+ value: 0.9
1476
+ sharp_enabled_group:
1477
+ value: null
1478
+ short_seq_prob:
1479
+ value: 0.1
1480
+ shuffle_buffer_size:
1481
+ value: 1000
1482
+ skip_train:
1483
+ value: false
1484
+ skipped_train_samples:
1485
+ value: 0
1486
+ sleep_level:
1487
+ value: 0
1488
+ soft_cache_length:
1489
+ value: null
1490
+ soft_max_length:
1491
+ value: null
1492
+ spec:
1493
+ value: null
1494
+ split:
1495
+ value: null
1496
+ split_dataset_ratio:
1497
+ value: 0
1498
+ squared_relu:
1499
+ value: false
1500
+ start_weight_decay:
1501
+ value: 0.1
1502
+ steps_per_generation:
1503
+ value: null
1504
+ stop_words:
1505
+ value: []
1506
+ stopping_strategy:
1507
+ value: first_exhausted
1508
+ straggler_ctrlr_port:
1509
+ value: 65535
1510
+ straggler_minmax_count:
1511
+ value: 1
1512
+ stream:
1513
+ value: false
1514
+ streaming:
1515
+ value: false
1516
+ strict:
1517
+ value: false
1518
+ strict_fsdp_dtensor_load:
1519
+ value: true
1520
+ suggested_communication_unit_size:
1521
+ value: null
1522
+ swiglu:
1523
+ value: true
1524
+ swin_backbone_type:
1525
+ value: tiny
1526
+ symmetric_ar_type:
1527
+ value: null
1528
+ sync_ref_model:
1529
+ value: false
1530
+ system:
1531
+ value: null
1532
+ target_modules:
1533
+ value:
1534
+ - all-linear
1535
+ target_regex:
1536
+ value: null
1537
+ task_type:
1538
+ value: causal_lm
1539
+ tau_neg:
1540
+ value: 1.05
1541
+ tau_pos:
1542
+ value: 1
1543
+ te_rng_tracker:
1544
+ value: false
1545
+ temperature:
1546
+ value: null
1547
+ template:
1548
+ value: qwen3
1549
+ template_backend:
1550
+ value: swift
1551
+ tensor_model_parallel_size:
1552
+ value: 1
1553
+ tensorboard_dir:
1554
+ value: /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/runs
1555
+ tensorboard_log_interval:
1556
+ value: 1
1557
+ tensorboard_queue_size:
1558
+ value: 50
1559
+ test_data_path:
1560
+ value: null
1561
+ test_mode:
1562
+ value: false
1563
+ tiktoken_num_special_tokens:
1564
+ value: 1000
1565
+ tiktoken_pattern:
1566
+ value: null
1567
+ tiktoken_special_tokens:
1568
+ value: null
1569
+ timing_log_level:
1570
+ value: 0
1571
+ timing_log_option:
1572
+ value: minmax
1573
+ titles_data_path:
1574
+ value: null
1575
+ tokenizer_model:
1576
+ value: null
1577
+ tokenizer_type:
1578
+ value: null
1579
+ top_entropy_quantile:
1580
+ value: 1
1581
+ top_k:
1582
+ value: 50
1583
+ top_logprobs:
1584
+ value: null
1585
+ top_p:
1586
+ value: 0.9
1587
+ torch_dtype:
1588
+ value: torch.bfloat16
1589
+ torch_fsdp2_reshard_after_forward:
1590
+ value: true
1591
+ tp_comm_bootstrap_backend:
1592
+ value: nccl
1593
+ tp_comm_bulk_dgrad:
1594
+ value: true
1595
+ tp_comm_bulk_wgrad:
1596
+ value: true
1597
+ tp_comm_overlap:
1598
+ value: false
1599
+ tp_comm_overlap_ag:
1600
+ value: true
1601
+ tp_comm_overlap_cfg:
1602
+ value: null
1603
+ tp_comm_overlap_rs:
1604
+ value: true
1605
+ tp_comm_overlap_rs_dgrad:
1606
+ value: false
1607
+ tp_comm_split_ag:
1608
+ value: true
1609
+ tp_comm_split_rs:
1610
+ value: true
1611
+ train_data_path:
1612
+ value: null
1613
+ train_dataloader_shuffle:
1614
+ value: true
1615
+ train_iters:
1616
+ value: 12700
1617
+ train_samples:
1618
+ value: null
1619
+ train_sync_interval:
1620
+ value: null
1621
+ train_type:
1622
+ value: full
1623
+ trainable_parameters:
1624
+ value: []
1625
+ trainable_parameters_regex:
1626
+ value: null
1627
+ transformer_impl:
1628
+ value: transformer_engine
1629
+ transformer_pipeline_model_parallel_size:
1630
+ value: 1
1631
+ truncation_strategy:
1632
+ value: right
1633
+ tuner_backend:
1634
+ value: peft
1635
+ undesirable_weight:
1636
+ value: 1
1637
+ untie_embeddings_and_output_weights:
1638
+ value: false
1639
+ use_chat_template:
1640
+ value: false
1641
+ use_checkpoint_args:
1642
+ value: false
1643
+ use_checkpoint_opt_param_scheduler:
1644
+ value: false
1645
+ use_cpu_initialization:
1646
+ value: null
1647
+ use_dist_ckpt:
1648
+ value: true
1649
+ use_dist_ckpt_deprecated:
1650
+ value: false
1651
+ use_distributed_optimizer:
1652
+ value: true
1653
+ use_flash_attn:
1654
+ value: false
1655
+ use_fused_weighted_squared_relu:
1656
+ value: false
1657
+ use_hf:
1658
+ value: true
1659
+ use_legacy_models:
1660
+ value: false
1661
+ use_megatron_fsdp:
1662
+ value: false
1663
+ use_mp_args_from_checkpoint_args:
1664
+ value: false
1665
+ use_one_sent_docs:
1666
+ value: false
1667
+ use_persistent_ckpt_worker:
1668
+ value: false
1669
+ use_precision_aware_optimizer:
1670
+ value: true
1671
+ use_pytorch_profiler:
1672
+ value: false
1673
+ use_ray:
1674
+ value: false
1675
+ use_ring_exchange_p2p:
1676
+ value: false
1677
+ use_rope_scaling:
1678
+ value: false
1679
+ use_rotary_position_embeddings:
1680
+ value: false
1681
+ use_rslora:
1682
+ value: false
1683
+ use_shared_expert_gate:
1684
+ value: false
1685
+ use_sharp:
1686
+ value: false
1687
+ use_swift_lora:
1688
+ value: false
1689
+ use_tokenizer_model_from_checkpoint_args:
1690
+ value: true
1691
+ use_torch_fsdp2:
1692
+ value: false
1693
+ use_torch_optimizer_for_cpu_offload:
1694
+ value: false
1695
+ use_tp_pp_dp_mapping:
1696
+ value: false
1697
+ use_vllm:
1698
+ value: true
1699
+ v_head_dim:
1700
+ value: 128
1701
+ val_dataset:
1702
+ value: []
1703
+ val_dataset_shuffle:
1704
+ value: false
1705
+ valid_data_path:
1706
+ value: null
1707
+ variable_seq_lengths:
1708
+ value: false
1709
+ virtual_pipeline_model_parallel_size:
1710
+ value: null
1711
+ vision_backbone_type:
1712
+ value: vit
1713
+ vision_pretraining:
1714
+ value: false
1715
+ vision_pretraining_type:
1716
+ value: classify
1717
+ vit_gradient_checkpointing:
1718
+ value: true
1719
+ vit_lr:
1720
+ value: null
1721
+ vllm_disable_cascade_attn:
1722
+ value: false
1723
+ vllm_enable_prefix_caching:
1724
+ value: true
1725
+ vllm_enforce_eager:
1726
+ value: false
1727
+ vllm_engine_kwargs:
1728
+ value: null
1729
+ vllm_gpu_memory_utilization:
1730
+ value: 0.9
1731
+ vllm_limit_mm_per_prompt:
1732
+ value: null
1733
+ vllm_max_model_len:
1734
+ value: null
1735
+ vllm_max_num_seqs:
1736
+ value: null
1737
+ vllm_mm_processor_cache_gb:
1738
+ value: null
1739
+ vllm_mode:
1740
+ value: null
1741
+ vllm_server_base_url:
1742
+ value: null
1743
+ vllm_server_group_port:
1744
+ value: null
1745
+ vllm_server_host:
1746
+ value: null
1747
+ vllm_server_pass_dataset:
1748
+ value: false
1749
+ vllm_server_port:
1750
+ value:
1751
+ - 8000
1752
+ vllm_server_timeout:
1753
+ value: 240
1754
+ vllm_tensor_parallel_size:
1755
+ value: 1
1756
+ vocab_extra_ids:
1757
+ value: 0
1758
+ vocab_file:
1759
+ value: null
1760
+ vocab_size:
1761
+ value: null
1762
+ wandb_exp_name:
1763
+ value: tlt
1764
+ wandb_log_unique_prompts:
1765
+ value: null
1766
+ wandb_project:
1767
+ value: plt
1768
+ wandb_save_dir:
1769
+ value: ""
1770
+ weight_decay:
1771
+ value: 0.1
1772
+ weight_decay_incr_style:
1773
+ value: constant
1774
+ wgrad_deferral_limit:
1775
+ value: 0
1776
+ world_size:
1777
+ value: 8
1778
+ yaml_cfg:
1779
+ value: null
wandb/wandb/run-20251224_123436-jnwx1i3g/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/run-20251224_123436-jnwx1i3g/files/requirements.txt ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pip==25.3
2
+ setuptools==80.9.0
3
+ wheel==0.45.1
4
+ hf_transfer==0.1.9
5
+ sortedcontainers==2.4.0
6
+ pytz==2025.2
7
+ pydub==0.25.1
8
+ jieba==0.42.1
9
+ crcmod==1.7
10
+ cpm-kernels==1.0.11
11
+ brotli==1.2.0
12
+ antlr4-python3-runtime==4.9.3
13
+ addict==2.4.0
14
+ zstandard==0.25.0
15
+ zipp==3.23.0
16
+ xxhash==3.6.0
17
+ Werkzeug==3.1.4
18
+ websockets==15.0.1
19
+ uvicorn==0.40.0
20
+ tzdata==2025.3
21
+ typing-inspection==0.4.2
22
+ tomlkit==0.13.3
23
+ tensorboard-data-server==0.7.2
24
+ sniffio==1.3.1
25
+ smmap==5.0.2
26
+ simplejson==3.20.2
27
+ sentry-sdk==2.48.0
28
+ semantic-version==2.10.0
29
+ scipy==1.16.3
30
+ safetensors==0.7.0
31
+ ruff==0.14.10
32
+ rouge==1.0.1
33
+ regex==2025.11.3
34
+ python-multipart==0.0.21
35
+ pyparsing==3.3.1
36
+ pydantic_core==2.41.4
37
+ pycryptodome==3.23.0
38
+ pycparser==2.23
39
+ pyarrow==22.0.0
40
+ protobuf==6.33.2
41
+ propcache==0.4.1
42
+ pillow==11.3.0
43
+ orjson==3.11.5
44
+ omegaconf==2.3.0
45
+ multidict==6.7.0
46
+ mdurl==0.1.2
47
+ Markdown==3.10
48
+ kiwisolver==1.4.9
49
+ json_repair==0.54.3
50
+ joblib==1.5.3
51
+ jmespath==0.10.0
52
+ jiter==0.12.0
53
+ grpcio==1.76.0
54
+ groovy==0.1.2
55
+ future==1.0.0
56
+ trl==0.24.0
57
+ fsspec==2025.3.0
58
+ frozenlist==1.8.0
59
+ fonttools==4.61.1
60
+ ffmpy==1.0.0
61
+ einops==0.8.1
62
+ distro==1.9.0
63
+ dill==0.3.8
64
+ dacite==1.9.2
65
+ cycler==0.12.1
66
+ contourpy==1.3.3
67
+ attrs==25.4.0
68
+ attrdict==2.0.1
69
+ annotated-types==0.7.0
70
+ annotated-doc==0.0.4
71
+ aiohappyeyeballs==2.6.1
72
+ aiofiles==24.1.0
73
+ absl-py==2.3.1
74
+ yarl==1.22.0
75
+ tiktoken==0.12.0
76
+ tensorboard==2.20.0
77
+ starlette==0.50.0
78
+ pydantic==2.12.3
79
+ pandas==2.3.3
80
+ nltk==3.9.2
81
+ multiprocess==0.70.16
82
+ modelscope==1.33.0
83
+ matplotlib==3.10.8
84
+ markdown-it-py==4.0.0
85
+ importlib_metadata==8.7.1
86
+ huggingface-hub==0.36.0
87
+ gitdb==4.0.12
88
+ cffi==2.0.0
89
+ binpacking==1.5.2
90
+ aiosignal==1.4.0
91
+ tokenizers==0.22.1
92
+ safehttpx==0.1.7
93
+ rich==14.2.0
94
+ openai==2.14.0
95
+ gradio_client==1.14.0
96
+ GitPython==3.1.45
97
+ fastapi==0.127.0
98
+ cryptography==46.0.3
99
+ aiohttp==3.13.2
100
+ wandb==0.23.1
101
+ typer==0.20.1
102
+ transformers==4.57.3
103
+ aliyun-python-sdk-core==2.16.0
104
+ accelerate==1.12.0
105
+ transformers-stream-generator==0.0.5
106
+ peft==0.18.0
107
+ gradio==5.50.0
108
+ datasets==3.6.0
109
+ aliyun-python-sdk-kms==2.16.5
110
+ oss2==2.19.1
111
+ ms_swift==3.12.0.dev0
112
+ liger_kernel==0.6.4
113
+ pybind11==3.0.1
114
+ transformer_engine==2.10.0
115
+ ml_dtypes==0.5.4
116
+ onnx==1.20.0
117
+ transformer_engine_cu12==2.10.0
118
+ onnx-ir==0.1.13
119
+ onnxscript==0.5.7
120
+ transformer_engine_torch==2.10.0
121
+ apex==0.1
122
+ numpy==1.26.4
123
+ megatron-core==0.15.0
124
+ flash_attn==2.8.3
125
+ charset-normalizer==3.4.4
126
+ Jinja2==3.1.6
127
+ MarkupSafe==3.0.3
128
+ mpmath==1.3.0
129
+ networkx==3.6.1
130
+ nvidia-cublas-cu12==12.8.4.1
131
+ nvidia-cuda-cupti-cu12==12.8.90
132
+ nvidia-cuda-nvrtc-cu12==12.8.93
133
+ nvidia-cuda-runtime-cu12==12.8.90
134
+ nvidia-cudnn-cu12==9.10.2.21
135
+ nvidia-cufft-cu12==11.3.3.83
136
+ nvidia-cufile-cu12==1.13.1.3
137
+ nvidia-curand-cu12==10.3.9.90
138
+ nvidia-cusolver-cu12==11.7.3.90
139
+ nvidia-cusparse-cu12==12.5.8.93
140
+ nvidia-cusparselt-cu12==0.7.1
141
+ nvidia-nccl-cu12==2.27.5
142
+ nvidia-nvjitlink-cu12==12.8.93
143
+ nvidia-nvshmem-cu12==3.3.20
144
+ nvidia-nvtx-cu12==12.8.90
145
+ requests==2.32.5
146
+ sentencepiece==0.2.1
147
+ sympy==1.14.0
148
+ torch==2.9.1+cu128
149
+ torchaudio==2.9.1+cu128
150
+ torchcodec==0.9.1
151
+ torchdata==0.10.0
152
+ torchtext==0.6.0
153
+ torchvision==0.24.1+cu128
154
+ triton==3.5.1
155
+ urllib3==2.6.2
156
+ anyio==4.12.0
157
+ asttokens==3.0.1
158
+ certifi==2025.11.12
159
+ click==8.3.1
160
+ comm==0.2.3
161
+ debugpy==1.8.18
162
+ decorator==5.2.1
163
+ executing==2.2.1
164
+ filelock==3.20.0
165
+ h11==0.16.0
166
+ hf-xet==1.2.0
167
+ httpcore==1.0.9
168
+ httpx==0.28.1
169
+ idna==3.11
170
+ ipykernel==7.1.0
171
+ ipython==9.8.0
172
+ ipython_pygments_lexers==1.1.1
173
+ ipywidgets==8.1.8
174
+ jedi==0.19.2
175
+ jupyter_client==8.7.0
176
+ jupyter_core==5.9.1
177
+ jupyterlab_widgets==3.0.16
178
+ matplotlib-inline==0.2.1
179
+ nest-asyncio==1.6.0
180
+ packaging==25.0
181
+ parso==0.8.5
182
+ pexpect==4.9.0
183
+ platformdirs==4.5.1
184
+ prompt_toolkit==3.0.52
185
+ psutil==7.1.3
186
+ ptyprocess==0.7.0
187
+ pure_eval==0.2.3
188
+ Pygments==2.19.2
189
+ python-dateutil==2.9.0.post0
190
+ PyYAML==6.0.3
191
+ pyzmq==27.1.0
192
+ shellingham==1.5.4
193
+ six==1.17.0
194
+ stack-data==0.6.3
195
+ tornado==6.5.3
196
+ tqdm==4.67.1
197
+ traitlets==5.14.3
198
+ typer-slim==0.20.0
199
+ typing_extensions==4.15.0
200
+ wcwidth==0.2.14
201
+ widgetsnbextension==4.0.15
202
+ autocommand==2.2.2
203
+ backports.tarfile==1.2.0
204
+ importlib_metadata==8.0.0
205
+ inflect==7.3.1
206
+ jaraco.collections==5.1.0
207
+ jaraco.context==5.3.0
208
+ jaraco.functools==4.0.1
209
+ jaraco.text==3.12.1
210
+ more-itertools==10.3.0
211
+ packaging==24.2
212
+ platformdirs==4.2.2
213
+ tomli==2.0.1
214
+ typeguard==4.3.0
215
+ typing_extensions==4.12.2
216
+ wheel==0.45.1
217
+ zipp==3.19.2
wandb/wandb/run-20251224_123436-jnwx1i3g/files/wandb-metadata.json ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-58-generic-x86_64-with-glibc2.39",
3
+ "python": "CPython 3.12.12",
4
+ "startedAt": "2025-12-24T12:34:36.002087Z",
5
+ "args": [
6
+ "--seed",
7
+ "42",
8
+ "--micro-batch-size",
9
+ "4",
10
+ "--global-batch-size",
11
+ "256",
12
+ "--recompute-granularity",
13
+ "full",
14
+ "--recompute-method",
15
+ "uniform",
16
+ "--recompute-num-layers",
17
+ "1",
18
+ "--recompute-modules",
19
+ "core_attn",
20
+ "--train-iters",
21
+ "12700",
22
+ "--log-interval",
23
+ "1",
24
+ "--tensorboard-dir",
25
+ "/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/runs",
26
+ "--cross-entropy-loss-fusion",
27
+ "--cross-entropy-fusion-impl",
28
+ "native",
29
+ "--calculate-per-token-loss",
30
+ "--attention-backend",
31
+ "flash",
32
+ "--optimizer",
33
+ "adam",
34
+ "--optimizer-offload-fraction",
35
+ "1.0",
36
+ "--use-precision-aware-optimizer",
37
+ "--main-grads-dtype",
38
+ "fp32",
39
+ "--main-params-dtype",
40
+ "fp32",
41
+ "--exp-avg-dtype",
42
+ "fp32",
43
+ "--exp-avg-sq-dtype",
44
+ "fp32",
45
+ "--dataloader-type",
46
+ "cyclic",
47
+ "--manual-gc-interval",
48
+ "0",
49
+ "--lr",
50
+ "0.0001",
51
+ "--lr-decay-style",
52
+ "cosine",
53
+ "--lr-warmup-iters",
54
+ "0",
55
+ "--lr-warmup-fraction",
56
+ "0.05",
57
+ "--min-lr",
58
+ "3e-06",
59
+ "--weight-decay",
60
+ "0.1",
61
+ "--clip-grad",
62
+ "1.0",
63
+ "--adam-beta1",
64
+ "0.9",
65
+ "--adam-beta2",
66
+ "0.95",
67
+ "--adam-eps",
68
+ "1e-08",
69
+ "--sgd-momentum",
70
+ "0.9",
71
+ "--save",
72
+ "/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139",
73
+ "--save-interval",
74
+ "100",
75
+ "--load",
76
+ "/workspace/halcyon-recipe2/patch",
77
+ "--finetune",
78
+ "--ckpt-format",
79
+ "torch_dist",
80
+ "--no-initialization",
81
+ "--auto-detect-ckpt-format",
82
+ "--exit-on-missing-checkpoint",
83
+ "--distributed-backend",
84
+ "nccl",
85
+ "--local-rank",
86
+ "7",
87
+ "--use-distributed-optimizer",
88
+ "--tensor-model-parallel-size",
89
+ "1",
90
+ "--pipeline-model-parallel-size",
91
+ "1",
92
+ "--context-parallel-size",
93
+ "1",
94
+ "--overlap-grad-reduce",
95
+ "--overlap-param-gather",
96
+ "--distributed-timeout-minutes",
97
+ "300000",
98
+ "--num-layers",
99
+ "28",
100
+ "--hidden-size",
101
+ "1024",
102
+ "--ffn-hidden-size",
103
+ "3072",
104
+ "--num-attention-heads",
105
+ "16",
106
+ "--group-query-attention",
107
+ "--num-query-groups",
108
+ "8",
109
+ "--max-position-embeddings",
110
+ "32768",
111
+ "--position-embedding-type",
112
+ "rope",
113
+ "--rotary-base",
114
+ "1000000",
115
+ "--rotary-percent",
116
+ "1.0",
117
+ "--normalization",
118
+ "RMSNorm",
119
+ "--norm-epsilon",
120
+ "1e-06",
121
+ "--swiglu",
122
+ "--disable-bias-linear",
123
+ "--attention-dropout",
124
+ "0.0",
125
+ "--hidden-dropout",
126
+ "0.0",
127
+ "--kv-channels",
128
+ "128",
129
+ "--qk-layernorm",
130
+ "--transformer-impl",
131
+ "transformer_engine",
132
+ "--moe-layer-freq",
133
+ "1",
134
+ "--moe-router-topk",
135
+ "2",
136
+ "--moe-router-dtype",
137
+ "fp32",
138
+ "--moe-router-score-function",
139
+ "softmax",
140
+ "--moe-router-load-balancing-type",
141
+ "aux_loss",
142
+ "--expert-model-parallel-size",
143
+ "1",
144
+ "--expert-tensor-parallel-size",
145
+ "1",
146
+ "--moe-token-dispatcher-type",
147
+ "alltoall",
148
+ "--moe-grouped-gemm",
149
+ "--moe-aux-loss-coeff",
150
+ "0.0",
151
+ "--moe-token-drop-policy",
152
+ "probs",
153
+ "--kv-lora-rank",
154
+ "32",
155
+ "--qk-head-dim",
156
+ "128",
157
+ "--qk-pos-emb-head-dim",
158
+ "64",
159
+ "--mtp-loss-scaling-factor",
160
+ "0.1",
161
+ "--fp8-recipe",
162
+ "delayed",
163
+ "--fp8-amax-history-len",
164
+ "1024",
165
+ "--fp8-amax-compute-algo",
166
+ "max",
167
+ "--bf16",
168
+ "--attention-softmax-in-fp32",
169
+ "--tensorboard-log-interval",
170
+ "1",
171
+ "--tensorboard-queue-size",
172
+ "50",
173
+ "--log-timers-to-tensorboard",
174
+ "--log-validation-ppl-to-tensorboard",
175
+ "--log-memory-to-tensorboard",
176
+ "--logging-level",
177
+ "20",
178
+ "--wandb-project",
179
+ "plt",
180
+ "--wandb-exp-name",
181
+ "tlt",
182
+ "--eval-iters",
183
+ "-1",
184
+ "--eval-interval",
185
+ "100",
186
+ "--seq-length",
187
+ "4096",
188
+ "--num-workers",
189
+ "32"
190
+ ],
191
+ "program": "/workspace/halcyon-recipe2/swift/cli/_megatron/pt.py",
192
+ "codePath": "swift/cli/_megatron/pt.py",
193
+ "codePathLocal": "swift/cli/_megatron/pt.py",
194
+ "git": {
195
+ "remote": "https://github.com/weak-kajuma/halcyon-recipe2.git",
196
+ "commit": "ea7cc214b68fb511dd83bff83a504b7f43053577"
197
+ },
198
+ "email": "kazuma826826@gmail.com",
199
+ "root": "/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/wandb",
200
+ "host": "13f078589dd5",
201
+ "executable": "/venv/main/bin/python3.12",
202
+ "cpu_count": 72,
203
+ "cpu_count_logical": 144,
204
+ "gpu": "NVIDIA GeForce RTX 5090",
205
+ "gpu_count": 8,
206
+ "disk": {
207
+ "/": {
208
+ "total": "7669363507200",
209
+ "used": "1006439596032"
210
+ }
211
+ },
212
+ "memory": {
213
+ "total": "540643295232"
214
+ },
215
+ "gpu_nvidia": [
216
+ {
217
+ "name": "NVIDIA GeForce RTX 5090",
218
+ "memoryTotal": "34190917632",
219
+ "cudaCores": 21760,
220
+ "architecture": "Blackwell",
221
+ "uuid": "GPU-32487176-4d38-3e1d-696d-ce9cd9f7e666"
222
+ },
223
+ {
224
+ "name": "NVIDIA GeForce RTX 5090",
225
+ "memoryTotal": "34190917632",
226
+ "cudaCores": 21760,
227
+ "architecture": "Blackwell",
228
+ "uuid": "GPU-19c921f5-05b1-51fa-fb5f-e08deed52308"
229
+ },
230
+ {
231
+ "name": "NVIDIA GeForce RTX 5090",
232
+ "memoryTotal": "34190917632",
233
+ "cudaCores": 21760,
234
+ "architecture": "Blackwell",
235
+ "uuid": "GPU-dd59a530-3e2a-2e22-24e1-54e3ff1082b7"
236
+ },
237
+ {
238
+ "name": "NVIDIA GeForce RTX 5090",
239
+ "memoryTotal": "34190917632",
240
+ "cudaCores": 21760,
241
+ "architecture": "Blackwell",
242
+ "uuid": "GPU-5ce39532-f9c0-ffd8-19f0-7bb854ee835e"
243
+ },
244
+ {
245
+ "name": "NVIDIA GeForce RTX 5090",
246
+ "memoryTotal": "34190917632",
247
+ "cudaCores": 21760,
248
+ "architecture": "Blackwell",
249
+ "uuid": "GPU-6a7a11ff-b8b4-6532-d873-b1003d6fe3f0"
250
+ },
251
+ {
252
+ "name": "NVIDIA GeForce RTX 5090",
253
+ "memoryTotal": "34190917632",
254
+ "cudaCores": 21760,
255
+ "architecture": "Blackwell",
256
+ "uuid": "GPU-4daa8877-6665-3cb8-e22f-1f0bf8189c80"
257
+ },
258
+ {
259
+ "name": "NVIDIA GeForce RTX 5090",
260
+ "memoryTotal": "34190917632",
261
+ "cudaCores": 21760,
262
+ "architecture": "Blackwell",
263
+ "uuid": "GPU-859daa9f-b13c-2da4-1dc4-271ee55b347c"
264
+ },
265
+ {
266
+ "name": "NVIDIA GeForce RTX 5090",
267
+ "memoryTotal": "34190917632",
268
+ "cudaCores": 21760,
269
+ "architecture": "Blackwell",
270
+ "uuid": "GPU-63d123a0-a5fe-1583-9ab7-42bba078df53"
271
+ }
272
+ ],
273
+ "cudaVersion": "13.0",
274
+ "writerId": "bpm0umriqwkux3bqnxj3du3nf1ugtqjj"
275
+ }
wandb/wandb/run-20251224_123436-jnwx1i3g/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_step":12700,"samples vs steps":3251200,"_runtime":62496.661638386,"lm loss":2.009126901626587,"iteration-time":4.912550926208496,"batch-size":256,"loss-scale":1,"_timestamp":1.7666421712995458e+09,"learning-rate":3.000000106112566e-06,"grad-norm":0.30827680230140686,"_wandb":{"runtime":62496}}
wandb/wandb/run-20251224_123436-jnwx1i3g/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-24T12:34:36.065502458Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpkpbd5yau/port-21342.txt","pid":21342,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-12-24T12:34:36.067166944Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":21342}
3
+ {"time":"2025-12-24T12:34:36.067150361Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-21342-24126-548586279/socket","Net":"unix"}}
4
+ {"time":"2025-12-24T12:34:36.246493805Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-12-24T12:34:36.254070871Z","level":"INFO","msg":"handleInformInit: received","streamId":"jnwx1i3g","id":"1(@)"}
6
+ {"time":"2025-12-24T12:34:36.629618523Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"jnwx1i3g","id":"1(@)"}
7
+ {"time":"2025-12-25T05:56:14.464641812Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"jnwx1i3g","id":"1(@)"}
8
+ {"time":"2025-12-25T05:56:14.469888589Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"jnwx1i3g","id":"1(@)"}
9
+ {"time":"2025-12-25T05:56:20.66699945Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
10
+ {"time":"2025-12-25T05:56:20.667049804Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
11
+ {"time":"2025-12-25T05:56:20.667059956Z","level":"INFO","msg":"server is shutting down"}
12
+ {"time":"2025-12-25T05:56:20.667169592Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-21342-24126-548586279/socket","Net":"unix"}}
13
+ {"time":"2025-12-25T05:56:20.667184316Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
14
+ {"time":"2025-12-25T05:56:20.667327519Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
15
+ {"time":"2025-12-25T05:56:20.667338025Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
16
+ {"time":"2025-12-25T05:56:20.667352355Z","level":"INFO","msg":"server is closed"}
wandb/wandb/run-20251224_123436-jnwx1i3g/logs/debug-internal.log ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-12-24T12:34:36.254351955Z","level":"INFO","msg":"stream: starting","core version":"0.23.1"}
2
+ {"time":"2025-12-24T12:34:36.629472532Z","level":"INFO","msg":"stream: created new stream","id":"jnwx1i3g"}
3
+ {"time":"2025-12-24T12:34:36.629612876Z","level":"INFO","msg":"stream: started","id":"jnwx1i3g"}
4
+ {"time":"2025-12-24T12:34:36.629619447Z","level":"INFO","msg":"handler: started","stream_id":"jnwx1i3g"}
5
+ {"time":"2025-12-24T12:34:36.629720929Z","level":"INFO","msg":"sender: started","stream_id":"jnwx1i3g"}
6
+ {"time":"2025-12-24T12:34:36.629753337Z","level":"INFO","msg":"writer: started","stream_id":"jnwx1i3g"}
7
+ {"time":"2025-12-24T12:43:42.584434646Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
8
+ {"time":"2025-12-24T12:43:42.585040454Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
9
+ {"time":"2025-12-24T12:51:53.304049519Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
10
+ {"time":"2025-12-24T12:51:53.304246963Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
11
+ {"time":"2025-12-24T13:00:03.564144396Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
12
+ {"time":"2025-12-24T13:00:03.56447829Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
13
+ {"time":"2025-12-24T13:08:14.558022794Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
14
+ {"time":"2025-12-24T13:08:14.5581774Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
15
+ {"time":"2025-12-24T13:16:25.335922211Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
16
+ {"time":"2025-12-24T13:16:25.336311204Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
17
+ {"time":"2025-12-24T13:24:35.625138091Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
18
+ {"time":"2025-12-24T13:24:35.625500792Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
19
+ {"time":"2025-12-24T13:32:44.717934227Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
20
+ {"time":"2025-12-24T13:32:44.718329206Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
21
+ {"time":"2025-12-24T13:40:54.863924529Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
22
+ {"time":"2025-12-24T13:40:54.864133024Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
23
+ {"time":"2025-12-24T13:49:06.109993778Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
24
+ {"time":"2025-12-24T13:49:06.110370855Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
25
+ {"time":"2025-12-24T13:57:16.591602528Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
26
+ {"time":"2025-12-24T13:57:16.591806752Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
27
+ {"time":"2025-12-24T14:05:26.943714374Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
28
+ {"time":"2025-12-24T14:05:26.944085153Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
29
+ {"time":"2025-12-24T14:13:37.420574607Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
30
+ {"time":"2025-12-24T14:13:37.420932956Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
31
+ {"time":"2025-12-24T14:21:46.523528811Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
32
+ {"time":"2025-12-24T14:21:46.523877521Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
33
+ {"time":"2025-12-24T14:29:57.069808445Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
34
+ {"time":"2025-12-24T14:29:57.070179761Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
35
+ {"time":"2025-12-24T14:38:07.967571682Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
36
+ {"time":"2025-12-24T14:38:07.967803509Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
37
+ {"time":"2025-12-24T14:46:20.208911528Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
38
+ {"time":"2025-12-24T14:46:20.209263601Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
39
+ {"time":"2025-12-24T14:54:30.621968556Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
40
+ {"time":"2025-12-24T14:54:30.622280257Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
41
+ {"time":"2025-12-24T15:02:42.550303767Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
42
+ {"time":"2025-12-24T15:02:42.550629852Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
43
+ {"time":"2025-12-24T15:10:52.245924491Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
44
+ {"time":"2025-12-24T15:10:52.246280808Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
45
+ {"time":"2025-12-24T15:19:00.372310237Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
46
+ {"time":"2025-12-24T15:19:00.374238029Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
47
+ {"time":"2025-12-24T15:27:10.911178895Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
48
+ {"time":"2025-12-24T15:27:10.911505559Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
49
+ {"time":"2025-12-24T15:35:19.936356498Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
50
+ {"time":"2025-12-24T15:35:19.936680052Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
51
+ {"time":"2025-12-24T15:43:29.284803428Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
52
+ {"time":"2025-12-24T15:43:29.285168114Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
53
+ {"time":"2025-12-24T15:51:39.188922955Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
54
+ {"time":"2025-12-24T15:51:39.189302898Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
55
+ {"time":"2025-12-24T15:59:47.388163574Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
56
+ {"time":"2025-12-24T15:59:47.388480368Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
57
+ {"time":"2025-12-24T16:07:56.763401597Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
58
+ {"time":"2025-12-24T16:07:56.76373407Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
59
+ {"time":"2025-12-24T16:16:07.65283364Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
60
+ {"time":"2025-12-24T16:16:07.653055175Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
61
+ {"time":"2025-12-24T16:24:17.518829539Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
62
+ {"time":"2025-12-24T16:24:17.519219117Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
63
+ {"time":"2025-12-24T16:32:27.874143459Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
64
+ {"time":"2025-12-24T16:32:27.874487207Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
65
+ {"time":"2025-12-24T16:40:37.983683733Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
66
+ {"time":"2025-12-24T16:40:37.984023837Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
67
+ {"time":"2025-12-24T16:48:48.345448271Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
68
+ {"time":"2025-12-24T16:48:48.345778297Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
69
+ {"time":"2025-12-24T16:56:59.153705518Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
70
+ {"time":"2025-12-24T16:56:59.154038502Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
71
+ {"time":"2025-12-24T17:05:11.162594185Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
72
+ {"time":"2025-12-24T17:05:11.162915092Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
73
+ {"time":"2025-12-24T17:13:20.786866394Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
74
+ {"time":"2025-12-24T17:13:20.787245898Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
75
+ {"time":"2025-12-24T17:21:30.700064755Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
76
+ {"time":"2025-12-24T17:21:30.70039016Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
77
+ {"time":"2025-12-24T17:29:40.974243157Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
78
+ {"time":"2025-12-24T17:29:40.974568866Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
79
+ {"time":"2025-12-24T17:37:51.016380953Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
80
+ {"time":"2025-12-24T17:37:51.016705232Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
81
+ {"time":"2025-12-24T17:46:01.693995869Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
82
+ {"time":"2025-12-24T17:46:01.694332853Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
83
+ {"time":"2025-12-24T17:54:12.669825778Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
84
+ {"time":"2025-12-24T17:54:12.670236873Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
85
+ {"time":"2025-12-24T18:02:23.894617076Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
86
+ {"time":"2025-12-24T18:02:23.894867356Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
87
+ {"time":"2025-12-24T18:10:34.692364906Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
88
+ {"time":"2025-12-24T18:10:34.692517453Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
89
+ {"time":"2025-12-24T18:18:45.299371338Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
90
+ {"time":"2025-12-24T18:18:45.299686654Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
91
+ {"time":"2025-12-24T18:26:55.63456397Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
92
+ {"time":"2025-12-24T18:26:55.634882583Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
93
+ {"time":"2025-12-24T18:35:05.874459884Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
94
+ {"time":"2025-12-24T18:35:05.875666521Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
95
+ {"time":"2025-12-24T18:43:16.877827268Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
96
+ {"time":"2025-12-24T18:43:16.878202101Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
97
+ {"time":"2025-12-24T18:51:26.582238369Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
98
+ {"time":"2025-12-24T18:51:26.582564827Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
99
+ {"time":"2025-12-24T18:59:36.695031564Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
100
+ {"time":"2025-12-24T18:59:36.695392522Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
101
+ {"time":"2025-12-24T19:07:46.806583963Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
102
+ {"time":"2025-12-24T19:07:46.806971962Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
103
+ {"time":"2025-12-24T19:15:57.691287153Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
104
+ {"time":"2025-12-24T19:15:57.691615356Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
105
+ {"time":"2025-12-24T19:24:06.859357828Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
106
+ {"time":"2025-12-24T19:24:06.859682526Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
107
+ {"time":"2025-12-24T19:32:17.073718182Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
108
+ {"time":"2025-12-24T19:32:17.074732163Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
109
+ {"time":"2025-12-24T19:40:28.693074445Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
110
+ {"time":"2025-12-24T19:40:28.693416512Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
111
+ {"time":"2025-12-24T19:48:40.093961976Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
112
+ {"time":"2025-12-24T19:48:40.094302557Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
113
+ {"time":"2025-12-24T19:56:50.657707896Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
114
+ {"time":"2025-12-24T19:56:50.658063296Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
115
+ {"time":"2025-12-24T20:04:59.961877315Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
116
+ {"time":"2025-12-24T20:04:59.962242367Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
117
+ {"time":"2025-12-24T20:13:09.787644404Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
118
+ {"time":"2025-12-24T20:13:09.787999328Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
119
+ {"time":"2025-12-24T20:21:20.333968946Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
120
+ {"time":"2025-12-24T20:21:20.334269312Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
121
+ {"time":"2025-12-24T20:29:30.053711709Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
122
+ {"time":"2025-12-24T20:29:30.054100568Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
123
+ {"time":"2025-12-24T20:37:39.665343253Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
124
+ {"time":"2025-12-24T20:37:39.66633448Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
125
+ {"time":"2025-12-24T20:45:50.003211645Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
126
+ {"time":"2025-12-24T20:45:50.003534079Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
127
+ {"time":"2025-12-24T20:54:01.715666664Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
128
+ {"time":"2025-12-24T20:54:01.716017645Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
129
+ {"time":"2025-12-24T21:02:11.738223974Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
130
+ {"time":"2025-12-24T21:02:11.738547408Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
131
+ {"time":"2025-12-24T21:10:23.891058434Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
132
+ {"time":"2025-12-24T21:10:23.891412255Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
133
+ {"time":"2025-12-24T21:18:36.292452235Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
134
+ {"time":"2025-12-24T21:18:36.292823337Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
135
+ {"time":"2025-12-24T21:26:49.034015246Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
136
+ {"time":"2025-12-24T21:26:49.034419061Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
137
+ {"time":"2025-12-24T21:35:01.603575566Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
138
+ {"time":"2025-12-24T21:35:01.603971538Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
139
+ {"time":"2025-12-24T21:43:13.316216606Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
140
+ {"time":"2025-12-24T21:43:13.316541769Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
141
+ {"time":"2025-12-24T21:51:25.997399812Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
142
+ {"time":"2025-12-24T21:51:25.997667925Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
143
+ {"time":"2025-12-24T21:59:39.789298033Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
144
+ {"time":"2025-12-24T21:59:39.790207643Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
145
+ {"time":"2025-12-24T22:07:54.143020496Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
146
+ {"time":"2025-12-24T22:07:54.143366049Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
147
+ {"time":"2025-12-24T22:16:08.452133657Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
148
+ {"time":"2025-12-24T22:16:08.452476455Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
149
+ {"time":"2025-12-24T22:24:23.379531769Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
150
+ {"time":"2025-12-24T22:24:23.379848632Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
151
+ {"time":"2025-12-24T22:32:38.276795958Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
152
+ {"time":"2025-12-24T22:32:38.277168288Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
153
+ {"time":"2025-12-24T22:40:52.266886392Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
154
+ {"time":"2025-12-24T22:40:52.267278516Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
155
+ {"time":"2025-12-24T22:49:09.028682461Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
156
+ {"time":"2025-12-24T22:49:09.029040894Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
157
+ {"time":"2025-12-24T22:57:25.755586067Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
158
+ {"time":"2025-12-24T22:57:25.755904861Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
159
+ {"time":"2025-12-24T23:05:40.737128657Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
160
+ {"time":"2025-12-24T23:05:40.737310219Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
161
+ {"time":"2025-12-24T23:13:56.773269428Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
162
+ {"time":"2025-12-24T23:13:56.773691076Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
163
+ {"time":"2025-12-24T23:22:15.672018155Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
164
+ {"time":"2025-12-24T23:22:15.6723556Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
165
+ {"time":"2025-12-24T23:30:35.439905149Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
166
+ {"time":"2025-12-24T23:30:35.440310014Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
167
+ {"time":"2025-12-24T23:38:59.310113201Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
168
+ {"time":"2025-12-24T23:38:59.310284434Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
169
+ {"time":"2025-12-24T23:47:20.026480138Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
170
+ {"time":"2025-12-24T23:47:20.026806646Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
171
+ {"time":"2025-12-24T23:55:32.645270845Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
172
+ {"time":"2025-12-24T23:55:32.645481056Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
173
+ {"time":"2025-12-25T00:03:45.918216315Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
174
+ {"time":"2025-12-25T00:03:45.918431055Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
175
+ {"time":"2025-12-25T00:11:57.5112067Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
176
+ {"time":"2025-12-25T00:11:57.511559149Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
177
+ {"time":"2025-12-25T00:20:09.569694025Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
178
+ {"time":"2025-12-25T00:20:09.570080729Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
179
+ {"time":"2025-12-25T00:28:20.955104121Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
180
+ {"time":"2025-12-25T00:28:20.956071728Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
181
+ {"time":"2025-12-25T00:36:33.563783853Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
182
+ {"time":"2025-12-25T00:36:33.564115691Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
183
+ {"time":"2025-12-25T00:44:43.757782469Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
184
+ {"time":"2025-12-25T00:44:43.758006362Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
185
+ {"time":"2025-12-25T00:52:58.268283552Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
186
+ {"time":"2025-12-25T00:52:58.268608464Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
187
+ {"time":"2025-12-25T01:01:09.687704628Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
188
+ {"time":"2025-12-25T01:01:09.688073676Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
189
+ {"time":"2025-12-25T01:09:20.318697053Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
190
+ {"time":"2025-12-25T01:09:20.319079623Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
191
+ {"time":"2025-12-25T01:17:32.301193277Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
192
+ {"time":"2025-12-25T01:17:32.301449848Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
193
+ {"time":"2025-12-25T01:25:44.359347604Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
194
+ {"time":"2025-12-25T01:25:44.359728332Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
195
+ {"time":"2025-12-25T01:33:57.086186112Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
196
+ {"time":"2025-12-25T01:33:57.086520137Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
197
+ {"time":"2025-12-25T01:42:08.920919181Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
198
+ {"time":"2025-12-25T01:42:08.921261155Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
199
+ {"time":"2025-12-25T01:50:20.750970195Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
200
+ {"time":"2025-12-25T01:50:20.751300379Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
201
+ {"time":"2025-12-25T01:58:32.458173785Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
202
+ {"time":"2025-12-25T01:58:32.458529561Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
203
+ {"time":"2025-12-25T02:06:43.381564296Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
204
+ {"time":"2025-12-25T02:06:43.381928657Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
205
+ {"time":"2025-12-25T02:14:54.992304689Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
206
+ {"time":"2025-12-25T02:14:54.992635156Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
207
+ {"time":"2025-12-25T02:23:05.286275247Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
208
+ {"time":"2025-12-25T02:23:05.28660512Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
209
+ {"time":"2025-12-25T02:31:17.016762344Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
210
+ {"time":"2025-12-25T02:31:17.017127063Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
211
+ {"time":"2025-12-25T02:39:29.007341384Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
212
+ {"time":"2025-12-25T02:39:29.007700568Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
213
+ {"time":"2025-12-25T02:47:40.47371979Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
214
+ {"time":"2025-12-25T02:47:40.474061876Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
215
+ {"time":"2025-12-25T02:55:51.070757563Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
216
+ {"time":"2025-12-25T02:55:51.070933152Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
217
+ {"time":"2025-12-25T03:04:02.308727067Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
218
+ {"time":"2025-12-25T03:04:02.309098675Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
219
+ {"time":"2025-12-25T03:12:15.598454409Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
220
+ {"time":"2025-12-25T03:12:15.598766388Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
221
+ {"time":"2025-12-25T03:20:27.576979046Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
222
+ {"time":"2025-12-25T03:20:27.577315736Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
223
+ {"time":"2025-12-25T03:28:40.335273131Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
224
+ {"time":"2025-12-25T03:28:40.335469564Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
225
+ {"time":"2025-12-25T03:36:52.589010731Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
226
+ {"time":"2025-12-25T03:36:52.589329744Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
227
+ {"time":"2025-12-25T03:45:05.27775084Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
228
+ {"time":"2025-12-25T03:45:05.278112785Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
229
+ {"time":"2025-12-25T03:53:17.114516703Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
230
+ {"time":"2025-12-25T03:53:17.114671442Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
231
+ {"time":"2025-12-25T04:01:29.804012319Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
232
+ {"time":"2025-12-25T04:01:29.804331445Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
233
+ {"time":"2025-12-25T04:09:43.395724296Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
234
+ {"time":"2025-12-25T04:09:43.396068292Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
235
+ {"time":"2025-12-25T04:17:55.706513202Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
236
+ {"time":"2025-12-25T04:17:55.706830938Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
237
+ {"time":"2025-12-25T04:26:07.589828131Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
238
+ {"time":"2025-12-25T04:26:07.590209964Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
239
+ {"time":"2025-12-25T04:34:18.693182664Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
240
+ {"time":"2025-12-25T04:34:18.693523762Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
241
+ {"time":"2025-12-25T04:42:28.816766346Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
242
+ {"time":"2025-12-25T04:42:28.817136062Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
243
+ {"time":"2025-12-25T04:50:39.929873641Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
244
+ {"time":"2025-12-25T04:50:39.930245151Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
245
+ {"time":"2025-12-25T04:58:52.756800842Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
246
+ {"time":"2025-12-25T04:58:52.757138458Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
247
+ {"time":"2025-12-25T05:07:05.0386095Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
248
+ {"time":"2025-12-25T05:07:05.038933569Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
249
+ {"time":"2025-12-25T05:15:18.105198463Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
250
+ {"time":"2025-12-25T05:15:18.105515364Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
251
+ {"time":"2025-12-25T05:23:30.021612042Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
252
+ {"time":"2025-12-25T05:23:30.021974929Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
253
+ {"time":"2025-12-25T05:31:41.352144216Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
254
+ {"time":"2025-12-25T05:31:41.352480289Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
255
+ {"time":"2025-12-25T05:39:52.438308201Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
256
+ {"time":"2025-12-25T05:39:52.43863069Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
257
+ {"time":"2025-12-25T05:48:02.463179774Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
258
+ {"time":"2025-12-25T05:48:02.46350624Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
259
+ {"time":"2025-12-25T05:56:13.686377322Z","level":"ERROR","msg":"HTTP error","status":400,"method":"POST","url":"https://api.wandb.ai/graphql"}
260
+ {"time":"2025-12-25T05:56:13.686580128Z","level":"ERROR","msg":"sender: failed to log artifact: ArtifactSaver.createArtifact: returned error 400: {\"data\":{\"createArtifact\":null},\"errors\":[{\"message\":\"Invalid Client ID digest\",\"path\":[\"createArtifact\"]}]}","artifactID":""}
261
+ {"time":"2025-12-25T05:56:14.243412654Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
262
+ {"time":"2025-12-25T05:56:14.458949469Z","level":"INFO","msg":"handler: operation stats","stats":{}}
263
+ {"time":"2025-12-25T05:56:14.464678677Z","level":"INFO","msg":"stream: closing","id":"jnwx1i3g"}
264
+ {"time":"2025-12-25T05:56:14.464692292Z","level":"INFO","msg":"handler: closed","stream_id":"jnwx1i3g"}
265
+ {"time":"2025-12-25T05:56:14.46481017Z","level":"INFO","msg":"sender: closed","stream_id":"jnwx1i3g"}
266
+ {"time":"2025-12-25T05:56:14.464828776Z","level":"INFO","msg":"stream: closed","id":"jnwx1i3g"}
wandb/wandb/run-20251224_123436-jnwx1i3g/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Current SDK version is 0.23.1
2
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Configure stats pid to 21342
3
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Loading settings from /root/.config/wandb/settings
4
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Loading settings from /workspace/halcyon-recipe2/wandb/settings
5
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:setup_run_log_directory():714] Logging user logs to /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/wandb/wandb/run-20251224_123436-jnwx1i3g/logs/debug.log
7
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:setup_run_log_directory():715] Logging internal logs to /workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/wandb/wandb/run-20251224_123436-jnwx1i3g/logs/debug-internal.log
8
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:init():841] calling init triggers
9
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:init():846] wandb.init called with sweep_config: {}
10
+ config: {'num_layers': 28, 'encoder_num_layers': 28, 'decoder_num_layers': None, 'hidden_size': 1024, 'ffn_hidden_size': 3072, 'num_attention_heads': 16, 'attention_backend': <AttnBackend.flash: 1>, 'kv_channels': 128, 'group_query_attention': True, 'num_query_groups': 8, 'max_position_embeddings': 32768, 'position_embedding_type': 'rope', 'relative_attention_num_buckets': 32, 'relative_attention_max_distance': 128, 'use_rotary_position_embeddings': False, 'rotary_base': 1000000, 'rotary_percent': 1.0, 'rotary_interleaved': False, 'rotary_seq_len_interpolation_factor': None, 'use_rope_scaling': False, 'rope_scaling_factor': 8.0, 'no_rope_freq': None, 'add_position_embedding': True, 'mrope_section': None, 'make_vocab_size_divisible_by': 128, 'normalization': 'RMSNorm', 'norm_epsilon': 1e-06, 'apply_layernorm_1p': False, 'apply_residual_connection_post_layernorm': False, 'openai_gelu': False, 'squared_relu': False, 'swiglu': True, 'onnx_safe': None, 'bert_binary_head': True, 'untie_embeddings_and_output_weights': False, 'multi_latent_attention': False, 'mtp_num_layers': None, 'mtp_loss_scaling_factor': 0.1, 'attention_dropout': 0.0, 'hidden_dropout': 0.0, 'weight_decay': 0.1, 'start_weight_decay': 0.1, 'end_weight_decay': 0.1, 'weight_decay_incr_style': 'constant', 'clip_grad': 1.0, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-08, 'sgd_momentum': 0.9, 'micro_batch_size': 4, 'global_batch_size': 256, 'rampup_batch_size': None, 'decrease_batch_size_if_needed': False, 'recompute_granularity': 'full', 'check_for_nan_in_loss_and_grad': True, 'check_for_spiky_loss': False, 'check_for_large_grads': False, 'distribute_saved_activations': False, 'recompute_method': 'uniform', 'recompute_num_layers': 1, 'recompute_modules': ['core_attn'], 'clone_scatter_output_in_embedding': True, 'profile': False, 'profile_step_start': 10, 'profile_step_end': 12, 'iterations_to_skip': [], 'result_rejected_tracker_filename': None, 'enable_gloo_process_groups': True, 'use_pytorch_profiler': False, 'profile_ranks': [0], 'record_memory_history': False, 'memory_snapshot_path': 'snapshot.pickle', 'tp_comm_overlap': False, 'tp_comm_overlap_cfg': None, 'tp_comm_overlap_ag': True, 'tp_comm_overlap_rs': True, 'tp_comm_overlap_rs_dgrad': False, 'tp_comm_bulk_dgrad': True, 'tp_comm_bulk_wgrad': True, 'tp_comm_bootstrap_backend': 'nccl', 'use_cpu_initialization': None, 'empty_unused_memory_level': 0, 'deterministic_mode': False, 'check_weight_hash_across_dp_replicas_interval': None, 'calculate_per_token_loss': True, 'train_sync_interval': None, 'train_iters': 12700, 'train_samples': None, 'log_interval': 1, 'exit_interval': None, 'exit_duration_in_mins': None, 'exit_signal_handler': False, 'tensorboard_dir': '/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139/runs', 'masked_softmax_fusion': True, 'bias_gelu_fusion': False, 'bias_swiglu_fusion': True, 'use_fused_weighted_squared_relu': False, 'bias_dropout_fusion': True, 'apply_rope_fusion': True, 'rope_type': None, 'cross_entropy_loss_fusion': True, 'cross_entropy_fusion_impl': 'native', 'use_flash_attn': False, 'add_bias_linear': False, 'add_qkv_bias': False, 'optimizer': 'adam', 'optimizer_cpu_offload': False, 'optimizer_offload_fraction': 1.0, 'use_torch_optimizer_for_cpu_offload': False, 'overlap_cpu_optimizer_d2h_h2d': False, 'pin_cpu_grads': True, 'pin_cpu_params': True, 'dataloader_type': 'cyclic', 'async_tensor_model_parallel_allreduce': True, 'no_persist_layer_norm': False, 'sequence_parallel': False, 'gradient_accumulation_fusion': True, 'deprecated_use_mcore_models': False, 'use_legacy_models': False, 'manual_gc': False, 'manual_gc_interval': 0, 'manual_gc_eval': True, 'tp_comm_split_ag': True, 'tp_comm_split_rs': True, 'pipeline_model_parallel_comm_backend': None, 'high_priority_stream_groups': [], 'seed': 42, 'data_parallel_random_init': False, 'init_method_std': 0.02, 'embedding_init_method_std': None, 'init_method_xavier_uniform': False, 'lr': 0.0001, 'lr_decay_style': 'cosine', 'lr_wsd_decay_style': 'exponential', 'lr_decay_iters': None, 'lr_decay_samples': None, 'lr_wsd_decay_samples': None, 'lr_wsd_decay_iters': None, 'lr_warmup_fraction': 0.05, 'lr_warmup_iters': 0, 'lr_warmup_samples': 0, 'lr_warmup_init': 0.0, 'min_lr': 3e-06, 'override_opt_param_scheduler': False, 'use_checkpoint_opt_param_scheduler': False, 'decoupled_lr': None, 'decoupled_min_lr': None, 'save': '/workspace/halcyon-recipe2/megatron_output/qwen3-tlt/v1-20251224-123139', 'save_interval': 100, 'save_retain_interval': None, 'no_save_optim': None, 'no_save_rng': None, 'load': '/workspace/halcyon-recipe2/patch', 'no_load_optim': None, 'load_main_params_from_ckpt': None, 'no_load_rng': None, 'strict_fsdp_dtensor_load': True, 'non_persistent_save_interval': None, 'non_persistent_ckpt_type': None, 'non_persistent_global_ckpt_dir': None, 'non_persistent_local_ckpt_dir': None, 'non_persistent_local_ckpt_algo': 'fully_parallel', 'finetune': True, 'pretrained_checkpoint': None, 'ckpt_step': None, 'perform_initialization': False, 'use_checkpoint_args': False, 'use_mp_args_from_checkpoint_args': False, 'use_tokenizer_model_from_checkpoint_args': True, 'exit_on_missing_checkpoint': True, 'use_dist_ckpt_deprecated': False, 'use_persistent_ckpt_worker': False, 'auto_detect_ckpt_format': True, 'dist_ckpt_format_deprecated': None, 'ckpt_format': 'torch_dist', 'ckpt_convert_format': None, 'ckpt_convert_save': None, 'ckpt_convert_update_legacy_dist_opt_format': False, 'ckpt_fully_parallel_save_deprecated': False, 'ckpt_fully_parallel_save': True, 'async_save': None, 'ckpt_fully_parallel_load': False, 'ckpt_assume_constant_structure': False, 'dist_ckpt_strictness': 'assume_ok_unexpected', 'load_model_opt_format': False, 'fp16': False, 'bf16': True, 'grad_reduce_in_bf16': False, 'loss_scale': None, 'initial_loss_scale': 4294967296, 'min_loss_scale': 1.0, 'loss_scale_window': 1000, 'hysteresis': 2, 'fp32_residual_connection': False, 'apply_query_key_layer_scaling': False, 'attention_softmax_in_fp32': True, 'accumulate_allreduce_grads_in_fp32': True, 'fp16_lm_cross_entropy': False, 'disable_bf16_reduced_precision_matmul': False, 'reuse_grad_buf_for_mxfp8_param_ag': False, 'tensor_model_parallel_size': 1, 'pipeline_model_parallel_size': 1, 'decoder_first_pipeline_num_layers': None, 'decoder_last_pipeline_num_layers': None, 'pipeline_model_parallel_layout': None, 'num_layers_per_virtual_pipeline_stage': None, 'num_virtual_stages_per_pipeline_rank': None, 'microbatch_group_size_per_vp_stage': None, 'overlap_p2p_comm': False, 'overlap_p2p_comm_warmup_flush': False, 'distributed_backend': 'nccl', 'distributed_timeout_minutes': 300000, 'overlap_grad_reduce': True, 'defer_embedding_wgrad_compute': False, 'wgrad_deferral_limit': 0, 'align_grad_reduce': True, 'ddp_num_buckets': None, 'ddp_bucket_size': None, 'ddp_pad_buckets_for_high_nccl_busbw': False, 'ddp_average_in_collective': False, 'overlap_param_gather': True, 'overlap_param_gather_with_optimizer_step': False, 'align_param_gather': False, 'scatter_gather_tensors_in_pipeline': True, 'use_ring_exchange_p2p': False, 'local_rank': 7, 'lazy_mpu_init': None, 'account_for_embedding_in_pipeline_split': False, 'account_for_loss_in_pipeline_split': False, 'use_distributed_optimizer': True, 'nccl_ub': False, 'use_sharp': False, 'sharp_enabled_group': None, 'use_megatron_fsdp': False, 'init_model_with_meta_device': False, 'data_parallel_sharding_strategy': 'no_shard', 'gradient_reduce_div_fusion': True, 'fsdp_double_buffer': False, 'suggested_communication_unit_size': None, 'keep_fp8_transpose_cache': False, 'enable_full_sharding_in_hsdp': False, 'num_distributed_optimizer_instances': 1, 'use_torch_fsdp2': False, 'torch_fsdp2_reshard_after_forward': True, 'context_parallel_size': 1, 'cp_comm_type': ['p2p'], 'hierarchical_context_parallel_sizes': None, 'nccl_communicator_config_path': None, 'use_tp_pp_dp_mapping': False, 'replication': False, 'replication_jump': None, 'replication_factor': 2, 'full_validation': False, 'multiple_validation_sets': False, 'eval_iters': -1, 'eval_interval': 100, 'test_mode': False, 'skip_train': False, 'data_path': None, 'split': None, 'train_data_path': None, 'valid_data_path': None, 'test_data_path': None, 'data_args_path': None, 'per_split_data_args_path': None, 'data_cache_path': None, 'mmap_bin_files': True, 'mock_data': False, 'seq_length': 4096, 'encoder_seq_length': 4096, 'decoder_seq_length': None, 'retriever_seq_length': 256, 'sample_rate': 1.0, 'mask_prob': 0.15, 'short_seq_prob': 0.1, 'num_workers': 32, 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'create_attention_mask_in_dataloader': True, 'num_dataset_builder_threads': 1, 'object_storage_cache_path': None, 'mid_level_dataset_surplus': 0.005, 'vocab_size': None, 'padded_vocab_size': 151936, 'vocab_file': None, 'merge_file': None, 'vocab_extra_ids': 0, 'tokenizer_type': None, 'tokenizer_model': None, 'tiktoken_pattern': None, 'tiktoken_num_special_tokens': 1000, 'tiktoken_special_tokens': None, 'adlr_autoresume': False, 'adlr_autoresume_interval': 1000, 'ict_head_size': None, 'biencoder_projection_dim': 0, 'biencoder_shared_query_context_model': False, 'ict_load': None, 'bert_load': None, 'titles_data_path': None, 'query_in_block_prob': 0.1, 'use_one_sent_docs': False, 'evidence_data_path': None, 'retriever_report_topk_accuracies': [], 'retriever_score_scaling': False, 'block_data_path': None, 'embedding_path': None, 'indexer_batch_size': 128, 'indexer_log_interval': 1000, 'num_classes': 1000, 'img_h': 224, 'img_w': 224, 'num_channels': 3, 'patch_dim': 16, 'classes_fraction': 1.0, 'data_per_class_fraction': 1.0, 'data_sharding': True, 'head_lr_mult': 1.0, 'vision_pretraining': False, 'vision_pretraining_type': 'classify', 'vision_backbone_type': 'vit', 'swin_backbone_type': 'tiny', 'mask_type': 'random', 'mask_factor': 1.0, 'iter_per_epoch': 1250, 'dino_local_img_size': 96, 'dino_local_crops_number': 10, 'dino_head_hidden_size': 2048, 'dino_bottleneck_size': 256, 'dino_freeze_last_layer': 1, 'dino_norm_last_layer': False, 'dino_warmup_teacher_temp': 0.04, 'dino_teacher_temp': 0.07, 'dino_warmup_teacher_temp_epochs': 30, 'qk_layernorm': True, 'qk_l2_norm': False, 'expert_model_parallel_size': 1, 'expert_tensor_parallel_size': 1, 'num_experts': None, 'moe_layer_freq': 1, 'moe_ffn_hidden_size': None, 'moe_shared_expert_intermediate_size': None, 'moe_shared_expert_overlap': False, 'moe_grouped_gemm': True, 'moe_use_legacy_grouped_gemm': False, 'moe_layer_recompute': False, 'moe_extended_tp': False, 'moe_use_upcycling': False, 'moe_router_load_balancing_type': 'aux_loss', 'moe_router_dtype': 'fp32', 'moe_router_fusion': False, 'moe_router_score_function': 'softmax', 'moe_router_topk': 2, 'moe_router_pre_softmax': False, 'moe_router_num_groups': None, 'moe_router_group_topk': None, 'moe_router_topk_scaling_factor': None, 'moe_router_enable_expert_bias': False, 'moe_router_bias_update_rate': 0.001, 'moe_router_force_load_balancing': False, 'moe_router_padding_for_fp8': False, 'moe_aux_loss_coeff': 0.0, 'moe_z_loss_coeff': None, 'moe_input_jitter_eps': None, 'moe_per_layer_logging': False, 'moe_token_dispatcher_type': 'alltoall', 'moe_enable_deepep': False, 'moe_deepep_num_sms': 20, 'moe_permute_fusion': False, 'moe_expert_capacity_factor': None, 'moe_pad_expert_input_to_capacity': False, 'moe_token_drop_policy': 'probs', 'moe_apply_probs_on_input': False, 'overlap_moe_expert_parallel_comm': False, 'delay_wgrad_compute': False, 'moe_upcycling_granularity': 1, 'q_lora_rank': None, 'kv_lora_rank': 32, 'qk_head_dim': 128, 'qk_pos_emb_head_dim': 64, 'v_head_dim': 128, 'rotary_scaling_factor': 1.0, 'mscale': 1.0, 'mscale_all_dim': 0.0, 'cache_mla_latents': False, 'heterogeneous_layers_config_path': None, 'heterogeneous_layers_config_encoded_json': None, 'log_params_norm': False, 'log_num_zeros_in_grad': False, 'log_throughput': False, 'log_progress': False, 'timing_log_level': 0, 'log_energy': False, 'barrier_with_L1_time': True, 'timing_log_option': 'minmax', 'tensorboard_log_interval': 1, 'tensorboard_queue_size': 50, 'log_timers_to_tensorboard': True, 'log_loss_scale_to_tensorboard': True, 'log_validation_ppl_to_tensorboard': True, 'log_memory_to_tensorboard': True, 'log_world_size_to_tensorboard': False, 'wandb_project': 'plt', 'wandb_exp_name': 'tlt', 'wandb_save_dir': '', 'logging_level': 20, 'log_straggler': False, 'disable_straggler_on_startup': False, 'straggler_ctrlr_port': 65535, 'straggler_minmax_count': 1, 'run_workload_inspector_server': False, 'inference_batch_times_seqlen_threshold': -1, 'max_tokens_to_oom': 12000, 'output_bert_embeddings': False, 'bert_embedder_type': 'megatron', 'flash_decode': False, 'enable_cuda_graph': False, 'cuda_graph_warmup_steps': 3, 'external_cuda_graph': False, 'cuda_graph_scope': 'full', 'inference_max_batch_size': 8, 'inference_max_seq_length': 2560, 'inference_dynamic_batching': False, 'inference_dynamic_batching_buffer_size_gb': 40.0, 'inference_dynamic_batching_chunk_size': 256, 'inference_dynamic_batching_buffer_guaranteed_fraction': 0.2, 'inference_dynamic_batching_buffer_overflow_factor': None, 'inference_dynamic_batching_max_requests_override': None, 'inference_dynamic_batching_max_tokens_override': None, 'inference_dynamic_batching_num_cuda_graphs': 16, 'symmetric_ar_type': None, 'nccl_all_reduce_for_prefill': False, 'mlp_chunks_for_prefill': 1, 'fp8': None, 'fp8_recipe': 'delayed', 'fp8_margin': 0, 'fp8_interval': 1, 'fp8_amax_history_len': 1024, 'fp8_amax_compute_algo': 'max', 'fp8_wgrad': True, 'transformer_impl': 'transformer_engine', 'fp8_param_gather': False, 'first_last_layers_bf16': False, 'num_layers_at_start_in_bf16': 1, 'num_layers_at_end_in_bf16': 1, 'te_rng_tracker': False, 'inference_rng_tracker': False, 'retro_project_dir': None, 'retro_add_retriever': False, 'retro_cyclic_train_iters': None, 'retro_encoder_layers': 2, 'retro_encoder_hidden_dropout': 0.1, 'retro_encoder_attention_dropout': 0.1, 'retro_num_neighbors': 2, 'retro_num_retrieved_chunks': 2, 'retro_attention_gate': 1, 'retro_verify_neighbor_count': True, 'enable_experimental': False, 'spec': None, 'hybrid_attention_ratio': 0.0, 'hybrid_mlp_ratio': 0.0, 'hybrid_override_pattern': None, 'mamba_state_dim': 128, 'mamba_head_dim': 64, 'mamba_num_groups': 8, 'mamba_num_heads': None, 'is_hybrid_model': False, 'disable_mamba_mem_eff_path': False, 'yaml_cfg': None, 'use_precision_aware_optimizer': True, 'main_grads_dtype': torch.float32, 'main_params_dtype': torch.float32, 'exp_avg_dtype': torch.float32, 'exp_avg_sq_dtype': torch.float32, 'enable_one_logger': True, 'one_logger_project': 'megatron-lm', 'one_logger_run_name': None, 'one_logger_async': False, 'app_tag_run_name': None, 'app_tag_run_version': '0.0.0', 'inprocess_restart': False, 'inprocess_max_iterations': None, 'inprocess_monitor_thread_interval': 1.0, 'inprocess_monitor_process_interval': 1.0, 'inprocess_progress_watchdog_interval': 1.0, 'inprocess_heartbeat_interval': 30, 'inprocess_soft_timeout': 60, 'inprocess_hard_timeout': 90, 'inprocess_heartbeat_timeout': 60, 'inprocess_barrier_timeout': 120, 'inprocess_completion_timeout': 120, 'inprocess_last_call_wait': 1, 'inprocess_termination_grace_time': 1, 'inprocess_granularity': 'node', 'inprocess_active_world_size': 8, 'inprocess_empty_cuda_cache': False, 'enable_ft_package': False, 'calc_ft_timeouts': False, 'config_logger_dir': '', 'error_injection_rate': 0, 'error_injection_type': 'transient_error', 'rerun_mode': 'validate_results', 'enable_msc': True, 'kitchen_config_file': None, 'kitchen_recipe_number': None, 'sft': False, 'sft_tokenizer_prompt_format': 'nemotron-h-aligned', 'rank': 7, 'world_size': 8, 'use_dist_ckpt': True, 'transformer_pipeline_model_parallel_size': 1, 'data_parallel_size': 8, 'model_dir': '/workspace/.hf_home/hub/models--Qwen--Qwen3-0.6B-Base/snapshots/da87bfb608c14b7cf20ba1ce41287e8de496c0cd', 'is_multimodal': False, 'hf_model_type': 'qwen3', 'use_ray': False, 'ray_exp_name': None, 'device_groups': None, 'model': 'Qwen/Qwen3-0.6B-Base', 'model_type': 'qwen3', 'model_revision': None, 'task_type': 'causal_lm', 'torch_dtype': torch.bfloat16, 'attn_impl': None, 'new_special_tokens': [], 'num_labels': None, 'problem_type': None, 'rope_scaling': None, 'device_map': None, 'max_memory': {}, 'max_model_len': None, 'local_repo_path': None, 'init_strategy': None, 'template': 'qwen3', 'system': None, 'max_length': 4096, 'truncation_strategy': 'right', 'max_pixels': None, 'agent_template': None, 'norm_bbox': None, 'use_chat_template': False, 'padding_free': True, 'padding_side': 'right', 'sequence_parallel_size': 1, 'response_prefix': None, 'template_backend': 'swift', 'dataset': [], 'val_dataset': [], 'cached_dataset': ['/workspace/1of3'], 'cached_val_dataset': [], 'split_dataset_ratio': 0.0, 'data_seed': 42, 'dataset_num_proc': 32, 'load_from_cache_file': False, 'dataset_shuffle': True, 'val_dataset_shuffle': False, 'streaming': False, 'interleave_prob': None, 'stopping_strategy': 'first_exhausted', 'shuffle_buffer_size': 1000, 'download_mode': 'reuse_dataset_if_exists', 'columns': {}, 'strict': False, 'remove_unused_columns': True, 'model_name': None, 'model_author': None, 'custom_dataset_info': [], 'quant_method': None, 'quant_bits': None, 'hqq_axis': None, 'bnb_4bit_compute_dtype': torch.bfloat16, 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'bnb_4bit_quant_storage': None, 'max_new_tokens': None, 'temperature': None, 'top_k': 50, 'top_p': 0.9, 'repetition_penalty': 1.0, 'num_beams': 1, 'stream': False, 'stop_words': [], 'logprobs': False, 'top_logprobs': None, 'ckpt_dir': '/workspace/halcyon-recipe2/patch', 'lora_modules': [], 'tuner_backend': 'peft', 'train_type': 'full', 'adapters': [], 'external_plugins': [], 'model_kwargs': {}, 'load_args': False, 'load_data_args': False, 'packing': True, 'packing_length': 4096, 'packing_num_proc': 1, 'lazy_tokenize': False, 'custom_register_path': [], 'use_hf': True, 'hub_token': None, 'ddp_timeout': 18000000, 'ddp_backend': None, 'ignore_args_error': False, 'use_swift_lora': False, 'freeze_llm': False, 'freeze_vit': True, 'freeze_aligner': True, 'freeze_parameters': [], 'freeze_parameters_regex': None, 'freeze_parameters_ratio': 0.0, 'trainable_parameters': [], 'trainable_parameters_regex': None, 'adapter_load': None, 'target_modules': ['all-linear'], 'target_regex': None, 'modules_to_save': [], 'lora_rank': 8, 'lora_alpha': 32, 'lora_dropout': 0.05, 'lora_bias': 'none', 'lora_dtype': None, 'use_rslora': False, 'rlhf_type': None, 'ref_load': None, 'ref_adapter_load': None, 'beta': 0.1, 'rpo_alpha': None, 'reference_free': False, 'label_smoothing': 0.0, 'f_divergence_type': 'reverse_kl', 'loss_type': None, 'desirable_weight': 1.0, 'undesirable_weight': 1.0, 'calculate_KL': None, 'center_rewards_coefficient': None, 'generation_batch_size': None, 'steps_per_generation': None, 'num_generations': 8, 'max_completion_length': 512, 'importance_sampling_level': 'token', 'tau_pos': 1.0, 'tau_neg': 1.05, 'epsilon': 0.2, 'epsilon_high': None, 'delta': None, 'use_vllm': True, 'vllm_mode': None, 'vllm_enable_prefix_caching': True, 'vllm_gpu_memory_utilization': 0.9, 'vllm_tensor_parallel_size': 1, 'vllm_max_model_len': None, 'vllm_enforce_eager': False, 'vllm_limit_mm_per_prompt': None, 'vllm_disable_cascade_attn': False, 'vllm_max_num_seqs': None, 'vllm_mm_processor_cache_gb': None, 'vllm_engine_kwargs': None, 'sleep_level': 0, 'offload_optimizer': False, 'offload_model': False, 'offload_bridge': False, 'vllm_server_base_url': None, 'vllm_server_host': None, 'vllm_server_port': [8000], 'vllm_server_timeout': 240.0, 'vllm_server_group_port': None, 'reward_funcs': [], 'reward_weights': None, 'cosine_min_len_value_wrong': -0.5, 'cosine_max_len_value_wrong': 0.0, 'cosine_min_len_value_correct': 1.0, 'cosine_max_len_value_correct': 0.5, 'cosine_max_len': None, 'repetition_n_grams': 3, 'repetition_max_penalty': -1.0, 'soft_max_length': None, 'soft_cache_length': None, 'dynamic_sample': False, 'max_resample_times': 3, 'overlong_filter': False, 'scale_rewards': 'group', 'advantage_estimator': 'grpo', 'kl_in_reward': False, 'wandb_log_unique_prompts': None, 'log_completions': False, 'rollout_importance_sampling_mode': None, 'rollout_importance_sampling_threshold': 2.0, 'log_rollout_offpolicy_metrics': False, 'off_policy_sequence_mask_delta': None, 'reward_model': None, 'reward_model_plugin': None, 'sync_ref_model': False, 'ref_model_sync_steps': 512, 'ref_model_mixup_alpha': 0.6, 'async_generate': False, 'move_model_batches': None, 'multi_turn_scheduler': None, 'max_turns': None, 'completion_length_limit_scope': 'per_round', 'vllm_server_pass_dataset': False, 'log_entropy': False, 'top_entropy_quantile': 1.0, 'num_iterations': 1, 'check_model': True, 'initialize_embedding': False, 'mlp_padding_free': False, 'load_safetensors': False, 'save_safetensors': False, 'ref_model': None, 'ref_adapters': [], 'merge_lora': False, 'max_shard_size': '5GB', 'train_dataloader_shuffle': True, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': True, 'dataloader_prefetch_factor': 10, 'architectures': 'Qwen3ForCausalLM', 'llm_architectures': 'Qwen3ForCausalLM', 'max_epochs': None, 'enable_dft_loss': False, 'enable_channel_loss': False, 'patch_size': 1, 'save_strategy': 'steps', 'original_max_position_embeddings': None, 'partial_rotary_factor': None, 'use_shared_expert_gate': False, 'vit_gradient_checkpointing': True, 'vit_lr': None, 'aligner_lr': None, 'gradient_checkpointing_kwargs': None, 'linear_num_value_heads': None, 'linear_num_key_heads': None, 'linear_key_head_dim': None, 'linear_value_head_dim': None, 'linear_conv_kernel_dim': None, 'layer_types': None, 'mrope_interleaved': False, 'add_version': True, 'virtual_pipeline_model_parallel_size': None, 'params_dtype': torch.bfloat16, 'consumed_train_samples': 0, 'skipped_train_samples': 0, 'consumed_valid_samples': 0, 'variable_seq_lengths': False, '_wandb': {}}
11
+ 2025-12-24 12:34:36,004 INFO MainThread:21342 [wandb_init.py:init():889] starting backend
12
+ 2025-12-24 12:34:36,246 INFO MainThread:21342 [wandb_init.py:init():892] sending inform_init request
13
+ 2025-12-24 12:34:36,249 INFO MainThread:21342 [wandb_init.py:init():900] backend started and connected
14
+ 2025-12-24 12:34:36,252 INFO MainThread:21342 [wandb_init.py:init():970] updated telemetry
15
+ 2025-12-24 12:34:36,258 INFO MainThread:21342 [wandb_init.py:init():994] communicating run to backend with 90.0 second timeout
16
+ 2025-12-24 12:34:36,960 INFO MainThread:21342 [wandb_init.py:init():1041] starting run threads in backend
17
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_console_start():2521] atexit reg
18
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_redirect():2369] redirect: wrap_raw
19
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_redirect():2438] Wrapping output streams.
20
+ 2025-12-24 12:34:37,052 INFO MainThread:21342 [wandb_run.py:_redirect():2461] Redirects installed.
21
+ 2025-12-24 12:34:37,056 INFO MainThread:21342 [wandb_init.py:init():1081] run started, returning control to user process
22
+ 2025-12-25 05:56:13,622 INFO MainThread:21342 [wandb_run.py:_finish():2287] finishing run tepic/plt/jnwx1i3g
23
+ 2025-12-25 05:56:13,623 INFO MainThread:21342 [wandb_run.py:_atexit_cleanup():2486] got exitcode: 0
24
+ 2025-12-25 05:56:13,624 INFO MainThread:21342 [wandb_run.py:_restore():2468] restore
25
+ 2025-12-25 05:56:13,625 INFO MainThread:21342 [wandb_run.py:_restore():2474] restore done
26
+ 2025-12-25 05:56:14,463 INFO MainThread:21342 [wandb_run.py:_footer_sync_info():3862] logging synced files
wandb/wandb/run-20251224_123436-jnwx1i3g/run-jnwx1i3g.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab572e9ddfd6c793895846bb73a98e4c2c0ae94b591e66ce3625c09dbfbac1c
3
+ size 28734949