VARSR_model / stdout.txt
[06-18 08:14:01] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 08:14:01] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 08:14:01] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
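
These three [tf32] lines report PyTorch's TensorFloat-32 switches. A minimal sketch of the setup that would produce exactly this state (an assumption; the body of arg_util.py is not shown in this log):

    import torch

    # Enable TF32 tensor cores for fp32 matmuls and cuDNN convolutions.
    torch.set_float32_matmul_precision('high')      # -> get_float32_matmul_precision(): high
    torch.backends.cudnn.allow_tf32 = True          # -> [ conv ] allow_tf32: True
    torch.backends.cuda.matmul.allow_tf32 = True    # -> [matmul] allow_tf32: True
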
======================================================= RESTART [06-18 08:16:44] =======================================================
[06-18 08:16:44] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 08:16:44] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 08:16:44] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
======================================================= RESTART [06-18 08:18:55] =======================================================
[06-18 08:18:55] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 08:18:55] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 08:18:55] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
======================================================= RESTART [06-18 08:22:08] =======================================================
[06-18 08:22:08] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 08:22:08] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 08:22:08] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 08:22:10] (/projects/VARSR/train.py, line 85)=> global bs=8, local bs=4
[06-18 08:22:10] (/projects/VARSR/train.py, line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
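
Several of the values above are derived rather than passed in `cmd`. Assuming the usual conventions (global batch = per-GPU batch x world size; each training resolution = patch count x patch_size), a hypothetical reconstruction:

    batch_size, world_size = 4, 2                        # world_size=2 inferred from "global bs=8, local bs=4"
    glb_batch_size = batch_size * world_size             # 8, matching bs/glb_batch_size above

    patch_size = 16
    patch_nums = (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
    resos = tuple(pn * patch_size for pn in patch_nums)
    # (16, 32, 48, 64, 96, 144, 208, 288, 384, 512); the final 512 equals data_load_reso
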
[06-18 08:22:10] (/projects/VARSR/train.py, line 97)=> [build dehazing paired dataset] ...
======================================================= RESTART [06-18 08:37:53] =======================================================
[06-18 08:37:53] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 08:37:53] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 08:37:53] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 08:37:55] (/projects/VARSR/train.py, line 85)=> global bs=8, local bs=4
[06-18 08:37:55] (/projects/VARSR/train.py, line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 08:37:55] (/projects/VARSR/train.py, line 97)=> [build dehazing paired dataset] ...
======================================================= RESTART [06-18 08:39:37] =======================================================
======================================================= RESTART [06-18 08:44:29] =======================================================
[06-18 08:44:29] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 08:44:29] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 08:44:29] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 08:44:31] (train.py , line 85)=> global bs=4, local bs=4
[06-18 08:44:31] (train.py , line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 4
batch_size : 4
glb_batch_size : 4
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b4ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 08:44:32] (train.py , line 97)=> [build dehazing paired dataset] ...
======================================================= RESTART [06-18 08:49:35] =======================================================
[06-18 08:49:35] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 08:49:35] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 08:49:35] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 08:49:37] (train.py , line 85)=> global bs=4, local bs=4
[06-18 08:49:37] (train.py , line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 4
batch_size : 4
glb_batch_size : 4
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b4ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 08:49:38] (train.py , line 97)=> [build dehazing paired dataset] ...
======================================================= RESTART [06-18 13:47:22] =======================================================
[06-18 13:47:22] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 13:47:22] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 13:47:22] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 13:47:23] (/projects/VARSR/train.py, line 85)=> global bs=8, local bs=4
[06-18 13:47:23] (/projects/VARSR/train.py, line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 13:47:24] (/projects/VARSR/train.py, line 97)=> [build dehazing paired dataset] ...
======================================================= RESTART [06-18 13:50:35] =======================================================
[06-18 13:50:35] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 13:50:35] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 13:50:35] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 13:50:36] (/projects/VARSR/train.py, line 85)=> global bs=8, local bs=4
[06-18 13:50:36] (/projects/VARSR/train.py, line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 13:50:37] (/projects/VARSR/train.py, line 97)=> [build dehazing paired dataset] ...
======================================================= RESTART [06-18 18:41:00] =======================================================
[06-18 18:41:00] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 18:41:00] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 18:41:00] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 18:41:01] (/projects/VARSR/train.py, line 85)=> global bs=8, local bs=4
[06-18 18:41:01] (/projects/VARSR/train.py, line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 18:41:02] (/projects/VARSR/train.py, line 97)=> [build dehazing paired dataset] ...
[06-18 18:41:18] (/VARSR/zrk_utils/data.py, line 22)=> [Dataset] len(dataset)=18130
[06-18 18:41:18] (/VARSR/zrk_utils/data.py, line 22)=> [Dataset] len(dataset)=1135
[06-18 18:41:18] (/projects/VARSR/train.py, line 146)=> [auto_resume] no ckpt found @ results/VARDahazing0616/ar-ckpt*.pth
[06-18 18:41:18] (/projects/VARSR/train.py, line 146)=> [auto_resume quit]
[06-18 18:41:18] (/projects/VARSR/train.py, line 147)=> [dataloader multi processing] ... [dataloader multi processing](*) finished! (0.00s)
[06-18 18:41:18] (/projects/VARSR/train.py, line 153)=> [dataloader] gbs=8, lbs=4, iters_train=2267, types(tr, va)=('PairedImageDataset_Dehazing', 'PairedImageDataset_Dehazing')
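
iters_train follows from the dataset length and the global batch size; a sketch of the presumed calculation:

    import math
    iters_train = math.ceil(18130 / 8)   # ceil(2266.25) = 2267, as logged (gbs=8)
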
[06-18 18:41:18] (R/models/var4dehazing.py, line 183)=> [VAR config ] embed_dim=1536, num_heads=24, depth=24, mlp_ratio=4.0
[drop ratios ] drop_rate=0.0, attn_drop_rate=0.0, drop_path_rate=0.1 (tensor([0.0000, 0.0043, 0.0087, 0.0130, 0.0174, 0.0217, 0.0261, 0.0304, 0.0348,
0.0391, 0.0435, 0.0478, 0.0522, 0.0565, 0.0609, 0.0652, 0.0696, 0.0739,
0.0783, 0.0826, 0.0870, 0.0913, 0.0957, 0.1000]))
[06-18 18:41:18] (R/models/var4dehazing.py, line 207)=> torch.Size([1, 1, 3264, 3264])
[06-18 18:41:19] (R/models/var4dehazing.py, line 504)=> [init_weights] VAR_dehazing with init_std=0.0147314
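
Two of these initialization values can be reproduced from the config. The per-block drop-path rates form a linear ramp over depth, and init_std matches sqrt(1 / embed_dim / 3), the convention used by VAR-style models (assumed here for var4dehazing.py):

    import math
    import torch

    depth, embed_dim, drop_path_rate = 24, 1536, 0.1
    dpr = torch.linspace(0, drop_path_rate, depth)   # 0.0000, 0.0043, 0.0087, ..., 0.1000 as logged
    init_std = math.sqrt(1 / embed_dim / 3)          # 0.0147314..., as in [init_weights]
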
======================================================= RESTART [06-18 20:21:22] =======================================================
[06-18 20:21:22] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 20:21:22] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 20:21:22] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 20:21:23] (/projects/VARSR/train.py, line 85)=> global bs=8, local bs=4
[06-18 20:21:23] (/projects/VARSR/train.py, line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 20:21:23] (/projects/VARSR/train.py, line 97)=> [build dehazing paired dataset] ...
[06-18 20:21:31] (/VARSR/zrk_utils/data.py, line 22)=> [Dataset] len(dataset)=18130
[06-18 20:21:31] (/VARSR/zrk_utils/data.py, line 22)=> [Dataset] len(dataset)=1135
[06-18 20:21:31] (/projects/VARSR/train.py, line 146)=> [auto_resume] no ckpt found @ results/VARDahazing0616/ar-ckpt*.pth
[06-18 20:21:31] (/projects/VARSR/train.py, line 146)=> [auto_resume quit]
[06-18 20:21:31] (/projects/VARSR/train.py, line 147)=> [dataloader multi processing] ... [dataloader multi processing](*) finished! (0.00s)
[06-18 20:21:31] (/projects/VARSR/train.py, line 153)=> [dataloader] gbs=8, lbs=4, iters_train=2267, types(tr, va)=('PairedImageDataset_Dehazing', 'PairedImageDataset_Dehazing')
[06-18 20:21:32] (R/models/var4dehazing.py, line 183)=> [VAR config ] embed_dim=1536, num_heads=24, depth=24, mlp_ratio=4.0
[drop ratios ] drop_rate=0.0, attn_drop_rate=0.0, drop_path_rate=0.1 (tensor([0.0000, 0.0043, 0.0087, 0.0130, 0.0174, 0.0217, 0.0261, 0.0304, 0.0348,
0.0391, 0.0435, 0.0478, 0.0522, 0.0565, 0.0609, 0.0652, 0.0696, 0.0739,
0.0783, 0.0826, 0.0870, 0.0913, 0.0957, 0.1000]))
[06-18 20:21:32] (R/models/var4dehazing.py, line 207)=> torch.Size([1, 1, 3264, 3264])
[06-18 20:21:33] (R/models/var4dehazing.py, line 504)=> [init_weights] VAR_dehazing with init_std=0.0147314
======================================================= RESTART [06-18 20:22:42] =======================================================
[06-18 20:22:42] (SR/zrk_utils/arg_util.py, line 196)=> [tf32] [precis] torch.get_float32_matmul_precision(): high
[06-18 20:22:42] (SR/zrk_utils/arg_util.py, line 197)=> [tf32] [ conv ] torch.backends.cudnn.allow_tf32: True
[06-18 20:22:42] (SR/zrk_utils/arg_util.py, line 198)=> [tf32] [matmul] torch.backends.cuda.matmul.allow_tf32: True
[06-18 20:22:44] (/projects/VARSR/train.py, line 85)=> global bs=8, local bs=4
[06-18 20:22:44] (/projects/VARSR/train.py, line 86)=> initial args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : None
acc_tail : None
L_mean : None
L_tail : None
vacc_mean : None
vacc_tail : None
vL_mean : None
vL_tail : None
grad_norm : None
cur_lr : None
cur_wd : None
cur_it :
cur_ep :
remain_time :
finish_time :
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}
[06-18 20:22:44] (/projects/VARSR/train.py, line 97)=> [build dehazing paired dataset] ...
[06-18 20:23:08] (/VARSR/zrk_utils/data.py, line 22)=> [Dataset] len(dataset)=18130
[06-18 20:23:08] (/VARSR/zrk_utils/data.py, line 22)=> [Dataset] len(dataset)=1135
[06-18 20:23:08] (/projects/VARSR/train.py, line 146)=> [auto_resume] no ckpt found @ results/VARDahazing0616/ar-ckpt*.pth
[06-18 20:23:08] (/projects/VARSR/train.py, line 146)=> [auto_resume quit]
[06-18 20:23:08] (/projects/VARSR/train.py, line 147)=> [dataloader multi processing] ... [dataloader multi processing](*) finished! (0.00s)
[06-18 20:23:08] (/projects/VARSR/train.py, line 153)=> [dataloader] gbs=8, lbs=4, iters_train=2267, types(tr, va)=('PairedImageDataset_Dehazing', 'PairedImageDataset_Dehazing')
[06-18 20:23:09] (R/models/var4dehazing.py, line 183)=> [VAR config ] embed_dim=1536, num_heads=24, depth=24, mlp_ratio=4.0
[drop ratios ] drop_rate=0.0, attn_drop_rate=0.0, drop_path_rate=0.1 (tensor([0.0000, 0.0043, 0.0087, 0.0130, 0.0174, 0.0217, 0.0261, 0.0304, 0.0348,
0.0391, 0.0435, 0.0478, 0.0522, 0.0565, 0.0609, 0.0652, 0.0696, 0.0739,
0.0783, 0.0826, 0.0870, 0.0913, 0.0957, 0.1000]))
[06-18 20:23:09] (R/models/var4dehazing.py, line 207)=> torch.Size([1, 1, 3264, 3264])
[06-18 20:23:10] (R/models/var4dehazing.py, line 504)=> [init_weights] VAR_dehazing with init_std=0.0147314
[06-18 20:23:13] (/projects/VARSR/train.py, line 53)=> pos_start
[06-18 20:23:13] (/projects/VARSR/train.py, line 53)=> lvl_1L
[06-18 20:23:13] (/projects/VARSR/train.py, line 53)=> attn_bias_for_masking
[06-18 20:23:14] (/projects/VARSR/train.py, line 57)=> [VARTrainer.load_state_dict] missing: ['pos_start', 'lvl_1L', 'attn_bias_for_masking', 'con_embedding.conv_in.weight', 'con_embedding.conv_in.bias', 'con_embedding.blocks.0.weight', 'con_embedding.blocks.0.bias', 'con_embedding.blocks.1.weight', 'con_embedding.blocks.1.bias', 'con_embedding.blocks.2.weight', 'con_embedding.blocks.2.bias', 'con_embedding.blocks.3.weight', 'con_embedding.blocks.3.bias', 'con_embedding.blocks.4.weight', 'con_embedding.blocks.4.bias', 'con_embedding.blocks.5.weight', 'con_embedding.blocks.5.bias', 'con_embedding.blocks.6.weight', 'con_embedding.blocks.6.bias', 'con_embedding.blocks.7.weight', 'con_embedding.blocks.7.bias', 'con_embedding.conv_out.weight', 'con_embedding.conv_out.bias', 'decoder_norm.weight', 'decoder_norm.bias', 'diffloss.net.time_embed.mlp.0.weight', 'diffloss.net.time_embed.mlp.0.bias', 'diffloss.net.time_embed.mlp.2.weight', 'diffloss.net.time_embed.mlp.2.bias', 'diffloss.net.cond_embed.weight', 'diffloss.net.cond_embed.bias', 'diffloss.net.input_proj.weight', 'diffloss.net.input_proj.bias', 'diffloss.net.res_blocks.0.in_ln.weight', 'diffloss.net.res_blocks.0.in_ln.bias', 'diffloss.net.res_blocks.0.mlp.0.weight', 'diffloss.net.res_blocks.0.mlp.0.bias', 'diffloss.net.res_blocks.0.mlp.2.weight', 'diffloss.net.res_blocks.0.mlp.2.bias', 'diffloss.net.res_blocks.0.adaLN_modulation.1.weight', 'diffloss.net.res_blocks.0.adaLN_modulation.1.bias', 'diffloss.net.res_blocks.1.in_ln.weight', 'diffloss.net.res_blocks.1.in_ln.bias', 'diffloss.net.res_blocks.1.mlp.0.weight', 'diffloss.net.res_blocks.1.mlp.0.bias', 'diffloss.net.res_blocks.1.mlp.2.weight', 'diffloss.net.res_blocks.1.mlp.2.bias', 'diffloss.net.res_blocks.1.adaLN_modulation.1.weight', 'diffloss.net.res_blocks.1.adaLN_modulation.1.bias', 'diffloss.net.res_blocks.2.in_ln.weight', 'diffloss.net.res_blocks.2.in_ln.bias', 'diffloss.net.res_blocks.2.mlp.0.weight', 'diffloss.net.res_blocks.2.mlp.0.bias', 'diffloss.net.res_blocks.2.mlp.2.weight', 'diffloss.net.res_blocks.2.mlp.2.bias', 'diffloss.net.res_blocks.2.adaLN_modulation.1.weight', 'diffloss.net.res_blocks.2.adaLN_modulation.1.bias', 'diffloss.net.res_blocks.3.in_ln.weight', 'diffloss.net.res_blocks.3.in_ln.bias', 'diffloss.net.res_blocks.3.mlp.0.weight', 'diffloss.net.res_blocks.3.mlp.0.bias', 'diffloss.net.res_blocks.3.mlp.2.weight', 'diffloss.net.res_blocks.3.mlp.2.bias', 'diffloss.net.res_blocks.3.adaLN_modulation.1.weight', 'diffloss.net.res_blocks.3.adaLN_modulation.1.bias', 'diffloss.net.res_blocks.4.in_ln.weight', 'diffloss.net.res_blocks.4.in_ln.bias', 'diffloss.net.res_blocks.4.mlp.0.weight', 'diffloss.net.res_blocks.4.mlp.0.bias', 'diffloss.net.res_blocks.4.mlp.2.weight', 'diffloss.net.res_blocks.4.mlp.2.bias', 'diffloss.net.res_blocks.4.adaLN_modulation.1.weight', 'diffloss.net.res_blocks.4.adaLN_modulation.1.bias', 'diffloss.net.res_blocks.5.in_ln.weight', 'diffloss.net.res_blocks.5.in_ln.bias', 'diffloss.net.res_blocks.5.mlp.0.weight', 'diffloss.net.res_blocks.5.mlp.0.bias', 'diffloss.net.res_blocks.5.mlp.2.weight', 'diffloss.net.res_blocks.5.mlp.2.bias', 'diffloss.net.res_blocks.5.adaLN_modulation.1.weight', 'diffloss.net.res_blocks.5.adaLN_modulation.1.bias', 'diffloss.net.final_layer.linear.weight', 'diffloss.net.final_layer.linear.bias', 'diffloss.net.final_layer.adaLN_modulation.1.weight', 'diffloss.net.final_layer.adaLN_modulation.1.bias']
[06-18 20:23:14] (/projects/VARSR/train.py, line 58)=> [VARTrainer.load_state_dict] unexpected: ['pos_1LC']
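
The missing/unexpected lists are the standard output of a non-strict state-dict load: modules new to the dehazing variant (con_embedding.*, decoder_norm, diffloss.*) are absent from var_d24.pth, while pos_1LC exists only in the checkpoint. A generic sketch of the pattern (assuming `var_model` is the freshly built VAR_dehazing; the trainer's exact code is not shown):

    import torch

    ckpt = torch.load('checkpoints/checkpoints_VAR/var_d24.pth', map_location='cpu')
    # strict=False returns the two lists logged above instead of raising.
    missing, unexpected = var_model.load_state_dict(ckpt, strict=False)
    print('missing:', missing)        # new modules, e.g. con_embedding.*, diffloss.*
    print('unexpected:', unexpected)  # ['pos_1LC']
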
[06-18 20:23:14] (/projects/VARSR/train.py, line 193)=> [INIT] VAR model = VAR_dehazing(
drop_path_rate=0.1
(con_embedding): ControlNetConditioningEmbedding(
(conv_in): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(blocks): ModuleList(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): Conv2d(32, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(6): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(7): Conv2d(512, 1536, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
)
(conv_out): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
(word_embed): Linear(in_features=32, out_features=1536, bias=True)
(class_emb): Embedding(2, 1536)
(lvl_embed): Embedding(10, 1536)
(shared_ada_lin): Identity()
(blocks): ModuleList(
(0): AdaLNSelfAttn_RoPE(
shared_aln=False
(drop_path): Identity()
(attn): SelfAttention_RoPE(
using_flash=False, using_xform=False, attn_l2_norm=True
(mat_qkv): Linear(in_features=1536, out_features=4608, bias=False)
(proj): Linear(in_features=1536, out_features=1536, bias=True)
(proj_drop): Identity()
)
(ffn): FFN(
fused_mlp_func=False
(fc1): Linear(in_features=1536, out_features=6144, bias=True)
(act): GELU(approximate='tanh')
(fc2): Linear(in_features=6144, out_features=1536, bias=True)
(drop): Identity()
)
(ln_wo_grad): LayerNorm((1536,), eps=1e-06, elementwise_affine=False)
(ada_lin): Sequential(
(0): SiLU()
(1): Linear(in_features=1536, out_features=9216, bias=True)
)
)
(1-23): 23 x AdaLNSelfAttn_RoPE(
shared_aln=False
(drop_path): DropPath((drop_prob=...))
(attn): SelfAttention_RoPE(
using_flash=False, using_xform=False, attn_l2_norm=True
(mat_qkv): Linear(in_features=1536, out_features=4608, bias=False)
(proj): Linear(in_features=1536, out_features=1536, bias=True)
(proj_drop): Identity()
)
(ffn): FFN(
fused_mlp_func=False
(fc1): Linear(in_features=1536, out_features=6144, bias=True)
(act): GELU(approximate='tanh')
(fc2): Linear(in_features=6144, out_features=1536, bias=True)
(drop): Identity()
)
(ln_wo_grad): LayerNorm((1536,), eps=1e-06, elementwise_affine=False)
(ada_lin): Sequential(
(0): SiLU()
(1): Linear(in_features=1536, out_features=9216, bias=True)
)
)
)
(head_nm): AdaLNBeforeHead(
(ln_wo_grad): LayerNorm((1536,), eps=1e-06, elementwise_affine=False)
(ada_lin): Sequential(
(0): SiLU()
(1): Linear(in_features=1536, out_features=3072, bias=True)
)
)
(head): Linear(in_features=1536, out_features=4096, bias=True)
(decoder_norm): LayerNorm((1536,), eps=1e-06, elementwise_affine=True)
(diffloss): DiffLoss(
(net): SimpleMLPAdaLN(
(time_embed): TimestepEmbedder(
(mlp): Sequential(
(0): Linear(in_features=256, out_features=1024, bias=True)
(1): SiLU()
(2): Linear(in_features=1024, out_features=1024, bias=True)
)
)
(cond_embed): Linear(in_features=1536, out_features=1024, bias=True)
(input_proj): Linear(in_features=32, out_features=1024, bias=True)
(res_blocks): ModuleList(
(0-5): 6 x ResBlock(
(in_ln): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
(mlp): Sequential(
(0): Linear(in_features=1024, out_features=1024, bias=True)
(1): SiLU()
(2): Linear(in_features=1024, out_features=1024, bias=True)
)
(adaLN_modulation): Sequential(
(0): SiLU()
(1): Linear(in_features=1024, out_features=3072, bias=True)
)
)
)
(final_layer): FinalLayer(
(norm_final): LayerNorm((1024,), eps=1e-06, elementwise_affine=False)
(linear): Linear(in_features=1024, out_features=64, bias=True)
(adaLN_modulation): Sequential(
(0): SiLU()
(1): Linear(in_features=1024, out_features=2048, bias=True)
)
)
)
)
)
[06-18 20:23:14] (/projects/VARSR/train.py, line 195)=> [INIT][#para] VAE=108.95, VAE.enc=44.11, VAE.dec=64.65, VAE.quant=0.17
[06-18 20:23:14] (/projects/VARSR/train.py, line 196)=> [INIT][#para] VAR=1101.92
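
Both [#para] lines are parameter counts in millions; the VAR figure is confirmed by the raw numel=1101916448 printed by get_param_groups below. A one-line sketch (assuming `var_model` as above):

    n_params = sum(p.numel() for p in var_model.parameters())   # 1101916448
    print(f'[#para] VAR={n_params / 1e6:.2f}')                  # VAR=1101.92
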
[06-18 20:23:14] (ARSR/utils/lr_control.py, line 99)=> [get_param_groups] param_groups =
{ 'D': { 'lr_sc': 1.0,
'params': "('con_embedding.conv_in.weight, con_embedding.blocks.0.weight, con_embedding.blocks.1.weight, con_embedding.blocks.2.weight, con_embedding.blocks.3.weight, con_embedding.blocks.4.weight, '\n"
" 'con_embedding.blocks.5.weight, con_embedding.blocks.6.weight, con_embedding.blocks.7.weight, con_embedding.conv_out.weight, word_embed.weight, class_emb.weight, blocks.0.attn.mat_qkv.weight, '\n"
" 'blocks.0.attn.proj.weight, blocks.0.ffn.fc1.weight, blocks.0.ffn.fc2.weight, blocks.0.ada_lin.1.weight, blocks.1.attn.mat_qkv.weight, blocks.1.attn.proj.weight, blocks.1.ffn.fc1.weight, '\n"
" 'blocks.1.ffn.fc2.weight, blocks.1.ada_lin.1.weight, blocks.2.attn.mat_qkv.weight, blocks.2.attn.proj.weight, blocks.2.ffn.fc1.weight, blocks.2.ffn.fc2.weight, blocks.2.ada_lin.1.weight, '\n"
" 'blocks.3.attn.mat_qkv.weight, blocks.3.attn.proj.weight, blocks.3.ffn.fc1.weight, blocks.3.ffn.fc2.weight, blocks.3.ada_lin.1.weight, blocks.4.attn.mat_qkv.weight, blocks.4.attn.proj.weight, '\n"
" 'blocks.4.ffn.fc1.weight, blocks.4.ffn.fc2.weight, blocks.4.ada_lin.1.weight, blocks.5.attn.mat_qkv.weight, blocks.5.attn.proj.weight, blocks.5.ffn.fc1.weight, blocks.5.ffn.fc2.weight, '\n"
" 'blocks.5.ada_lin.1.weight, blocks.6.attn.mat_qkv.weight, blocks.6.attn.proj.weight, blocks.6.ffn.fc1.weight, blocks.6.ffn.fc2.weight, blocks.6.ada_lin.1.weight, blocks.7.attn.mat_qkv.weight, '\n"
" 'blocks.7.attn.proj.weight, blocks.7.ffn.fc1.weight, blocks.7.ffn.fc2.weight, blocks.7.ada_lin.1.weight, blocks.8.attn.mat_qkv.weight, blocks.8.attn.proj.weight, blocks.8.ffn.fc1.weight, '\n"
" 'blocks.8.ffn.fc2.weight, blocks.8.ada_lin.1.weight, blocks.9.attn.mat_qkv.weight, blocks.9.attn.proj.weight, blocks.9.ffn.fc1.weight, blocks.9.ffn.fc2.weight, blocks.9.ada_lin.1.weight, '\n"
" 'blocks.10.attn.mat_qkv.weight, blocks.10.attn.proj.weight, blocks.10.ffn.fc1.weight, blocks.10.ffn.fc2.weight, blocks.10.ada_lin.1.weight, blocks.11.attn.mat_qkv.weight, '\n"
" 'blocks.11.attn.proj.weight, blocks.11.ffn.fc1.weight, blocks.11.ffn.fc2.weight, blocks.11.ada_lin.1.weight, blocks.12.attn.mat_qkv.weight, blocks.12.attn.proj.weight, blocks.12.ffn.fc1.weight, '\n"
" 'blocks.12.ffn.fc2.weight, blocks.12.ada_lin.1.weight, blocks.13.attn.mat_qkv.weight, blocks.13.attn.proj.weight, blocks.13.ffn.fc1.weight, blocks.13.ffn.fc2.weight, blocks.13.ada_lin.1.weight, '\n"
" 'blocks.14.attn.mat_qkv.weight, blocks.14.attn.proj.weight, blocks.14.ffn.fc1.weight, blocks.14.ffn.fc2.weight, blocks.14.ada_lin.1.weight, blocks.15.attn.mat_qkv.weight, '\n"
" 'blocks.15.attn.proj.weight, blocks.15.ffn.fc1.weight, blocks.15.ffn.fc2.weight, blocks.15.ada_lin.1.weight, blocks.16.attn.mat_qkv.weight, blocks.16.attn.proj.weight, blocks.16.ffn.fc1.weight, '\n"
" 'blocks.16.ffn.fc2.weight, blocks.16.ada_lin.1.weight, blocks.17.attn.mat_qkv.weight, blocks.17.attn.proj.weight, blocks.17.ffn.fc1.weight, blocks.17.ffn.fc2.weight, blocks.17.ada_lin.1.weight, '\n"
" 'blocks.18.attn.mat_qkv.weight, blocks.18.attn.proj.weight, blocks.18.ffn.fc1.weight, blocks.18.ffn.fc2.weight, blocks.18.ada_lin.1.weight, blocks.19.attn.mat_qkv.weight, '\n"
" 'blocks.19.attn.proj.weight, blocks.19.ffn.fc1.weight, blocks.19.ffn.fc2.weight, blocks.19.ada_lin.1.weight, blocks.20.attn.mat_qkv.weight, blocks.20.attn.proj.weight, blocks.20.ffn.fc1.weight, '\n"
" 'blocks.20.ffn.fc2.weight, blocks.20.ada_lin.1.weight, blocks.21.attn.mat_qkv.weight, blocks.21.attn.proj.weight, blocks.21.ffn.fc1.weight, blocks.21.ffn.fc2.weight, blocks.21.ada_lin.1.weight, '\n"
" 'blocks.22.attn.mat_qkv.weight, blocks.22.attn.proj.weight, blocks.22.ffn.fc1.weight, blocks.22.ffn.fc2.weight, blocks.22.ada_lin.1.weight, blocks.23.attn.mat_qkv.weight, '\n"
" 'blocks.23.attn.proj.weight, blocks.23.ffn.fc1.weight, blocks.23.ffn.fc2.weight, blocks.23.ada_lin.1.weight, head_nm.ada_lin.1.weight, head.weight')",
'wd_sc': 1.0},
'ND': { 'lr_sc': 1.0,
'params': "('pos_start, con_embedding.conv_in.bias, con_embedding.blocks.0.bias, con_embedding.blocks.1.bias, con_embedding.blocks.2.bias, con_embedding.blocks.3.bias, con_embedding.blocks.4.bias, '\n"
" 'con_embedding.blocks.5.bias, con_embedding.blocks.6.bias, con_embedding.blocks.7.bias, con_embedding.conv_out.bias, word_embed.bias, lvl_embed.weight, blocks.0.attn.scale_mul_1H11, '\n"
" 'blocks.0.attn.q_bias, blocks.0.attn.v_bias, blocks.0.attn.proj.bias, blocks.0.ffn.fc1.bias, blocks.0.ffn.fc2.bias, blocks.0.ada_lin.1.bias, blocks.1.attn.scale_mul_1H11, blocks.1.attn.q_bias, '\n"
" 'blocks.1.attn.v_bias, blocks.1.attn.proj.bias, blocks.1.ffn.fc1.bias, blocks.1.ffn.fc2.bias, blocks.1.ada_lin.1.bias, blocks.2.attn.scale_mul_1H11, blocks.2.attn.q_bias, blocks.2.attn.v_bias, '\n"
" 'blocks.2.attn.proj.bias, blocks.2.ffn.fc1.bias, blocks.2.ffn.fc2.bias, blocks.2.ada_lin.1.bias, blocks.3.attn.scale_mul_1H11, blocks.3.attn.q_bias, blocks.3.attn.v_bias, blocks.3.attn.proj.bias, '\n"
" 'blocks.3.ffn.fc1.bias, blocks.3.ffn.fc2.bias, blocks.3.ada_lin.1.bias, blocks.4.attn.scale_mul_1H11, blocks.4.attn.q_bias, blocks.4.attn.v_bias, blocks.4.attn.proj.bias, blocks.4.ffn.fc1.bias, '\n"
" 'blocks.4.ffn.fc2.bias, blocks.4.ada_lin.1.bias, blocks.5.attn.scale_mul_1H11, blocks.5.attn.q_bias, blocks.5.attn.v_bias, blocks.5.attn.proj.bias, blocks.5.ffn.fc1.bias, blocks.5.ffn.fc2.bias, '\n"
" 'blocks.5.ada_lin.1.bias, blocks.6.attn.scale_mul_1H11, blocks.6.attn.q_bias, blocks.6.attn.v_bias, blocks.6.attn.proj.bias, blocks.6.ffn.fc1.bias, blocks.6.ffn.fc2.bias, blocks.6.ada_lin.1.bias, '\n"
" 'blocks.7.attn.scale_mul_1H11, blocks.7.attn.q_bias, blocks.7.attn.v_bias, blocks.7.attn.proj.bias, blocks.7.ffn.fc1.bias, blocks.7.ffn.fc2.bias, blocks.7.ada_lin.1.bias, '\n"
" 'blocks.8.attn.scale_mul_1H11, blocks.8.attn.q_bias, blocks.8.attn.v_bias, blocks.8.attn.proj.bias, blocks.8.ffn.fc1.bias, blocks.8.ffn.fc2.bias, blocks.8.ada_lin.1.bias, '\n"
" 'blocks.9.attn.scale_mul_1H11, blocks.9.attn.q_bias, blocks.9.attn.v_bias, blocks.9.attn.proj.bias, blocks.9.ffn.fc1.bias, blocks.9.ffn.fc2.bias, blocks.9.ada_lin.1.bias, '\n"
" 'blocks.10.attn.scale_mul_1H11, blocks.10.attn.q_bias, blocks.10.attn.v_bias, blocks.10.attn.proj.bias, blocks.10.ffn.fc1.bias, blocks.10.ffn.fc2.bias, blocks.10.ada_lin.1.bias, '\n"
" 'blocks.11.attn.scale_mul_1H11, blocks.11.attn.q_bias, blocks.11.attn.v_bias, blocks.11.attn.proj.bias, blocks.11.ffn.fc1.bias, blocks.11.ffn.fc2.bias, blocks.11.ada_lin.1.bias, '\n"
" 'blocks.12.attn.scale_mul_1H11, blocks.12.attn.q_bias, blocks.12.attn.v_bias, blocks.12.attn.proj.bias, blocks.12.ffn.fc1.bias, blocks.12.ffn.fc2.bias, blocks.12.ada_lin.1.bias, '\n"
" 'blocks.13.attn.scale_mul_1H11, blocks.13.attn.q_bias, blocks.13.attn.v_bias, blocks.13.attn.proj.bias, blocks.13.ffn.fc1.bias, blocks.13.ffn.fc2.bias, blocks.13.ada_lin.1.bias, '\n"
" 'blocks.14.attn.scale_mul_1H11, blocks.14.attn.q_bias, blocks.14.attn.v_bias, blocks.14.attn.proj.bias, blocks.14.ffn.fc1.bias, blocks.14.ffn.fc2.bias, blocks.14.ada_lin.1.bias, '\n"
" 'blocks.15.attn.scale_mul_1H11, blocks.15.attn.q_bias, blocks.15.attn.v_bias, blocks.15.attn.proj.bias, blocks.15.ffn.fc1.bias, blocks.15.ffn.fc2.bias, blocks.15.ada_lin.1.bias, '\n"
" 'blocks.16.attn.scale_mul_1H11, blocks.16.attn.q_bias, blocks.16.attn.v_bias, blocks.16.attn.proj.bias, blocks.16.ffn.fc1.bias, blocks.16.ffn.fc2.bias, blocks.16.ada_lin.1.bias, '\n"
" 'blocks.17.attn.scale_mul_1H11, blocks.17.attn.q_bias, blocks.17.attn.v_bias, blocks.17.attn.proj.bias, blocks.17.ffn.fc1.bias, blocks.17.ffn.fc2.bias, blocks.17.ada_lin.1.bias, '\n"
" 'blocks.18.attn.scale_mul_1H11, blocks.18.attn.q_bias, blocks.18.attn.v_bias, blocks.18.attn.proj.bias, blocks.18.ffn.fc1.bias, blocks.18.ffn.fc2.bias, blocks.18.ada_lin.1.bias, '\n"
" 'blocks.19.attn.scale_mul_1H11, blocks.19.attn.q_bias, blocks.19.attn.v_bias, blocks.19.attn.proj.bias, blocks.19.ffn.fc1.bias, blocks.19.ffn.fc2.bias, blocks.19.ada_lin.1.bias, '\n"
" 'blocks.20.attn.scale_mul_1H11, blocks.20.attn.q_bias, blocks.20.attn.v_bias, blocks.20.attn.proj.bias, blocks.20.ffn.fc1.bias, blocks.20.ffn.fc2.bias, blocks.20.ada_lin.1.bias, '\n"
" 'blocks.21.attn.scale_mul_1H11, blocks.21.attn.q_bias, blocks.21.attn.v_bias, blocks.21.attn.proj.bias, blocks.21.ffn.fc1.bias, blocks.21.ffn.fc2.bias, blocks.21.ada_lin.1.bias, '\n"
" 'blocks.22.attn.scale_mul_1H11, blocks.22.attn.q_bias, blocks.22.attn.v_bias, blocks.22.attn.proj.bias, blocks.22.ffn.fc1.bias, blocks.22.ffn.fc2.bias, blocks.22.ada_lin.1.bias, '\n"
" 'blocks.23.attn.scale_mul_1H11, blocks.23.attn.q_bias, blocks.23.attn.v_bias, blocks.23.attn.proj.bias, blocks.23.ffn.fc1.bias, blocks.23.ffn.fc2.bias, blocks.23.ada_lin.1.bias, '\n"
" 'head_nm.ada_lin.1.bias, head.bias, decoder_norm.weight, decoder_norm.bias, diffloss.net.time_embed.mlp.0.weight, diffloss.net.time_embed.mlp.0.bias, diffloss.net.time_embed.mlp.2.weight, '\n"
" 'diffloss.net.time_embed.mlp.2.bias, diffloss.net.cond_embed.weight, diffloss.net.cond_embed.bias, diffloss.net.input_proj.weight, diffloss.net.input_proj.bias, '\n"
" 'diffloss.net.res_blocks.0.in_ln.weight, diffloss.net.res_blocks.0.in_ln.bias, diffloss.net.res_blocks.0.mlp.0.weight, diffloss.net.res_blocks.0.mlp.0.bias, diffloss.net.res_blocks.0.mlp.2.weight, '\n"
" 'diffloss.net.res_blocks.0.mlp.2.bias, diffloss.net.res_blocks.0.adaLN_modulation.1.weight, diffloss.net.res_blocks.0.adaLN_modulation.1.bias, diffloss.net.res_blocks.1.in_ln.weight, '\n"
" 'diffloss.net.res_blocks.1.in_ln.bias, diffloss.net.res_blocks.1.mlp.0.weight, diffloss.net.res_blocks.1.mlp.0.bias, diffloss.net.res_blocks.1.mlp.2.weight, diffloss.net.res_blocks.1.mlp.2.bias, '\n"
" 'diffloss.net.res_blocks.1.adaLN_modulation.1.weight, diffloss.net.res_blocks.1.adaLN_modulation.1.bias, diffloss.net.res_blocks.2.in_ln.weight, diffloss.net.res_blocks.2.in_ln.bias, '\n"
" 'diffloss.net.res_blocks.2.mlp.0.weight, diffloss.net.res_blocks.2.mlp.0.bias, diffloss.net.res_blocks.2.mlp.2.weight, diffloss.net.res_blocks.2.mlp.2.bias, '\n"
" 'diffloss.net.res_blocks.2.adaLN_modulation.1.weight, diffloss.net.res_blocks.2.adaLN_modulation.1.bias, diffloss.net.res_blocks.3.in_ln.weight, diffloss.net.res_blocks.3.in_ln.bias, '\n"
" 'diffloss.net.res_blocks.3.mlp.0.weight, diffloss.net.res_blocks.3.mlp.0.bias, diffloss.net.res_blocks.3.mlp.2.weight, diffloss.net.res_blocks.3.mlp.2.bias, '\n"
" 'diffloss.net.res_blocks.3.adaLN_modulation.1.weight, diffloss.net.res_blocks.3.adaLN_modulation.1.bias, diffloss.net.res_blocks.4.in_ln.weight, diffloss.net.res_blocks.4.in_ln.bias, '\n"
" 'diffloss.net.res_blocks.4.mlp.0.weight, diffloss.net.res_blocks.4.mlp.0.bias, diffloss.net.res_blocks.4.mlp.2.weight, diffloss.net.res_blocks.4.mlp.2.bias, '\n"
" 'diffloss.net.res_blocks.4.adaLN_modulation.1.weight, diffloss.net.res_blocks.4.adaLN_modulation.1.bias, diffloss.net.res_blocks.5.in_ln.weight, diffloss.net.res_blocks.5.in_ln.bias, '\n"
" 'diffloss.net.res_blocks.5.mlp.0.weight, diffloss.net.res_blocks.5.mlp.0.bias, diffloss.net.res_blocks.5.mlp.2.weight, diffloss.net.res_blocks.5.mlp.2.bias, '\n"
" 'diffloss.net.res_blocks.5.adaLN_modulation.1.weight, diffloss.net.res_blocks.5.adaLN_modulation.1.bias, diffloss.net.final_layer.linear.weight, diffloss.net.final_layer.linear.bias, '\n"
" 'diffloss.net.final_layer.adaLN_modulation.1.weight, diffloss.net.final_layer.adaLN_modulation.1.bias')",
'wd_sc': 0.0}}
[06-18 20:23:14] (ARSR/utils/lr_control.py, line 104)=> [get_param_groups][rank0] type(model).__name__='VAR_dehazing' count=379, numel=1101916448
[06-18 20:23:14] (ARSR/utils/lr_control.py, line 105)=>
[06-18 20:23:14] (/projects/VARSR/train.py, line 211)=> [INIT] optim=functools.partial(<class 'torch.optim.adamw.AdamW'>, betas=(0.9, 0.95), fused=True), opt_kw={'lr': 5e-05, 'weight_decay': 0}
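
get_param_groups splits parameters into a weight-decay group 'D' (wd_sc=1.0, mostly 2-D weight matrices) and a no-decay group 'ND' (wd_sc=0.0: biases, norms, and other 1-D tensors), and the logged functools.partial then builds the fused AdamW. A simplified sketch of that pattern; the real split has a few hand-picked exceptions (e.g. lvl_embed.weight sits in 'ND' while class_emb.weight decays), and the trainer applies twd=0.05 through wd_sc at step time rather than fixing it per group as done here:

    import functools
    import torch

    decay, no_decay = [], []
    for name, p in var_model.named_parameters():
        if not p.requires_grad:
            continue
        # Heuristic: >=2-D weights decay; biases and 1-D params do not.
        (no_decay if p.ndim < 2 or name.endswith('.bias') else decay).append(p)

    param_groups = [
        {'params': decay, 'weight_decay': 0.05},    # 'D',  wd_sc=1.0 (twd=0.05)
        {'params': no_decay, 'weight_decay': 0.0},  # 'ND', wd_sc=0.0
    ]
    opt_clz = functools.partial(torch.optim.AdamW, betas=(0.9, 0.95), fused=True)
    optimizer = opt_clz(param_groups, lr=5e-05, weight_decay=0)  # opt_kw as logged
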
[06-18 20:23:23] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 0/5] [ 0/2267] eta: 5:32:58 tlr: 2.5e-07 tnm: 29.13 Lm: 10.083 (10.083) Lt: 9.557 (9.557) Accm: 0.04 (0.04) Acct: 0.10 (0.10) time: 8.8129 data: 0.7686
[06-18 20:58:02] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 0/5] [ 566/2267] eta: 1:44:24 tlr: 5e-05 tnm: 1.22 Lm: 8.801 (8.801) Lt: 8.717 (8.717) Accm: 0.56 (0.56) Acct: 0.13 (0.13) time: 3.6906 data: 0.8209
[06-18 21:33:35] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 0/5] [1133/2267] eta: 1:10:20 tlr: 5e-05 tnm: 0.74 Lm: 7.528 (8.376) Lt: 7.878 (8.399) Accm: 0.74 (0.62) Acct: 0.17 (0.23) time: 3.8047 data: 0.9043
[06-18 22:09:02] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 0/5] [1699/2267] eta: 0:35:20 tlr: 5e-05 tnm: 0.84 Lm: 7.523 (8.126) Lt: 7.820 (8.226) Accm: 0.90 (0.79) Acct: 0.29 (0.34) time: 3.6712 data: 0.7746
[06-18 22:44:37] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 0/5] [2266/2267] eta: 0:00:03 tlr: 5e-05 tnm: 0.84 Lm: 7.518 (7.916) Lt: 7.761 (8.090) Accm: 1.07 (1.05) Acct: 0.42 (0.51) time: 3.7091 data: 0.8078
[06-18 22:44:37] (/VARSR/zrk_utils/misc.py, line 344)=> [Ep]: [ 0/5] Total time: 2:21:22 (3.742 s / it)
[06-18 22:44:37] (/projects/VARSR/train.py, line 423)=> [ep0] (training ) Lm: 7.935 (7.935), Lt: 8.120 (8.120), Acc m&t: 1.00 0.47, Remain: 9:21:32, Finish: 2025-06-19 10:06
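
The tlr column traces the lin0 schedule; its endpoints follow directly from wp0 and wpe scaling the peak rate (an assumed convention that the logged values corroborate):

    peak_lr  = 5e-05             # tblr = tlr
    start_lr = 0.005 * peak_lr   # wp0 * peak = 2.5e-07, the tlr at [Ep 0, it 0]
    final_lr = 0.01  * peak_lr   # wpe * peak = 5.0e-07, the tlr at the last iteration
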
[06-18 22:44:41] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 1/5] [ 0/2267] eta: 2:24:26 tlr: 5e-05 tnm: 0.90 Lm: 7.230 (7.230) Lt: 7.532 (7.532) Accm: 1.63 (1.63) Acct: 1.10 (1.10) time: 3.8231 data: 0.6894
[06-18 23:19:33] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 1/5] [ 566/2267] eta: 1:44:48 tlr: 5e-05 tnm: 0.79 Lm: 7.185 (7.185) Lt: 7.553 (7.553) Accm: 1.72 (1.72) Acct: 0.94 (0.94) time: 3.7238 data: 0.7989
[06-18 23:54:36] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 1/5] [1133/2267] eta: 1:09:59 tlr: 5e-05 tnm: 0.83 Lm: 7.139 (7.137) Lt: 7.532 (7.532) Accm: 1.81 (1.85) Acct: 1.00 (0.96) time: 3.6393 data: 0.7187
[06-19 00:29:21] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 1/5] [1699/2267] eta: 0:34:59 tlr: 4.7e-05 tnm: 1.01 Lm: 7.091 (7.109) Lt: 7.512 (7.509) Accm: 1.95 (2.01) Acct: 1.05 (1.02) time: 3.6697 data: 0.7540
[06-19 01:04:29] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 1/5] [2266/2267] eta: 0:00:03 tlr: 4.4e-05 tnm: 1.05 Lm: 7.042 (7.083) Lt: 7.491 (7.495) Accm: 2.10 (2.04) Acct: 1.00 (1.02) time: 3.6405 data: 0.7603
[06-19 01:04:29] (/VARSR/zrk_utils/misc.py, line 344)=> [Ep]: [ 1/5] Total time: 2:19:52 (3.702 s / it)
[06-19 01:04:29] (/projects/VARSR/train.py, line 423)=> [ep1] (training ) Lm: 7.085 (7.085), Lt: 7.508 (7.508), Acc m&t: 1.97 0.95, Remain: 6:52:48, Finish: 2025-06-19 09:57
[06-19 01:04:33] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 2/5] [ 0/2267] eta: 2:14:38 tlr: 4.4e-05 tnm: 0.98 Lm: 6.934 (6.934) Lt: 7.404 (7.404) Accm: 2.19 (2.19) Acct: 0.93 (0.93) time: 3.5634 data: 0.6454
[06-19 01:39:45] (/projects/VARSR/train.py, line 363)=> [*] [ep2] (val 1135) Lm: 6.8316, Lt: 7.2320, Acc m&t: 2.54 1.54, Val cost: 419.09s
[06-19 01:39:45] (/projects/VARSR/train.py, line 367)=> [saving ckpt] ... [saving ckpt](*) finished! @ results/VARDahazing0616/ar-ckpt-best.pth
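
Each validation pass (ep2 and ep4 here) is followed by a best-checkpoint save; a hypothetical sketch of the logic implied by the log (names invented for illustration):

    if val_L_mean < best_val_L_mean:   # 6.8316 at ep2, improved to 6.1785 at ep4
        best_val_L_mean = val_L_mean
        torch.save(trainer_state, 'results/VARDahazing0616/ar-ckpt-best.pth')
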
[06-19 01:46:22] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 2/5] [ 566/2267] eta: 2:05:39 tlr: 4e-05 tnm: 1.15 Lm: 6.759 (6.759) Lt: 7.316 (7.316) Accm: 2.84 (2.84) Acct: 1.14 (1.14) time: 3.7289 data: 0.8361
[06-19 02:21:54] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 2/5] [1133/2267] eta: 1:17:25 tlr: 3.7e-05 tnm: 1.30 Lm: 6.585 (6.674) Lt: 7.242 (7.291) Accm: 3.36 (3.01) Acct: 1.27 (1.18) time: 3.7628 data: 0.8396
[06-19 02:56:58] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 2/5] [1699/2267] eta: 0:37:34 tlr: 3.3e-05 tnm: 1.62 Lm: 6.544 (6.568) Lt: 7.235 (7.220) Accm: 3.43 (3.46) Acct: 1.31 (1.38) time: 3.6585 data: 0.7778
[06-19 03:31:56] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 2/5] [2266/2267] eta: 0:00:03 tlr: 2.9e-05 tnm: 1.49 Lm: 6.502 (6.522) Lt: 7.227 (7.185) Accm: 3.49 (3.61) Acct: 1.34 (1.42) time: 3.6719 data: 0.7520
[06-19 03:31:56] (/VARSR/zrk_utils/misc.py, line 344)=> [Ep]: [ 2/5] Total time: 2:27:26 (3.902 s / it)
[06-19 03:31:56] (/projects/VARSR/train.py, line 423)=> [ep2] (training ) Lm: 6.601 (6.601), Lt: 7.239 (7.239), Acc m&t: 3.31 1.28, Remain: 4:37:44, Finish: 2025-06-19 10:09
[06-19 03:32:00] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 3/5] [ 0/2267] eta: 2:16:07 tlr: 2.9e-05 tnm: 1.68 Lm: 6.134 (6.134) Lt: 6.922 (6.922) Accm: 4.75 (4.75) Acct: 1.66 (1.66) time: 3.6028 data: 0.6484
[06-19 04:06:19] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 3/5] [ 566/2267] eta: 1:43:09 tlr: 2.6e-05 tnm: 1.59 Lm: 5.999 (5.999) Lt: 6.788 (6.788) Accm: 5.33 (5.33) Acct: 2.10 (2.10) time: 3.6572 data: 0.7595
[06-19 04:40:59] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 3/5] [1133/2267] eta: 1:09:03 tlr: 2.2e-05 tnm: 1.92 Lm: 5.877 (5.959) Lt: 6.744 (6.773) Accm: 5.60 (5.42) Acct: 2.29 (2.16) time: 3.8565 data: 0.7627
[06-19 05:15:50] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 3/5] [1699/2267] eta: 0:34:42 tlr: 1.9e-05 tnm: 2.37 Lm: 5.871 (5.888) Lt: 6.699 (6.699) Accm: 5.76 (5.81) Acct: 2.42 (2.34) time: 3.6987 data: 0.7211
[06-19 05:50:59] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 3/5] [2266/2267] eta: 0:00:03 tlr: 1.5e-05 tnm: 2.20 Lm: 5.865 (5.869) Lt: 6.655 (6.673) Accm: 5.92 (5.93) Acct: 2.54 (2.41) time: 3.7162 data: 0.8073
[06-19 05:50:59] (/VARSR/zrk_utils/misc.py, line 344)=> [Ep]: [ 3/5] Total time: 2:19:02 (3.680 s / it)
[06-19 05:50:59] (/projects/VARSR/train.py, line 423)=> [ep3] (training ) Lm: 5.924 (5.924), Lt: 6.683 (6.683), Acc m&t: 5.82 2.52, Remain: 2:19:57, Finish: 2025-06-19 10:10
[06-19 05:51:02] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 4/5] [ 0/2267] eta: 2:12:57 tlr: 1.5e-05 tnm: 1.97 Lm: 5.730 (5.730) Lt: 6.450 (6.450) Accm: 5.97 (5.97) Acct: 3.03 (3.03) time: 3.5190 data: 0.6160
[06-19 06:25:29] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 4/5] [ 566/2267] eta: 1:43:30 tlr: 1.1e-05 tnm: 1.92 Lm: 5.768 (5.768) Lt: 6.413 (6.413) Accm: 5.73 (5.73) Acct: 3.09 (3.09) time: 3.6240 data: 0.6995
[06-19 06:54:19] (/projects/VARSR/train.py, line 363)=> [*] [ep4] (val 1135) Lm: 6.1785, Lt: 6.6731, Acc m&t: 4.82 2.99, Val cost: 359.43s
[06-19 06:54:19] (/projects/VARSR/train.py, line 367)=> [saving ckpt] ... [saving ckpt](*) finished! @ results/VARDahazing0616/ar-ckpt-best.pth
[06-19 07:07:19] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 4/5] [1133/2267] eta: 1:16:19 tlr: 7.7e-06 tnm: 2.26 Lm: 5.730 (5.611) Lt: 6.377 (6.308) Accm: 5.97 (6.54) Acct: 3.15 (3.39) time: 3.6319 data: 0.7192
[06-19 07:42:13] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 4/5] [1699/2267] eta: 0:37:09 tlr: 4.1e-06 tnm: 2.13 Lm: 5.603 (5.577) Lt: 6.250 (6.262) Accm: 6.48 (6.65) Acct: 3.48 (3.50) time: 3.7271 data: 0.8105
[06-19 08:17:44] (/VARSR/zrk_utils/misc.py, line 322)=> [Ep]: [ 4/5] [2266/2267] eta: 0:00:03 tlr: 5e-07 tnm: 2.01 Lm: 5.477 (5.518) Lt: 6.123 (6.212) Accm: 6.99 (7.06) Acct: 3.81 (3.72) time: 3.6919 data: 0.7889
[06-19 08:17:44] (/VARSR/zrk_utils/misc.py, line 344)=> [Ep]: [ 4/5] Total time: 2:26:44 (3.884 s / it)
[06-19 08:17:44] (/projects/VARSR/train.py, line 423)=> [ep4] (training ) Lm: 5.504 (5.504), Lt: 6.209 (6.209), Acc m&t: 7.24 3.73, Remain: 0:00:55, Finish: 2025-06-19 10:18
[06-19 08:17:44] (/projects/VARSR/train.py, line 427)=>
[06-19 08:17:44] (/projects/VARSR/train.py, line 428)=> [*] [PT finished] Total cost: 11.9h, Lm: 5.504 (5.503819465637207), Lt: 6.209 (6.209009504318237)
[06-19 08:17:44] (/projects/VARSR/train.py, line 429)=>
[06-19 08:17:50] (/projects/VARSR/train.py, line 436)=> final args:
{
vae_model_path : checkpoints/VQVAE.pth
var_pretrain_path : checkpoints/checkpoints_VAR/var_d24.pth
wandb_flag : True
exp_name : VARDahazing0616
vfast : 0
tfast : 0
depth : 24
ini : -1
hd : 0.02
aln : 0.5
alng : 0.0001
fp16 : 1
tblr : 5e-05
tlr : 5e-05
twd : 0.05
twde : 0.05
tclip : 2.0
ls : 0.0
bs : 8
batch_size : 4
glb_batch_size : 8
ac : 1
ep : 5
wp : 0.1
wp0 : 0.005
wpe : 0.01
sche : lin0
opt : adamw
afuse : True
saln : False
anorm : True
fuse : True
pn : 1_2_3_4_6_9_13_18_24_32
patch_size : 16
patch_nums : (1, 2, 3, 4, 6, 9, 13, 18, 24, 32)
resos : (16, 32, 48, 64, 96, 144, 208, 288, 384, 512)
data_load_reso : 512
mid_reso : 1.125
hflip : False
workers : 0
dataset_opt_path : options/VARSR_Dehazing.yml
val_freq : 5000
wandb_entity : benzlxs
project : VARSR
label_B : 1
pg : 0.0
pg0 : 4
pgwp : 0.016666666666666666
cmd : --depth=24 --bs=4 --ep=5 --fp16=1 --tblr=5e-5 --alng=1e-4 --wpe=0.01 --wandb_flag=True --fuse=1 --exp_name=VARDahazing0616 --opt=adamw
branch : master
commit_id : 80bd1f0d832951506610a3e82320a8322a6f1a7a
commit_msg : Update README.md
acc_mean : 7.236607149243355
acc_tail : 3.73291015625
L_mean : 5.503819465637207
L_tail : 6.209009504318237
vacc_mean : 4.817337989807129
vacc_tail : 2.9948203563690186
vL_mean : 6.178485870361328
vL_tail : 6.6730804443359375
grad_norm : 2.0592718839645388
cur_lr : 5.000000000000001e-07
cur_wd : 0.05
cur_it : 2267/2267
cur_ep : 5/5
remain_time : -
finish_time : 2025-06-19 10:16
local_out_dir_path : results/VARDahazing0616
tb_log_dir_path : results/VARDahazing0616/tb-VARd24__pn1_2_3_4_6_9_13_18_24_32__b8ep5adamlr5e-05wd0.05
log_txt_path : results/VARDahazing0616/log.txt
last_ckpt_path : results/VARDahazing0616/ar-ckpt-last.pth
tf32 : True
seed : None
same_seed_for_all_ranks: 0
local_debug : False
dbg_nan : False
}