DIBS / anet_clip /train.log
Exclibur's picture
Upload folder using huggingface_hub
f1c1609 verified
backup evironment completed !
Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal
******************** All args: *************************************************
align_contiguous = False
align_drop_z = 0
align_keep_percentile = 0.1
align_many_to_one = False
align_one_to_many = False
align_top_band_size = 0
att_hid_size = 512
aux_loss = True
backbone = None
base_cfg_path = cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml
basic_ss_prob = 0
batch_size = 1
batch_size_for_eval = 1
bbox_loss_coef = 0
beta = 1
cap_dec_n_points = 4
cap_nheads = 1
cap_num_feature_levels = 4
cap_prob_clip = False
caption_cost_type = loss
caption_decoder_type = standard
caption_loss_coef = 2
cfg_path = cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml
cl_schedule_time = [0, 2]
cl_schedule_val = [0, 0.1]
clip_context_dim = 512
cls_loss_coef = 2
contrastive_hidden_size = 128
contrastive_loss_start_coef = 0.0
contrastive_loss_temperature = 0.1
cost_alpha = 0.25
cost_gamma = 2
count_loss_coef = 0.5
criteria_for_best_ckpt = overall
current_lr = 5e-05
data_norm = 0
data_rescale = 1
debug = False
dec_layers = 2
dec_n_points = 4
device = cuda
dict_file = data/howto/vocabulary_howto_rate2_anet.json
dict_file_val = data/howto/vocabulary_howto_rate2_anet.json
dilation = False
disable_contrastive_projection = 1
disable_cudnn = 0
disable_mid_caption_heads = False
disable_rematch = False
disable_tqdm = False
drop_prob = 0.5
ec_alpha = 1.0
enable_bg_for_cl = True
enable_contrastive = False
enable_cross_video_cl = True
enable_e2t_cl = True
enc_layers = 2
enc_n_points = 4
eos_coef = 0.1
epoch = 20
eval_proposal_file = data/generated_proposals/dbg_trainval_top100.json
event_context_dim = None
feature_dim = 768
feature_sample_rate = 1
fix_xcw = 1
focal_alpha = 0.25
focal_gamma = 2.0
focal_mil = False
frame_embedding_num = 100
ft_gt_percent = 1.0
giou_loss_coef = 4
gpu_id = []
grad_clip = 100.0
gt_file_for_auc = data/anet/captiondata/val_all.json
gt_file_for_eval = ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json']
gt_file_for_para_eval = ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json']
gt_proposal_sample_num = 20
hidden_dim = 512
hidden_dropout_prob = 0.5
huggingface_cache_dir = .cache
id = seq2-ft(mix)-gt_percent-1.0
id_ori =
input_encoding_size = 512
invalid_video_json = []
iteration = 3
layer_norm_eps = 1e-12
learning_rate_decay_every = 3
learning_rate_decay_rate = 0.5
learning_rate_decay_start = 8
lloss_beta = 1
lloss_cross_entropy = 0
lloss_focal_loss = 0
lloss_gau_mask = 1
lr = 5e-05
lr_backbone = 2e-05
lr_backbone_names = ['None']
lr_linear_proj_mult = 0.1
lr_linear_proj_names = ['reference_points', 'sampling_offsets']
lr_proj = 0
map = True
matcher_type = default
max_caption_len = 50
max_eseq_length = 10
max_pos_num = 500
max_text_input_len = 32
merge_criterion = ins_cap_topk
merge_k_boxes = 3
merge_mode = weighted_sum
mil_loss_coef = 0
min_epoch_when_save = -1
nheads = 8
norm_ins_score = sigmoid
nthreads = 4
num_classes = 1
num_feature_levels = 4
num_layers = 1
num_neg_box = 10
num_queries = 100
optimizer_type = adam
position_embedding = sine
position_embedding_scale = 6.283185307179586
pre_percent = 1.0
pretrain = None
pretrain_path =
pretrained_language_model = CLIP
prior_anchor_duration_init = True
prior_manner = all
pseudo_box_aug = False
pseudo_box_aug_mode = random_range
pseudo_box_aug_num = 8
pseudo_box_aug_ratio = 0.02
pseudo_box_type = similarity_op_order_v2
random_anchor_init = True
random_seed = False
ref_rank_loss_coef = 0.0
refine_pseudo_box = False
refine_pseudo_stage_num = 2
rnn_size = 512
sample_method = nearest
save_all_checkpoint = 0
save_checkpoint_every = 1
save_dir = /mnt/data/pjlab-3090-sport/wuhao/logs/dibs
scheduled_sampling_increase_every = 2
scheduled_sampling_increase_prob = 0.05
scheduled_sampling_max_prob = 0.25
scheduled_sampling_start = -1
seed = 777
self_iou_loss_coef = 0.0
set_cost_bbox = 0
set_cost_caption = 0
set_cost_cl = 0.0
set_cost_class = 2
set_cost_giou = 4
set_cost_sim = 1.0
share_caption_head = 1
soft_attention = 1
start_from =
start_from_mode = last
start_refine_epoch = -1
statistic_mode = mode
text_encoder_learning_strategy = frozen
text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/']
text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/']
text_hidden_dim = 768
top_frames = 30
train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json']
train_proposal_file = data/generated_proposals/dbg_trainval_top100.json
train_proposal_sample_num = 30
train_proposal_type = gt
training_scheme = all
transformer_dropout_prob = 0.1
transformer_ff_dim = 512
transformer_input_type = queries
use_additional_cap_layer = False
use_additional_score_layer = False
use_anchor = 0
use_neg_pseudo_box = False
use_pseudo_box = False
use_query_box_for_refine = 0
val_caption_file = data/anet/captiondata/val_1.json
visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/']
visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/']
visual_feature_type = ['CLIP']
vocab_size = 16221
vocab_size_val = 16221
weight_decay = 0.0001
weighted_mil_loss = False
width_ratio = 1
width_th = 1
window_size = 2
with_box_refine = 1
wordRNN_input_feats_type = C
******************** Model structure: ******************************************
PDVC(
(base_encoder): BaseEncoder(
(pos_embed): PositionEmbeddingSine(
(duration_embed_layer): Linear(in_features=256, out_features=256, bias=True)
)
(input_proj): ModuleList(
(0): Sequential(
(0): Conv1d(768, 512, kernel_size=(1,), stride=(1,))
(1): GroupNorm(32, 512, eps=1e-05, affine=True)
)
(1): Sequential(
(0): Conv1d(768, 512, kernel_size=(3,), stride=(2,), padding=(1,))
(1): GroupNorm(32, 512, eps=1e-05, affine=True)
)
(2): Sequential(
(0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,))
(1): GroupNorm(32, 512, eps=1e-05, affine=True)
)
(3): Sequential(
(0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,))
(1): GroupNorm(32, 512, eps=1e-05, affine=True)
)
)
)
(transformer): DeformableTransformer(
(encoder): DeformableTransformerEncoder(
(layers): ModuleList(
(0): DeformableTransformerEncoderLayer(
(self_attn): MSDeformAttn(
(sampling_offsets): Linear(in_features=512, out_features=128, bias=True)
(attention_weights): Linear(in_features=512, out_features=128, bias=True)
(value_proj): Linear(in_features=512, out_features=512, bias=True)
(output_proj): Linear(in_features=512, out_features=512, bias=True)
)
(dropout1): Dropout(p=0.1, inplace=False)
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(linear1): Linear(in_features=512, out_features=512, bias=True)
(dropout2): Dropout(p=0.1, inplace=False)
(linear2): Linear(in_features=512, out_features=512, bias=True)
(dropout3): Dropout(p=0.1, inplace=False)
(norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(1): DeformableTransformerEncoderLayer(
(self_attn): MSDeformAttn(
(sampling_offsets): Linear(in_features=512, out_features=128, bias=True)
(attention_weights): Linear(in_features=512, out_features=128, bias=True)
(value_proj): Linear(in_features=512, out_features=512, bias=True)
(output_proj): Linear(in_features=512, out_features=512, bias=True)
)
(dropout1): Dropout(p=0.1, inplace=False)
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(linear1): Linear(in_features=512, out_features=512, bias=True)
(dropout2): Dropout(p=0.1, inplace=False)
(linear2): Linear(in_features=512, out_features=512, bias=True)
(dropout3): Dropout(p=0.1, inplace=False)
(norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
)
)
(decoder): DeformableTransformerDecoder(
(layers): ModuleList(
(0): DeformableTransformerDecoderLayer(
(cross_attn): MSDeformAttn(
(sampling_offsets): Linear(in_features=512, out_features=128, bias=True)
(attention_weights): Linear(in_features=512, out_features=128, bias=True)
(value_proj): Linear(in_features=512, out_features=512, bias=True)
(output_proj): Linear(in_features=512, out_features=512, bias=True)
)
(dropout1): Dropout(p=0.1, inplace=False)
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(dropout2): Dropout(p=0.1, inplace=False)
(norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(linear1): Linear(in_features=512, out_features=512, bias=True)
(dropout3): Dropout(p=0.1, inplace=False)
(linear2): Linear(in_features=512, out_features=512, bias=True)
(dropout4): Dropout(p=0.1, inplace=False)
(norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
(1): DeformableTransformerDecoderLayer(
(cross_attn): MSDeformAttn(
(sampling_offsets): Linear(in_features=512, out_features=128, bias=True)
(attention_weights): Linear(in_features=512, out_features=128, bias=True)
(value_proj): Linear(in_features=512, out_features=512, bias=True)
(output_proj): Linear(in_features=512, out_features=512, bias=True)
)
(dropout1): Dropout(p=0.1, inplace=False)
(norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(self_attn): MultiheadAttention(
(out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
)
(dropout2): Dropout(p=0.1, inplace=False)
(norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
(linear1): Linear(in_features=512, out_features=512, bias=True)
(dropout3): Dropout(p=0.1, inplace=False)
(linear2): Linear(in_features=512, out_features=512, bias=True)
(dropout4): Dropout(p=0.1, inplace=False)
(norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
)
)
(bbox_head): ModuleList(
(0): MLP(
(layers): ModuleList(
(0): Linear(in_features=512, out_features=512, bias=True)
(1): Linear(in_features=512, out_features=512, bias=True)
(2): Linear(in_features=512, out_features=2, bias=True)
)
)
(1): MLP(
(layers): ModuleList(
(0): Linear(in_features=512, out_features=512, bias=True)
(1): Linear(in_features=512, out_features=512, bias=True)
(2): Linear(in_features=512, out_features=2, bias=True)
)
)
)
)
(pos_trans): Linear(in_features=512, out_features=1024, bias=True)
(pos_trans_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
(reference_points): Linear(in_features=512, out_features=1, bias=True)
)
(caption_head): ModuleList(
(0): LSTMDSACaptioner(
(embed): Embedding(16222, 512)
(logit): Linear(in_features=512, out_features=16222, bias=True)
(dropout): Dropout(p=0.5, inplace=False)
(core): ShowAttendTellCore(
(rnn): LSTM(1536, 512, bias=False, dropout=0.5)
(att_drop): Dropout(p=0.5, inplace=False)
(deformable_att): MSDeformAttnCap(
(sampling_offsets): Linear(in_features=1024, out_features=16, bias=True)
(attention_weights): Linear(in_features=1024, out_features=16, bias=True)
(value_proj): Linear(in_features=512, out_features=512, bias=True)
(output_proj): Linear(in_features=512, out_features=512, bias=True)
)
(ctx2att): Linear(in_features=512, out_features=512, bias=True)
(h2att): Linear(in_features=512, out_features=512, bias=True)
(alpha_net): Linear(in_features=512, out_features=1, bias=True)
)
)
(1): LSTMDSACaptioner(
(embed): Embedding(16222, 512)
(logit): Linear(in_features=512, out_features=16222, bias=True)
(dropout): Dropout(p=0.5, inplace=False)
(core): ShowAttendTellCore(
(rnn): LSTM(1536, 512, bias=False, dropout=0.5)
(att_drop): Dropout(p=0.5, inplace=False)
(deformable_att): MSDeformAttnCap(
(sampling_offsets): Linear(in_features=1024, out_features=16, bias=True)
(attention_weights): Linear(in_features=1024, out_features=16, bias=True)
(value_proj): Linear(in_features=512, out_features=512, bias=True)
(output_proj): Linear(in_features=512, out_features=512, bias=True)
)
(ctx2att): Linear(in_features=512, out_features=512, bias=True)
(h2att): Linear(in_features=512, out_features=512, bias=True)
(alpha_net): Linear(in_features=512, out_features=1, bias=True)
)
)
)
(query_embed): Embedding(100, 1024)
(class_head): ModuleList(
(0): Linear(in_features=512, out_features=1, bias=True)
(1): Linear(in_features=512, out_features=1, bias=True)
)
(class_refine_head): ModuleList(
(0): Linear(in_features=512, out_features=1, bias=True)
(1): Linear(in_features=512, out_features=1, bias=True)
)
(count_head): ModuleList(
(0): Linear(in_features=512, out_features=11, bias=True)
(1): Linear(in_features=512, out_features=11, bias=True)
)
(bbox_head): ModuleList(
(0): MLP(
(layers): ModuleList(
(0): Linear(in_features=512, out_features=512, bias=True)
(1): Linear(in_features=512, out_features=512, bias=True)
(2): Linear(in_features=512, out_features=2, bias=True)
)
)
(1): MLP(
(layers): ModuleList(
(0): Linear(in_features=512, out_features=512, bias=True)
(1): Linear(in_features=512, out_features=512, bias=True)
(2): Linear(in_features=512, out_features=2, bias=True)
)
)
)
(contrastive_projection_event): ModuleList(
(0): Identity()
(1): Identity()
)
(contrastive_projection_text): ModuleList(
(0): Identity()
(1): Identity()
)
)
******************** Strat training ! ******************************************
loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0'])
loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0])
ID seq2-ft(mix)-gt_percent-1.0 iter 1000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.126), ('loss_bbox', 0.117), ('loss_giou', 0.275), ('loss_self_iou', 0.126), ('cardinality_error', 3.775), ('loss_ce_0', 0.284), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.118), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.126), ('cardinality_error_0', 3.775), ('loss_caption_0', 3.781), ('loss_caption', 3.778), ('total_loss', 18.585)]),
time/iter = 0.182, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 2000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.119), ('loss_bbox', 0.087), ('loss_giou', 0.239), ('loss_self_iou', 0.12), ('cardinality_error', 3.705), ('loss_ce_0', 0.289), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.087), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.121), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.682), ('loss_caption', 3.675), ('total_loss', 17.896)]),
time/iter = 0.180, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 3000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.227), ('loss_self_iou', 0.098), ('cardinality_error', 3.705), ('loss_ce_0', 0.292), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.668), ('loss_caption', 3.664), ('total_loss', 17.771)]),
time/iter = 0.181, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 4000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.126), ('loss_bbox', 0.078), ('loss_giou', 0.224), ('loss_self_iou', 0.1), ('cardinality_error', 3.784), ('loss_ce_0', 0.291), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.624), ('loss_caption', 3.629), ('total_loss', 17.579)]),
time/iter = 0.174, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 5000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.121), ('loss_bbox', 0.08), ('loss_giou', 0.218), ('loss_self_iou', 0.114), ('cardinality_error', 3.674), ('loss_ce_0', 0.287), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.08), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.674), ('loss_caption_0', 3.629), ('loss_caption', 3.629), ('total_loss', 17.526)]),
time/iter = 0.178, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 6000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.13), ('loss_bbox', 0.076), ('loss_giou', 0.22), ('loss_self_iou', 0.098), ('cardinality_error', 3.786), ('loss_ce_0', 0.293), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.786), ('loss_caption_0', 3.625), ('loss_caption', 3.622), ('total_loss', 17.555)]),
time/iter = 0.182, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 7000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.215), ('loss_self_iou', 0.097), ('cardinality_error', 3.746), ('loss_ce_0', 0.293), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.746), ('loss_caption_0', 3.58), ('loss_caption', 3.576), ('total_loss', 17.319)]),
time/iter = 0.179, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 8000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.129), ('loss_bbox', 0.078), ('loss_giou', 0.218), ('loss_self_iou', 0.108), ('cardinality_error', 3.754), ('loss_ce_0', 0.288), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.079), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.754), ('loss_caption_0', 3.546), ('loss_caption', 3.546), ('total_loss', 17.209)]),
time/iter = 0.184, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 9000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.078), ('loss_giou', 0.219), ('loss_self_iou', 0.1), ('cardinality_error', 3.685), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.219), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.685), ('loss_caption_0', 3.544), ('loss_caption', 3.54), ('total_loss', 17.2)]),
time/iter = 0.180, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 10000 (epoch 0),
loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.22), ('loss_self_iou', 0.101), ('cardinality_error', 3.748), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.748), ('loss_caption_0', 3.582), ('loss_caption', 3.577), ('total_loss', 17.376)]),
time/iter = 0.180, bad_vid = 0.000
Validation results of iter 10009:
Bleu_1:0.15656016917085527
Bleu_2:0.08210369852679855
Bleu_3:0.042491746140277446
Bleu_4:0.021149866989626908
METEOR:0.08752782819459405
ROUGE_L:0.1577032846084498
CIDEr:0.2687260839927409
Recall:0.4986985069085389
Precision:0.548450952477792
soda_c:0.045070258467165024
para_Bleu_1:0.36987086578065714
para_Bleu_2:0.1987998709052068
para_Bleu_3:0.11671522868501899
para_Bleu_4:0.07164097958462183
para_METEOR:0.13901753612789455
para_ROUGE_L:0.2826680559963382
para_CIDEr:0.0956891322121665
overall score of iter 10009: 0.3063476479246829
Save model at iter 10009 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 10009 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 11000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.124), ('loss_bbox', 0.077), ('loss_giou', 0.217), ('loss_self_iou', 0.101), ('cardinality_error', 3.788), ('loss_ce_0', 0.292), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.788), ('loss_caption_0', 3.446), ('loss_caption', 3.443), ('total_loss', 16.802)]),
time/iter = 0.707, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 12000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.694), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.694), ('loss_caption_0', 3.427), ('loss_caption', 3.428), ('total_loss', 16.701)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 13000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.217), ('loss_self_iou', 0.107), ('cardinality_error', 3.689), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.689), ('loss_caption_0', 3.464), ('loss_caption', 3.461), ('total_loss', 16.871)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 14000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.21), ('loss_self_iou', 0.1), ('cardinality_error', 3.663), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.663), ('loss_caption_0', 3.414), ('loss_caption', 3.41), ('total_loss', 16.616)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 15000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.127), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.828), ('loss_ce_0', 0.296), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.828), ('loss_caption_0', 3.453), ('loss_caption', 3.453), ('total_loss', 16.836)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 16000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.073), ('loss_giou', 0.206), ('loss_self_iou', 0.105), ('cardinality_error', 3.687), ('loss_ce_0', 0.297), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.461), ('loss_caption', 3.462), ('total_loss', 16.803)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 17000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.127), ('loss_bbox', 0.073), ('loss_giou', 0.208), ('loss_self_iou', 0.102), ('cardinality_error', 3.791), ('loss_ce_0', 0.3), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.791), ('loss_caption_0', 3.469), ('loss_caption', 3.465), ('total_loss', 16.864)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 18000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.074), ('loss_giou', 0.205), ('loss_self_iou', 0.107), ('cardinality_error', 3.68), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.68), ('loss_caption_0', 3.478), ('loss_caption', 3.475), ('total_loss', 16.859)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 19000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.073), ('loss_giou', 0.207), ('loss_self_iou', 0.099), ('cardinality_error', 3.752), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.396), ('loss_caption', 3.396), ('total_loss', 16.585)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 20000 (epoch 1),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.128), ('loss_bbox', 0.071), ('loss_giou', 0.208), ('loss_self_iou', 0.101), ('cardinality_error', 3.804), ('loss_ce_0', 0.304), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.804), ('loss_caption_0', 3.42), ('loss_caption', 3.419), ('total_loss', 16.684)]),
time/iter = 0.189, bad_vid = 0.000
Validation results of iter 20018:
Bleu_1:0.15965966113561106
Bleu_2:0.08785069799970043
Bleu_3:0.04739925348589703
Bleu_4:0.02377096308421814
METEOR:0.09062964515721111
ROUGE_L:0.1652647774491388
CIDEr:0.27366191469495676
Recall:0.45131293652113946
Precision:0.5379414954918249
soda_c:0.04303682007432423
para_Bleu_1:0.3640361416830845
para_Bleu_2:0.1986476696673755
para_Bleu_3:0.11814800235116821
para_Bleu_4:0.07336184523852665
para_METEOR:0.13911724177507803
para_ROUGE_L:0.28211794880017504
para_CIDEr:0.08634617454158834
overall score of iter 20018: 0.29882526155519307
Save model at iter 20018 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 21000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.202), ('loss_self_iou', 0.101), ('cardinality_error', 3.666), ('loss_ce_0', 0.299), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.666), ('loss_caption_0', 3.344), ('loss_caption', 3.335), ('total_loss', 16.294)]),
time/iter = 0.726, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 22000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.201), ('loss_self_iou', 0.109), ('cardinality_error', 3.752), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.302), ('loss_caption', 3.304), ('total_loss', 16.116)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 23000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.128), ('loss_bbox', 0.077), ('loss_giou', 0.208), ('loss_self_iou', 0.113), ('cardinality_error', 3.803), ('loss_ce_0', 0.299), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.112), ('cardinality_error_0', 3.803), ('loss_caption_0', 3.348), ('loss_caption', 3.34), ('total_loss', 16.363)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 24000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.122), ('loss_bbox', 0.076), ('loss_giou', 0.207), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.294), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 3.354), ('loss_caption', 3.351), ('total_loss', 16.364)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 25000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.213), ('loss_self_iou', 0.091), ('cardinality_error', 3.734), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.734), ('loss_caption_0', 3.372), ('loss_caption', 3.372), ('total_loss', 16.494)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 26000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.125), ('loss_bbox', 0.072), ('loss_giou', 0.203), ('loss_self_iou', 0.096), ('cardinality_error', 3.784), ('loss_ce_0', 0.299), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.334), ('loss_caption', 3.333), ('total_loss', 16.279)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 27000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.118), ('loss_bbox', 0.076), ('loss_giou', 0.203), ('loss_self_iou', 0.102), ('cardinality_error', 3.64), ('loss_ce_0', 0.291), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.64), ('loss_caption_0', 3.348), ('loss_caption', 3.345), ('total_loss', 16.287)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 28000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.201), ('loss_self_iou', 0.095), ('cardinality_error', 3.774), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.774), ('loss_caption_0', 3.337), ('loss_caption', 3.333), ('total_loss', 16.249)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 29000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.12), ('loss_bbox', 0.075), ('loss_giou', 0.204), ('loss_self_iou', 0.1), ('cardinality_error', 3.755), ('loss_ce_0', 0.299), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.755), ('loss_caption_0', 3.315), ('loss_caption', 3.321), ('total_loss', 16.223)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 30000 (epoch 2),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.119), ('loss_bbox', 0.071), ('loss_giou', 0.195), ('loss_self_iou', 0.103), ('cardinality_error', 3.72), ('loss_ce_0', 0.302), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.72), ('loss_caption_0', 3.347), ('loss_caption', 3.349), ('total_loss', 16.283)]),
time/iter = 0.195, bad_vid = 0.000
Validation results of iter 30027:
Bleu_1:0.15440507165989542
Bleu_2:0.08178273697953425
Bleu_3:0.042600749568780155
Bleu_4:0.02119123483046711
METEOR:0.08563216148714695
ROUGE_L:0.156809182143994
CIDEr:0.25960752079137744
Recall:0.5075951227720545
Precision:0.571834112941489
soda_c:0.048597974030683
para_Bleu_1:0.3985431504573892
para_Bleu_2:0.22415947108296613
para_Bleu_3:0.1341003834690626
para_Bleu_4:0.08312155143550452
para_METEOR:0.1510085678983445
para_ROUGE_L:0.2957598062989384
para_CIDEr:0.12271570278513648
overall score of iter 30027: 0.3568458221189855
Save model at iter 30027 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 30027 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 31000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.123), ('loss_bbox', 0.073), ('loss_giou', 0.202), ('loss_self_iou', 0.114), ('cardinality_error', 3.772), ('loss_ce_0', 0.296), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.772), ('loss_caption_0', 3.24), ('loss_caption', 3.242), ('total_loss', 15.889)]),
time/iter = 0.725, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 32000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.093), ('cardinality_error', 3.66), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.251), ('loss_caption', 3.248), ('total_loss', 15.869)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 33000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.787), ('loss_ce_0', 0.301), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.223), ('loss_caption', 3.225), ('total_loss', 15.81)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 34000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.121), ('loss_bbox', 0.076), ('loss_giou', 0.201), ('loss_self_iou', 0.107), ('cardinality_error', 3.719), ('loss_ce_0', 0.296), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.719), ('loss_caption_0', 3.21), ('loss_caption', 3.206), ('total_loss', 15.752)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 35000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.074), ('loss_giou', 0.201), ('loss_self_iou', 0.1), ('cardinality_error', 3.761), ('loss_ce_0', 0.304), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.761), ('loss_caption_0', 3.261), ('loss_caption', 3.267), ('total_loss', 16.006)]),
time/iter = 0.187, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 36000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.12), ('loss_bbox', 0.074), ('loss_giou', 0.202), ('loss_self_iou', 0.096), ('cardinality_error', 3.731), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.731), ('loss_caption_0', 3.322), ('loss_caption', 3.322), ('total_loss', 16.237)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 37000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.088), ('cardinality_error', 3.747), ('loss_ce_0', 0.306), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.276), ('loss_caption', 3.278), ('total_loss', 16.005)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 38000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.122), ('loss_bbox', 0.073), ('loss_giou', 0.198), ('loss_self_iou', 0.096), ('cardinality_error', 3.747), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.26), ('loss_caption', 3.267), ('total_loss', 15.944)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 39000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.073), ('loss_giou', 0.194), ('loss_self_iou', 0.096), ('cardinality_error', 3.714), ('loss_ce_0', 0.3), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 3.29), ('loss_caption', 3.284), ('total_loss', 16.029)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 40000 (epoch 3),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.068), ('loss_giou', 0.187), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 3.255), ('loss_caption', 3.258), ('total_loss', 15.861)]),
time/iter = 0.191, bad_vid = 0.000
Validation results of iter 40036:
Bleu_1:0.16003947012491918
Bleu_2:0.08640386650819816
Bleu_3:0.045769192920880976
Bleu_4:0.023139762266241797
METEOR:0.08893476927946467
ROUGE_L:0.16285119298911696
CIDEr:0.27850058398714506
Recall:0.4974410652224822
Precision:0.571762083926507
soda_c:0.04898353247531122
para_Bleu_1:0.4116267700746525
para_Bleu_2:0.23315066082372427
para_Bleu_3:0.139785630195007
para_Bleu_4:0.08689414164874545
para_METEOR:0.15321412716959742
para_ROUGE_L:0.2993749803089721
para_CIDEr:0.12755194391496638
overall score of iter 40036: 0.3676602127333093
Save model at iter 40036 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 40036 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 41000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.196), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.159), ('loss_caption', 3.162), ('total_loss', 15.549)]),
time/iter = 0.733, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 42000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.698), ('loss_ce_0', 0.298), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.698), ('loss_caption_0', 3.191), ('loss_caption', 3.187), ('total_loss', 15.571)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 43000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.198), ('loss_self_iou', 0.089), ('cardinality_error', 3.785), ('loss_ce_0', 0.306), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.785), ('loss_caption_0', 3.247), ('loss_caption', 3.249), ('total_loss', 15.93)]),
time/iter = 0.195, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 44000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.072), ('loss_giou', 0.194), ('loss_self_iou', 0.104), ('cardinality_error', 3.727), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.727), ('loss_caption_0', 3.228), ('loss_caption', 3.227), ('total_loss', 15.794)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 45000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.194), ('loss_self_iou', 0.094), ('cardinality_error', 3.684), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.684), ('loss_caption_0', 3.138), ('loss_caption', 3.143), ('total_loss', 15.458)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 46000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.194), ('loss_self_iou', 0.107), ('cardinality_error', 3.8), ('loss_ce_0', 0.301), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.8), ('loss_caption_0', 3.198), ('loss_caption', 3.202), ('total_loss', 15.69)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 47000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.071), ('loss_giou', 0.193), ('loss_self_iou', 0.1), ('cardinality_error', 3.724), ('loss_ce_0', 0.302), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.166), ('loss_caption', 3.167), ('total_loss', 15.544)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 48000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.074), ('loss_giou', 0.194), ('loss_self_iou', 0.1), ('cardinality_error', 3.779), ('loss_ce_0', 0.303), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.779), ('loss_caption_0', 3.197), ('loss_caption', 3.204), ('total_loss', 15.693)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 49000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.186), ('loss_self_iou', 0.103), ('cardinality_error', 3.67), ('loss_ce_0', 0.299), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.67), ('loss_caption_0', 3.197), ('loss_caption', 3.193), ('total_loss', 15.597)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 50000 (epoch 4),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.191), ('loss_self_iou', 0.1), ('cardinality_error', 3.769), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 3.195), ('loss_caption', 3.196), ('total_loss', 15.646)]),
time/iter = 0.193, bad_vid = 0.000
Validation results of iter 50045:
Bleu_1:0.1612752203314224
Bleu_2:0.08712092952271142
Bleu_3:0.04643407984417907
Bleu_4:0.024237450149938583
METEOR:0.0888552980469009
ROUGE_L:0.16165678007821221
CIDEr:0.28844655875134945
Recall:0.5079771255793173
Precision:0.5707494407158785
soda_c:0.05143467092505771
para_Bleu_1:0.425828341023263
para_Bleu_2:0.2431293051387748
para_Bleu_3:0.14662751878582
para_Bleu_4:0.09131956416083617
para_METEOR:0.15868276543147294
para_ROUGE_L:0.30762031965083425
para_CIDEr:0.1438790695271004
overall score of iter 50045: 0.39388139911940956
Save model at iter 50045 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 50045 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 51000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.119), ('loss_bbox', 0.072), ('loss_giou', 0.19), ('loss_self_iou', 0.1), ('cardinality_error', 3.708), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.708), ('loss_caption_0', 3.123), ('loss_caption', 3.122), ('total_loss', 15.345)]),
time/iter = 0.739, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 52000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.195), ('loss_self_iou', 0.091), ('cardinality_error', 3.787), ('loss_ce_0', 0.302), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.08), ('loss_caption', 3.08), ('total_loss', 15.224)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 53000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.192), ('loss_self_iou', 0.101), ('cardinality_error', 3.688), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.688), ('loss_caption_0', 3.121), ('loss_caption', 3.125), ('total_loss', 15.366)]),
time/iter = 0.196, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 54000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.66), ('loss_ce_0', 0.303), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.151), ('loss_caption', 3.158), ('total_loss', 15.44)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 55000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.759), ('loss_ce_0', 0.314), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.759), ('loss_caption_0', 3.137), ('loss_caption', 3.138), ('total_loss', 15.427)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 56000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.7), ('loss_ce_0', 0.303), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.7), ('loss_caption_0', 3.128), ('loss_caption', 3.132), ('total_loss', 15.353)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 57000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.125), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.833), ('loss_ce_0', 0.308), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.833), ('loss_caption_0', 3.157), ('loss_caption', 3.154), ('total_loss', 15.516)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 58000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.072), ('loss_giou', 0.192), ('loss_self_iou', 0.099), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.092), ('loss_caption', 3.088), ('total_loss', 15.209)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 59000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.092), ('cardinality_error', 3.806), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.806), ('loss_caption_0', 3.204), ('loss_caption', 3.204), ('total_loss', 15.668)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 60000 (epoch 5),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.73), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.185), ('loss_caption', 3.179), ('total_loss', 15.62)]),
time/iter = 0.192, bad_vid = 0.000
Validation results of iter 60054:
Bleu_1:0.16203040821313286
Bleu_2:0.087418866671477
Bleu_3:0.04641401855891123
Bleu_4:0.023872355329811287
METEOR:0.08736154709181514
ROUGE_L:0.16095171754962678
CIDEr:0.3019460931650574
Recall:0.5237442505746305
Precision:0.5691986983933232
soda_c:0.05366939846142926
para_Bleu_1:0.4285515683378188
para_Bleu_2:0.24896313523930838
para_Bleu_3:0.15083849533584295
para_Bleu_4:0.09425440122753082
para_METEOR:0.15418242275887206
para_ROUGE_L:0.3037081433191389
para_CIDEr:0.16822639157343386
overall score of iter 60054: 0.41666321555983676
Save model at iter 60054 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 60054 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 61000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.687), ('loss_ce_0', 0.303), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.025), ('loss_caption', 3.031), ('total_loss', 14.914)]),
time/iter = 0.715, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 62000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.125), ('loss_bbox', 0.068), ('loss_giou', 0.192), ('loss_self_iou', 0.088), ('cardinality_error', 3.809), ('loss_ce_0', 0.304), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.809), ('loss_caption_0', 3.067), ('loss_caption', 3.064), ('total_loss', 15.147)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 63000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.072), ('loss_giou', 0.189), ('loss_self_iou', 0.102), ('cardinality_error', 3.636), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.636), ('loss_caption_0', 3.09), ('loss_caption', 3.083), ('total_loss', 15.188)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 64000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.185), ('loss_self_iou', 0.105), ('cardinality_error', 3.738), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.738), ('loss_caption_0', 3.09), ('loss_caption', 3.088), ('total_loss', 15.193)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 65000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.191), ('loss_self_iou', 0.094), ('cardinality_error', 3.735), ('loss_ce_0', 0.304), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.735), ('loss_caption_0', 3.087), ('loss_caption', 3.083), ('total_loss', 15.203)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 66000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.121), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.095), ('cardinality_error', 3.753), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.093), ('loss_caption', 3.093), ('total_loss', 15.235)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 67000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.189), ('loss_self_iou', 0.099), ('cardinality_error', 3.781), ('loss_ce_0', 0.299), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.781), ('loss_caption_0', 3.104), ('loss_caption', 3.095), ('total_loss', 15.24)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 68000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.702), ('loss_ce_0', 0.3), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.702), ('loss_caption_0', 3.092), ('loss_caption', 3.087), ('total_loss', 15.171)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 69000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.116), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.087), ('cardinality_error', 3.705), ('loss_ce_0', 0.303), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.087), ('loss_caption', 3.084), ('total_loss', 15.154)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 70000 (epoch 6),
loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.119), ('loss_bbox', 0.07), ('loss_giou', 0.188), ('loss_self_iou', 0.104), ('cardinality_error', 3.763), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.763), ('loss_caption_0', 3.137), ('loss_caption', 3.142), ('total_loss', 15.421)]),
time/iter = 0.201, bad_vid = 0.000
Validation results of iter 70063:
Bleu_1:0.17095715677415013
Bleu_2:0.0951967897773989
Bleu_3:0.05145074727592996
Bleu_4:0.026686223548170303
METEOR:0.09033289555302068
ROUGE_L:0.16939818741017104
CIDEr:0.33299543538258497
Recall:0.5001550726802355
Precision:0.5629321740898863
soda_c:0.05378783144134501
para_Bleu_1:0.44719474980697405
para_Bleu_2:0.2615784516531111
para_Bleu_3:0.15956746990786394
para_Bleu_4:0.09983770060804388
para_METEOR:0.15549284849496958
para_ROUGE_L:0.30852597622578265
para_CIDEr:0.18758102150887232
overall score of iter 70063: 0.4429115706118858
Save model at iter 70063 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 70063 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 71000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.115), ('loss_bbox', 0.067), ('loss_giou', 0.187), ('loss_self_iou', 0.091), ('cardinality_error', 3.724), ('loss_ce_0', 0.304), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.994), ('loss_caption', 2.994), ('total_loss', 14.812)]),
time/iter = 0.691, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 72000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.099), ('cardinality_error', 3.665), ('loss_ce_0', 0.296), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.995), ('loss_caption', 3.0), ('total_loss', 14.803)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 73000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.067), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.762), ('loss_ce_0', 0.302), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.762), ('loss_caption_0', 3.03), ('loss_caption', 3.034), ('total_loss', 14.924)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 74000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.181), ('loss_self_iou', 0.093), ('cardinality_error', 3.722), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.722), ('loss_caption_0', 3.061), ('loss_caption', 3.062), ('total_loss', 15.037)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 75000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.835), ('loss_caption_0', 3.102), ('loss_caption', 3.108), ('total_loss', 15.261)]),
time/iter = 0.195, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 76000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.19), ('loss_self_iou', 0.096), ('cardinality_error', 3.787), ('loss_ce_0', 0.305), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.055), ('loss_caption', 3.056), ('total_loss', 15.081)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 77000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.191), ('loss_self_iou', 0.101), ('cardinality_error', 3.753), ('loss_ce_0', 0.3), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.064), ('loss_caption', 3.063), ('total_loss', 15.105)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 78000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.812), ('loss_ce_0', 0.302), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.812), ('loss_caption_0', 3.075), ('loss_caption', 3.081), ('total_loss', 15.186)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 79000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.119), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.712), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.712), ('loss_caption_0', 3.004), ('loss_caption', 3.004), ('total_loss', 14.833)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 80000 (epoch 7),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.639), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.639), ('loss_caption_0', 3.011), ('loss_caption', 3.021), ('total_loss', 14.846)]),
time/iter = 0.189, bad_vid = 0.000
Validation results of iter 80072:
Bleu_1:0.16525493799366836
Bleu_2:0.09017429361474327
Bleu_3:0.04843073565357156
Bleu_4:0.025752141227780294
METEOR:0.09042668571725655
ROUGE_L:0.1657835735936403
CIDEr:0.30766696683798356
Recall:0.5070758476264831
Precision:0.5698723815334497
soda_c:0.05193286444599829
para_Bleu_1:0.4299765573510605
para_Bleu_2:0.24998607326423264
para_Bleu_3:0.15168978606887273
para_Bleu_4:0.09540463753102806
para_METEOR:0.15913054274631774
para_ROUGE_L:0.30821511076520103
para_CIDEr:0.14655297481419807
overall score of iter 80072: 0.4010881550915439
Save model at iter 80072 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 81000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.098), ('cardinality_error', 3.664), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.972), ('loss_caption', 2.974), ('total_loss', 14.63)]),
time/iter = 0.723, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 82000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.098), ('cardinality_error', 3.692), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.914), ('loss_caption', 2.912), ('total_loss', 14.413)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 83000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.939), ('loss_caption', 2.933), ('total_loss', 14.562)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 84000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.119), ('loss_bbox', 0.066), ('loss_giou', 0.18), ('loss_self_iou', 0.086), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.086), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.964), ('loss_caption', 2.963), ('total_loss', 14.614)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 85000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.187), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.301), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 2.942), ('loss_caption', 2.945), ('total_loss', 14.596)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 86000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.067), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.989), ('loss_caption', 2.988), ('total_loss', 14.745)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 87000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.119), ('loss_bbox', 0.067), ('loss_giou', 0.178), ('loss_self_iou', 0.096), ('cardinality_error', 3.692), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.93), ('loss_caption', 2.931), ('total_loss', 14.465)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 88000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.102), ('cardinality_error', 3.74), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.945), ('loss_caption', 2.939), ('total_loss', 14.538)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 89000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.096), ('cardinality_error', 3.911), ('loss_ce_0', 0.303), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.911), ('loss_caption_0', 2.981), ('loss_caption', 2.985), ('total_loss', 14.762)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 90000 (epoch 8),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.113), ('loss_bbox', 0.066), ('loss_giou', 0.174), ('loss_self_iou', 0.099), ('cardinality_error', 3.667), ('loss_ce_0', 0.3), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.946), ('loss_caption', 2.945), ('total_loss', 14.493)]),
time/iter = 0.191, bad_vid = 0.000
Validation results of iter 90081:
Bleu_1:0.1659435247550983
Bleu_2:0.09010888064116455
Bleu_3:0.04740925434645997
Bleu_4:0.023810200153797586
METEOR:0.0893691583245007
ROUGE_L:0.16481267120708817
CIDEr:0.3096929324572276
Recall:0.5271698247293078
Precision:0.5766981899532185
soda_c:0.05637593299631936
para_Bleu_1:0.4507795558374508
para_Bleu_2:0.2668765313566654
para_Bleu_3:0.16324000259413463
para_Bleu_4:0.10292908422008885
para_METEOR:0.163503434468027
para_ROUGE_L:0.3141109355407807
para_CIDEr:0.1830754815850521
overall score of iter 90081: 0.44950800027316795
Save model at iter 90081 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 90081 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 91000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.066), ('loss_giou', 0.179), ('loss_self_iou', 0.097), ('cardinality_error', 3.807), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.807), ('loss_caption_0', 2.916), ('loss_caption', 2.914), ('total_loss', 14.411)]),
time/iter = 0.724, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 92000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.121), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.093), ('cardinality_error', 3.784), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.784), ('loss_caption_0', 2.916), ('loss_caption', 2.915), ('total_loss', 14.422)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 93000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.117), ('loss_bbox', 0.065), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.806), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.806), ('loss_caption_0', 2.9), ('loss_caption', 2.905), ('total_loss', 14.377)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 94000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.109), ('loss_bbox', 0.068), ('loss_giou', 0.174), ('loss_self_iou', 0.105), ('cardinality_error', 3.616), ('loss_ce_0', 0.293), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.106), ('cardinality_error_0', 3.616), ('loss_caption_0', 2.912), ('loss_caption', 2.914), ('total_loss', 14.339)]),
time/iter = 0.187, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 95000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.12), ('loss_bbox', 0.066), ('loss_giou', 0.185), ('loss_self_iou', 0.093), ('cardinality_error', 3.805), ('loss_ce_0', 0.296), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.805), ('loss_caption_0', 2.938), ('loss_caption', 2.941), ('total_loss', 14.546)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 96000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.177), ('loss_self_iou', 0.103), ('cardinality_error', 3.684), ('loss_ce_0', 0.293), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.684), ('loss_caption_0', 2.928), ('loss_caption', 2.931), ('total_loss', 14.434)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 97000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.111), ('loss_bbox', 0.066), ('loss_giou', 0.184), ('loss_self_iou', 0.095), ('cardinality_error', 3.693), ('loss_ce_0', 0.298), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.693), ('loss_caption_0', 2.902), ('loss_caption', 2.903), ('total_loss', 14.392)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 98000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.115), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.089), ('cardinality_error', 3.738), ('loss_ce_0', 0.298), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.738), ('loss_caption_0', 2.896), ('loss_caption', 2.902), ('total_loss', 14.361)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 99000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.115), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.095), ('cardinality_error', 3.702), ('loss_ce_0', 0.296), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.702), ('loss_caption_0', 2.956), ('loss_caption', 2.956), ('total_loss', 14.525)]),
time/iter = 0.195, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 100000 (epoch 9),
loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.092), ('cardinality_error', 3.751), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.751), ('loss_caption_0', 2.932), ('loss_caption', 2.932), ('total_loss', 14.453)]),
time/iter = 0.191, bad_vid = 0.000
Validation results of iter 100090:
Bleu_1:0.16664911544364056
Bleu_2:0.09023295213839283
Bleu_3:0.04763940550902772
Bleu_4:0.02409205514859969
METEOR:0.0878588871148787
ROUGE_L:0.16401896184386325
CIDEr:0.31947446694949533
Recall:0.5282742157284517
Precision:0.5750796556165633
soda_c:0.05745241491068406
para_Bleu_1:0.46204429574393835
para_Bleu_2:0.2749900961045832
para_Bleu_3:0.1683879565471281
para_Bleu_4:0.10624339593597942
para_METEOR:0.16245439213508253
para_ROUGE_L:0.3162965936511474
para_CIDEr:0.20803178964320856
overall score of iter 100090: 0.4767295777142705
Save model at iter 100090 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 100090 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 101000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.849), ('loss_caption', 2.847), ('total_loss', 14.064)]),
time/iter = 0.713, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 102000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.116), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.093), ('cardinality_error', 3.695), ('loss_ce_0', 0.293), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.85), ('loss_caption', 2.848), ('total_loss', 14.087)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 103000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.115), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.724), ('loss_ce_0', 0.293), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.846), ('loss_caption', 2.854), ('total_loss', 14.092)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 104000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.097), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.916), ('loss_caption', 2.913), ('total_loss', 14.362)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 105000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.907), ('loss_caption', 2.902), ('total_loss', 14.342)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 106000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.11), ('cardinality_error', 3.775), ('loss_ce_0', 0.293), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.876), ('loss_caption', 2.875), ('total_loss', 14.264)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 107000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.178), ('loss_self_iou', 0.099), ('cardinality_error', 3.743), ('loss_ce_0', 0.291), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.91), ('loss_caption', 2.909), ('total_loss', 14.358)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 108000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.118), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.1), ('cardinality_error', 3.81), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.81), ('loss_caption_0', 2.928), ('loss_caption', 2.93), ('total_loss', 14.446)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 109000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.118), ('loss_bbox', 0.063), ('loss_giou', 0.178), ('loss_self_iou', 0.091), ('cardinality_error', 3.78), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.78), ('loss_caption_0', 2.916), ('loss_caption', 2.912), ('total_loss', 14.396)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 110000 (epoch 10),
loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.087), ('cardinality_error', 3.72), ('loss_ce_0', 0.297), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.948), ('loss_caption', 2.948), ('total_loss', 14.539)]),
time/iter = 0.196, bad_vid = 0.000
Validation results of iter 110099:
Bleu_1:0.1671778590456048
Bleu_2:0.09077014613023152
Bleu_3:0.0476684747303012
Bleu_4:0.02445564298599047
METEOR:0.08933235383587503
ROUGE_L:0.1654660162888944
CIDEr:0.31886265111118334
Recall:0.5314017615268335
Precision:0.5831469052945512
soda_c:0.05853263249839839
para_Bleu_1:0.46544090189732323
para_Bleu_2:0.2789325258737778
para_Bleu_3:0.17172911957785325
para_Bleu_4:0.10903514181091935
para_METEOR:0.16550159188298816
para_ROUGE_L:0.3181118223429575
para_CIDEr:0.2056618808195008
overall score of iter 110099: 0.4801986145134083
Save model at iter 110099 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 110099 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 111000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.095), ('cardinality_error', 3.718), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.718), ('loss_caption_0', 2.867), ('loss_caption', 2.869), ('total_loss', 14.14)]),
time/iter = 0.727, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 112000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.725), ('loss_ce_0', 0.289), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.844), ('loss_caption', 2.842), ('total_loss', 14.015)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 113000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.734), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.734), ('loss_caption_0', 2.837), ('loss_caption', 2.834), ('total_loss', 13.981)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 114000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.739), ('loss_ce_0', 0.285), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.739), ('loss_caption_0', 2.855), ('loss_caption', 2.857), ('total_loss', 14.084)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 115000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.175), ('loss_self_iou', 0.092), ('cardinality_error', 3.74), ('loss_ce_0', 0.284), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.823), ('loss_caption', 2.824), ('total_loss', 13.959)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 116000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.177), ('loss_self_iou', 0.088), ('cardinality_error', 3.753), ('loss_ce_0', 0.288), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.846), ('loss_caption', 2.843), ('total_loss', 14.073)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 117000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.755), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.804), ('loss_caption', 2.81), ('total_loss', 13.896)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 118000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.093), ('cardinality_error', 3.715), ('loss_ce_0', 0.285), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.715), ('loss_caption_0', 2.863), ('loss_caption', 2.866), ('total_loss', 14.129)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 119000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.064), ('loss_giou', 0.176), ('loss_self_iou', 0.098), ('cardinality_error', 3.735), ('loss_ce_0', 0.287), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.735), ('loss_caption_0', 2.844), ('loss_caption', 2.843), ('total_loss', 14.061)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 120000 (epoch 11),
loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.175), ('loss_self_iou', 0.101), ('cardinality_error', 3.755), ('loss_ce_0', 0.285), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.868), ('loss_caption', 2.878), ('total_loss', 14.168)]),
time/iter = 0.190, bad_vid = 0.000
Validation results of iter 120108:
Bleu_1:0.16560019346009094
Bleu_2:0.08934946581658681
Bleu_3:0.04692472826903507
Bleu_4:0.023331060597699706
METEOR:0.08861943572471001
ROUGE_L:0.16392659155605854
CIDEr:0.31177527957257306
Recall:0.5248955646301546
Precision:0.5713061826316813
soda_c:0.056694173808073595
para_Bleu_1:0.45551540477127933
para_Bleu_2:0.2725270289009415
para_Bleu_3:0.16731081427102573
para_Bleu_4:0.10555679460767188
para_METEOR:0.1665724805603667
para_ROUGE_L:0.31619749898051375
para_CIDEr:0.19719071969736374
overall score of iter 120108: 0.4693199948654023
Save model at iter 120108 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 121000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.095), ('cardinality_error', 3.691), ('loss_ce_0', 0.284), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.809), ('loss_caption', 2.808), ('total_loss', 13.835)]),
time/iter = 0.727, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 122000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.17), ('loss_self_iou', 0.093), ('cardinality_error', 3.706), ('loss_ce_0', 0.281), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.706), ('loss_caption_0', 2.811), ('loss_caption', 2.814), ('total_loss', 13.867)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 123000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.691), ('loss_ce_0', 0.281), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.789), ('loss_caption', 2.797), ('total_loss', 13.808)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 124000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.063), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.76), ('loss_ce_0', 0.281), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.76), ('loss_caption_0', 2.839), ('loss_caption', 2.842), ('total_loss', 13.984)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 125000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.763), ('loss_ce_0', 0.282), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.763), ('loss_caption_0', 2.81), ('loss_caption', 2.815), ('total_loss', 13.898)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 126000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.095), ('cardinality_error', 3.717), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.717), ('loss_caption_0', 2.789), ('loss_caption', 2.787), ('total_loss', 13.835)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 127000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.867), ('loss_caption', 2.871), ('total_loss', 14.097)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 128000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.113), ('loss_bbox', 0.063), ('loss_giou', 0.173), ('loss_self_iou', 0.092), ('cardinality_error', 3.793), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.793), ('loss_caption_0', 2.868), ('loss_caption', 2.863), ('total_loss', 14.111)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 129000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.1), ('cardinality_error', 3.686), ('loss_ce_0', 0.283), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.686), ('loss_caption_0', 2.812), ('loss_caption', 2.813), ('total_loss', 13.903)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 130000 (epoch 12),
loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.772), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.772), ('loss_caption_0', 2.86), ('loss_caption', 2.861), ('total_loss', 14.105)]),
time/iter = 0.190, bad_vid = 0.000
Validation results of iter 130117:
Bleu_1:0.16778675341331784
Bleu_2:0.09082555766488616
Bleu_3:0.047445681271689716
Bleu_4:0.02375280793420285
METEOR:0.08883520478698428
ROUGE_L:0.16531435721130755
CIDEr:0.31778343902267087
Recall:0.5273619026669621
Precision:0.5698181479221706
soda_c:0.05753856798988932
para_Bleu_1:0.4610381779339771
para_Bleu_2:0.2761144617772928
para_Bleu_3:0.16915034097081671
para_Bleu_4:0.10654029953240575
para_METEOR:0.16638305166981465
para_ROUGE_L:0.31710573495570465
para_CIDEr:0.19601570682645908
overall score of iter 130117: 0.46893905802867947
Save model at iter 130117 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 131000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.062), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.75), ('loss_ce_0', 0.279), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.75), ('loss_caption_0', 2.817), ('loss_caption', 2.826), ('total_loss', 13.897)]),
time/iter = 0.734, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 132000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.109), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.089), ('cardinality_error', 3.814), ('loss_ce_0', 0.274), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.814), ('loss_caption_0', 2.778), ('loss_caption', 2.776), ('total_loss', 13.726)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 133000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.095), ('cardinality_error', 3.773), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.773), ('loss_caption_0', 2.843), ('loss_caption', 2.843), ('total_loss', 13.999)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 134000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.108), ('loss_bbox', 0.065), ('loss_giou', 0.171), ('loss_self_iou', 0.101), ('cardinality_error', 3.743), ('loss_ce_0', 0.276), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.786), ('loss_caption', 2.787), ('total_loss', 13.756)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 135000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.794), ('loss_ce_0', 0.281), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.785), ('loss_caption', 2.784), ('total_loss', 13.759)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 136000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.065), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.653), ('loss_ce_0', 0.279), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.828), ('loss_caption', 2.834), ('total_loss', 13.919)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 137000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.105), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.099), ('cardinality_error', 3.654), ('loss_ce_0', 0.281), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.654), ('loss_caption_0', 2.79), ('loss_caption', 2.799), ('total_loss', 13.806)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 138000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.171), ('loss_self_iou', 0.095), ('cardinality_error', 3.714), ('loss_ce_0', 0.28), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.835), ('loss_caption', 2.828), ('total_loss', 13.945)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 139000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.098), ('cardinality_error', 3.813), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.83), ('loss_caption', 2.828), ('total_loss', 13.924)]),
time/iter = 0.186, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 140000 (epoch 13),
loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.09), ('cardinality_error', 3.664), ('loss_ce_0', 0.28), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.821), ('loss_caption', 2.823), ('total_loss', 13.905)]),
time/iter = 0.191, bad_vid = 0.000
Validation results of iter 140126:
Bleu_1:0.16683698969676453
Bleu_2:0.09036855967772307
Bleu_3:0.047484441130632896
Bleu_4:0.023876859658376735
METEOR:0.08814626862844692
ROUGE_L:0.16473003568483396
CIDEr:0.3189568758512915
Recall:0.5281546209817979
Precision:0.5704333604501349
soda_c:0.057417105431783064
para_Bleu_1:0.4580706340663244
para_Bleu_2:0.27372623489326064
para_Bleu_3:0.16745128920972313
para_Bleu_4:0.10550306643408856
para_METEOR:0.16656454278617736
para_ROUGE_L:0.31631873012989425
para_CIDEr:0.19724321819057877
overall score of iter 140126: 0.46931082741084473
Save model at iter 140126 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 141000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.171), ('loss_self_iou', 0.106), ('cardinality_error', 3.774), ('loss_ce_0', 0.27), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.75), ('loss_caption', 2.748), ('total_loss', 13.572)]),
time/iter = 0.739, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 142000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.173), ('loss_self_iou', 0.091), ('cardinality_error', 3.797), ('loss_ce_0', 0.272), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.797), ('loss_caption_0', 2.72), ('loss_caption', 2.722), ('total_loss', 13.492)]),
time/iter = 0.186, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 143000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.162), ('loss_self_iou', 0.095), ('cardinality_error', 3.637), ('loss_ce_0', 0.268), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.637), ('loss_caption_0', 2.782), ('loss_caption', 2.782), ('total_loss', 13.626)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 144000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.112), ('loss_bbox', 0.062), ('loss_giou', 0.172), ('loss_self_iou', 0.094), ('cardinality_error', 3.831), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.831), ('loss_caption_0', 2.793), ('loss_caption', 2.79), ('total_loss', 13.773)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 145000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.101), ('loss_bbox', 0.061), ('loss_giou', 0.16), ('loss_self_iou', 0.093), ('cardinality_error', 3.665), ('loss_ce_0', 0.273), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.168), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.762), ('loss_caption', 2.767), ('total_loss', 13.554)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 146000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.725), ('loss_ce_0', 0.276), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.813), ('loss_caption', 2.813), ('total_loss', 13.811)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 147000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.097), ('cardinality_error', 3.714), ('loss_ce_0', 0.273), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.747), ('loss_caption', 2.745), ('total_loss', 13.578)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 148000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.728), ('loss_ce_0', 0.274), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.728), ('loss_caption_0', 2.843), ('loss_caption', 2.84), ('total_loss', 13.944)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 149000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.799), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.836), ('loss_caption', 2.836), ('total_loss', 13.926)]),
time/iter = 0.196, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 150000 (epoch 14),
loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.087), ('cardinality_error', 3.703), ('loss_ce_0', 0.272), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.703), ('loss_caption_0', 2.806), ('loss_caption', 2.806), ('total_loss', 13.795)]),
time/iter = 0.193, bad_vid = 0.000
Validation results of iter 150135:
Bleu_1:0.16662144072598145
Bleu_2:0.08988753231411394
Bleu_3:0.04690847145308288
Bleu_4:0.023224274927987735
METEOR:0.08725158341768323
ROUGE_L:0.16364893754496343
CIDEr:0.32028824475030926
Recall:0.5260420675803493
Precision:0.5630584367161506
soda_c:0.057565785652999135
para_Bleu_1:0.46764194087144684
para_Bleu_2:0.2801629240374498
para_Bleu_3:0.1713033186995987
para_Bleu_4:0.10750827268624512
para_METEOR:0.16742715934059368
para_ROUGE_L:0.31858424377772926
para_CIDEr:0.2089956210595351
overall score of iter 150135: 0.4839310530863739
Save model at iter 150135 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 150135 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 151000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.101), ('loss_bbox', 0.063), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.645), ('loss_ce_0', 0.266), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.645), ('loss_caption_0', 2.762), ('loss_caption', 2.759), ('total_loss', 13.537)]),
time/iter = 0.737, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 152000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.087), ('cardinality_error', 3.722), ('loss_ce_0', 0.269), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.762), ('loss_caption', 2.766), ('total_loss', 13.59)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 153000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.111), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.083), ('cardinality_error', 3.813), ('loss_ce_0', 0.267), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.085), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.777), ('loss_caption', 2.778), ('total_loss', 13.663)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 154000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.106), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.769), ('loss_ce_0', 0.272), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.787), ('loss_caption', 2.787), ('total_loss', 13.717)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 155000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.09), ('cardinality_error', 3.714), ('loss_ce_0', 0.267), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.758), ('loss_caption', 2.76), ('total_loss', 13.593)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 156000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.167), ('loss_self_iou', 0.102), ('cardinality_error', 3.675), ('loss_ce_0', 0.269), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.675), ('loss_caption_0', 2.741), ('loss_caption', 2.742), ('total_loss', 13.504)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 157000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.267), ('loss_counter', 0.104), ('loss_bbox', 0.065), ('loss_giou', 0.167), ('loss_self_iou', 0.103), ('cardinality_error', 3.722), ('loss_ce_0', 0.268), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.777), ('loss_caption', 2.783), ('total_loss', 13.668)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 158000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.099), ('cardinality_error', 3.758), ('loss_ce_0', 0.27), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.758), ('loss_caption_0', 2.815), ('loss_caption', 2.817), ('total_loss', 13.789)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 159000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.729), ('loss_ce_0', 0.275), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.783), ('loss_caption', 2.785), ('total_loss', 13.721)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 160000 (epoch 15),
loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.109), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.098), ('cardinality_error', 3.816), ('loss_ce_0', 0.271), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.78), ('loss_caption', 2.784), ('total_loss', 13.686)]),
time/iter = 0.196, bad_vid = 0.000
Validation results of iter 160144:
Bleu_1:0.16754398447821903
Bleu_2:0.08978801866243748
Bleu_3:0.046077601805781236
Bleu_4:0.02215727819941335
METEOR:0.08650894641812401
ROUGE_L:0.16425299709373153
CIDEr:0.3192637628790779
Recall:0.5308598805776927
Precision:0.5705477594739302
soda_c:0.059035206979637336
para_Bleu_1:0.4722129873397206
para_Bleu_2:0.2843271953295457
para_Bleu_3:0.17433620623201318
para_Bleu_4:0.10943737200004257
para_METEOR:0.16524483023272712
para_ROUGE_L:0.3180351825656492
para_CIDEr:0.2139382514781602
overall score of iter 160144: 0.4886204537109299
Save model at iter 160144 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 160144 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 161000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.103), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.695), ('loss_ce_0', 0.263), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.766), ('loss_caption', 2.768), ('total_loss', 13.553)]),
time/iter = 0.749, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 162000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.694), ('loss_ce_0', 0.266), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.768), ('loss_caption', 2.764), ('total_loss', 13.573)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 163000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.173), ('loss_self_iou', 0.097), ('cardinality_error', 3.769), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.765), ('loss_caption', 2.766), ('total_loss', 13.63)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 164000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.11), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.774), ('loss_ce_0', 0.269), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.772), ('loss_caption', 2.776), ('total_loss', 13.625)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 165000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.102), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.699), ('loss_ce_0', 0.267), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.711), ('loss_caption', 2.716), ('total_loss', 13.368)]),
time/iter = 0.187, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 166000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.105), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.72), ('loss_ce_0', 0.268), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.754), ('loss_caption', 2.755), ('total_loss', 13.534)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 167000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.095), ('cardinality_error', 3.712), ('loss_ce_0', 0.266), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.712), ('loss_caption_0', 2.771), ('loss_caption', 2.772), ('total_loss', 13.617)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 168000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.09), ('cardinality_error', 3.816), ('loss_ce_0', 0.269), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.814), ('loss_caption', 2.82), ('total_loss', 13.826)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 169000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.106), ('cardinality_error', 3.697), ('loss_ce_0', 0.261), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.697), ('loss_caption_0', 2.769), ('loss_caption', 2.775), ('total_loss', 13.598)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 170000 (epoch 16),
loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.105), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.093), ('cardinality_error', 3.799), ('loss_ce_0', 0.272), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.794), ('loss_caption', 2.798), ('total_loss', 13.727)]),
time/iter = 0.191, bad_vid = 0.000
Validation results of iter 170153:
Bleu_1:0.16584280243722227
Bleu_2:0.08889969905794425
Bleu_3:0.04569298286173284
Bleu_4:0.021992960199339176
METEOR:0.08570833880397384
ROUGE_L:0.16234979503724006
CIDEr:0.3170462149966731
Recall:0.5273397281824633
Precision:0.5648989898989865
soda_c:0.058539462474976364
para_Bleu_1:0.4735378044184376
para_Bleu_2:0.2855599966961999
para_Bleu_3:0.17485842077678387
para_Bleu_4:0.10998333079246524
para_METEOR:0.16580782598840993
para_ROUGE_L:0.3184105968751349
para_CIDEr:0.2144083270960459
overall score of iter 170153: 0.4901994838769211
Save model at iter 170153 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 170153 to checkpoint file.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 171000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.161), ('loss_self_iou', 0.094), ('cardinality_error', 3.694), ('loss_ce_0', 0.261), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.772), ('loss_caption', 2.77), ('total_loss', 13.544)]),
time/iter = 0.745, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 172000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.096), ('cardinality_error', 3.667), ('loss_ce_0', 0.262), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.741), ('loss_caption', 2.743), ('total_loss', 13.47)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 173000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.104), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.09), ('cardinality_error', 3.753), ('loss_ce_0', 0.261), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.786), ('loss_caption', 2.785), ('total_loss', 13.646)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 174000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.107), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.094), ('cardinality_error', 3.832), ('loss_ce_0', 0.261), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.832), ('loss_caption_0', 2.733), ('loss_caption', 2.738), ('total_loss', 13.457)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 175000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.163), ('loss_self_iou', 0.098), ('cardinality_error', 3.731), ('loss_ce_0', 0.259), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.062), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.731), ('loss_caption_0', 2.745), ('loss_caption', 2.744), ('total_loss', 13.454)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 176000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.095), ('cardinality_error', 3.795), ('loss_ce_0', 0.264), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.795), ('loss_caption_0', 2.761), ('loss_caption', 2.77), ('total_loss', 13.575)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 177000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.161), ('loss_self_iou', 0.096), ('cardinality_error', 3.652), ('loss_ce_0', 0.261), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.745), ('total_loss', 13.43)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 178000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.103), ('cardinality_error', 3.664), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.682), ('loss_caption', 2.68), ('total_loss', 13.211)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 179000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.105), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.09), ('cardinality_error', 3.825), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.825), ('loss_caption_0', 2.788), ('loss_caption', 2.796), ('total_loss', 13.671)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 180000 (epoch 17),
loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.781), ('loss_caption', 2.775), ('total_loss', 13.608)]),
time/iter = 0.192, bad_vid = 0.000
Validation results of iter 180162:
Bleu_1:0.16720622564646215
Bleu_2:0.08946643461131876
Bleu_3:0.04568137095423273
Bleu_4:0.022039722503534608
METEOR:0.08588931176535387
ROUGE_L:0.16315869782389542
CIDEr:0.32099741016990446
Recall:0.5265047853249455
Precision:0.5647345942647923
soda_c:0.05847424883094643
para_Bleu_1:0.47508155945278135
para_Bleu_2:0.2858233856765029
para_Bleu_3:0.17499503512152859
para_Bleu_4:0.11002968407978216
para_METEOR:0.16541373751181562
para_ROUGE_L:0.3190110890037882
para_CIDEr:0.21421557986951392
overall score of iter 180162: 0.4896590014611117
Save model at iter 180162 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 181000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.781), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.781), ('loss_caption_0', 2.743), ('loss_caption', 2.746), ('total_loss', 13.452)]),
time/iter = 0.750, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 182000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.1), ('cardinality_error', 3.726), ('loss_ce_0', 0.26), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.748), ('loss_caption', 2.746), ('total_loss', 13.472)]),
time/iter = 0.189, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 183000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.722), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.729), ('loss_caption', 2.734), ('total_loss', 13.405)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 184000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.726), ('loss_ce_0', 0.257), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.783), ('loss_caption', 2.787), ('total_loss', 13.591)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 185000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.098), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.087), ('cardinality_error', 3.667), ('loss_ce_0', 0.26), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.718), ('loss_caption', 2.716), ('total_loss', 13.354)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 186000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.776), ('loss_ce_0', 0.259), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.776), ('loss_caption_0', 2.75), ('loss_caption', 2.75), ('total_loss', 13.494)]),
time/iter = 0.194, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 187000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.803), ('loss_ce_0', 0.264), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.803), ('loss_caption_0', 2.788), ('loss_caption', 2.791), ('total_loss', 13.678)]),
time/iter = 0.198, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 188000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.1), ('loss_bbox', 0.062), ('loss_giou', 0.163), ('loss_self_iou', 0.091), ('cardinality_error', 3.71), ('loss_ce_0', 0.259), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.71), ('loss_caption_0', 2.745), ('loss_caption', 2.743), ('total_loss', 13.444)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 189000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.165), ('loss_self_iou', 0.1), ('cardinality_error', 3.748), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.748), ('loss_caption_0', 2.751), ('loss_caption', 2.753), ('total_loss', 13.484)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 190000 (epoch 18),
loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.104), ('loss_bbox', 0.06), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 2.729), ('loss_caption', 2.73), ('total_loss', 13.395)]),
time/iter = 0.189, bad_vid = 0.000
Validation results of iter 190171:
Bleu_1:0.1662475028889873
Bleu_2:0.08895418147726737
Bleu_3:0.04559170272578064
Bleu_4:0.021869443641790748
METEOR:0.0853620749347768
ROUGE_L:0.16226693807975517
CIDEr:0.3203697867996399
Recall:0.5243080966273422
Precision:0.5592002237136435
soda_c:0.058066485957305666
para_Bleu_1:0.47302383939773723
para_Bleu_2:0.2848420020452884
para_Bleu_3:0.17477626094199183
para_Bleu_4:0.11005159892431456
para_METEOR:0.16474042555391544
para_ROUGE_L:0.31754161420686944
para_CIDEr:0.2082818020277855
overall score of iter 190171: 0.4830738265060155
Save model at iter 190171 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save info to info.json
ID seq2-ft(mix)-gt_percent-1.0 iter 191000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.086), ('cardinality_error', 3.653), ('loss_ce_0', 0.257), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.754), ('loss_caption', 2.752), ('total_loss', 13.501)]),
time/iter = 0.755, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 192000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.1), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.094), ('cardinality_error', 3.767), ('loss_ce_0', 0.258), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.767), ('loss_caption_0', 2.717), ('loss_caption', 2.72), ('total_loss', 13.343)]),
time/iter = 0.188, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 193000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.106), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.093), ('cardinality_error', 3.847), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.847), ('loss_caption_0', 2.754), ('loss_caption', 2.759), ('total_loss', 13.499)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 194000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.165), ('loss_self_iou', 0.097), ('cardinality_error', 3.775), ('loss_ce_0', 0.262), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.769), ('loss_caption', 2.772), ('total_loss', 13.587)]),
time/iter = 0.192, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 195000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.794), ('loss_ce_0', 0.261), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.751), ('loss_caption', 2.751), ('total_loss', 13.506)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 196000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.095), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.1), ('cardinality_error', 3.652), ('loss_ce_0', 0.258), ('loss_counter_0', 0.095), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.735), ('total_loss', 13.403)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 197000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.091), ('cardinality_error', 3.759), ('loss_ce_0', 0.258), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.759), ('loss_caption_0', 2.74), ('loss_caption', 2.743), ('total_loss', 13.418)]),
time/iter = 0.191, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 198000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.098), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.092), ('cardinality_error', 3.664), ('loss_ce_0', 0.255), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.718), ('loss_caption', 2.72), ('total_loss', 13.31)]),
time/iter = 0.190, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 199000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.101), ('cardinality_error', 3.736), ('loss_ce_0', 0.257), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.759), ('loss_caption', 2.76), ('total_loss', 13.502)]),
time/iter = 0.193, bad_vid = 0.000
ID seq2-ft(mix)-gt_percent-1.0 iter 200000 (epoch 19),
loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.159), ('loss_self_iou', 0.098), ('cardinality_error', 3.701), ('loss_ce_0', 0.259), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.701), ('loss_caption_0', 2.766), ('loss_caption', 2.771), ('total_loss', 13.518)]),
time/iter = 0.190, bad_vid = 0.000
Validation results of iter 200180:
Bleu_1:0.16600244771432068
Bleu_2:0.08859363359362551
Bleu_3:0.045174799285766926
Bleu_4:0.021453706973694267
METEOR:0.08469975853590762
ROUGE_L:0.1615333099598977
CIDEr:0.3178372173219055
Recall:0.5270524681293403
Precision:0.5612365263371945
soda_c:0.05852570981425518
para_Bleu_1:0.47641872729084495
para_Bleu_2:0.28679556025023933
para_Bleu_3:0.1757988669447671
para_Bleu_4:0.11061748158923715
para_METEOR:0.1647238014039032
para_ROUGE_L:0.3182336912910021
para_CIDEr:0.21852415031403352
overall score of iter 200180: 0.4938654333071738
Save model at iter 200180 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth.
Save Best-model at iter 200180 to checkpoint file.
Save info to info.json
Best epoch: 10
Best Model Performance:
Bleu_1:0.1671778590456048
Bleu_2:0.09077014613023152
Bleu_3:0.0476684747303012
Bleu_4:0.02445564298599047
METEOR:0.08933235383587503
ROUGE_L:0.1654660162888944
CIDEr:0.31886265111118334
Recall:0.5314017615268335
Precision:0.5831469052945512
soda_c:0.05853263249839839
para_Bleu_1:0.46544090189732323
para_Bleu_2:0.2789325258737778
para_Bleu_3:0.17172911957785325
para_Bleu_4:0.10903514181091935
para_METEOR:0.16550159188298816
para_ROUGE_L:0.3181118223429575
para_CIDEr:0.2056618808195008
avg_proposal_number:-1
Best Overall Score epoch10: 1.5812763042668414