diff --git "a/eval.log" "b/eval.log" new file mode 100644--- /dev/null +++ "b/eval.log" @@ -0,0 +1,3064 @@ +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +{'ego4d/narration_val_L4096_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}} +Evaluation datasets: +* ego4d/narration_val | num samples: 223 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +ER@0.50: 0.722 (S=197, C=2682, M=5712, R=295) +ER@0.55: 0.742 (S=370, C=2509, M=5712, R=295) +ER@0.60: 0.769 (S=600, C=2279, M=5712, R=295) +ER@0.65: 0.800 (S=864, C=2015, M=5712, R=295) +ER@0.70: 0.836 (S=1178, C=1701, M=5712, R=295) +ER@0.75: 0.877 (S=1530, C=1349, M=5712, R=295) +ER@0.80: 0.920 (S=1897, C=982, M=5712, R=295) +ER@0.85: 0.962 (S=2256, C=623, M=5712, R=295) +ER@0.90: 0.993 (S=2521, C=358, M=5712, R=295) +ER@0.95: 1.012 (S=2683, C=196, M=5712, R=295) +ER@1.00: 1.025 (S=2796, C=83, M=5712, R=295) +Evalulation: ego4d-narration_val_L4096_I10/stream/notalk0.2-maxlen_4k +Metrics: +missing_rate: 0.6649 +redundant_rate: 0.0929 +match_cost: 0.3135 +semantic_score: 0.7285 +mean_error_rate: 0.8780 +mean_error_rate_v2: 0.8488 +jaccard_index: 0.3240 +jaccard_index_v2: 0.1512 +AP: 0.4232 +AR: 0.1564 +Avg-F1: 0.2284 +num_matched: 2879.0000 +num_missed: 5712.0000 +num_redundant: 295.0000 +num_correct_5: 2682.0000 +Bleu_1: 0.4702 +Bleu_1_w: 0.1523 +Bleu_2: 0.3149 +Bleu_2_w: 0.1020 +Bleu_3: 0.2145 +Bleu_3_w: 0.0695 +Bleu_4: 0.1494 +Bleu_4_w: 0.0484 +CIDEr: 1.3650 +CIDEr_w: 0.4422 +METEOR: 0.2271 +METEOR_w: 0.0736 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +ER@0.50: 0.601 (S=508, C=5684, M=2399, R=2257) +ER@0.55: 0.650 (S=926, C=5266, M=2399, R=2257) +ER@0.60: 0.714 (S=1482, C=4710, M=2399, R=2257) +ER@0.65: 0.782 (S=2058, C=4134, M=2399, R=2257) +ER@0.70: 0.857 (S=2708, C=3484, M=2399, R=2257) +ER@0.75: 0.944 (S=3457, C=2735, M=2399, R=2257) +ER@0.80: 1.039 (S=4268, C=1924, M=2399, R=2257) +ER@0.85: 1.120 (S=4965, C=1227, M=2399, R=2257) +ER@0.90: 1.180 (S=5483, C=709, M=2399, R=2257) +ER@0.95: 1.218 (S=5805, C=387, M=2399, R=2257) +ER@1.00: 1.239 (S=5986, C=206, M=2399, R=2257) +Evalulation: ego4d-narration_val_L4096_I10/stream/notalk0.3-maxlen_4k +Metrics: +missing_rate: 0.2792 +redundant_rate: 0.2671 +match_cost: 0.3268 +semantic_score: 0.7172 +mean_error_rate: 0.9403 +mean_error_rate_v2: 0.7447 +jaccard_index: 0.5708 +jaccard_index_v2: 0.2553 +AP: 0.3278 +AR: 0.3224 +Avg-F1: 0.3251 +num_matched: 6192.0000 +num_missed: 2399.0000 +num_redundant: 2257.0000 +num_correct_5: 5684.0000 +Bleu_1: 0.4685 +Bleu_1_w: 0.2674 +Bleu_2: 0.3095 +Bleu_2_w: 0.1767 +Bleu_3: 0.2115 +Bleu_3_w: 0.1207 +Bleu_4: 0.1468 +Bleu_4_w: 0.0838 +CIDEr: 1.3725 +CIDEr_w: 0.7834 +METEOR: 0.2242 +METEOR_w: 0.1280 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +ER@0.50: 0.951 (S=657, C=7121, M=813, R=6702) +ER@0.55: 1.013 (S=1185, C=6593, M=813, R=6702) +ER@0.60: 1.091 (S=1855, C=5923, M=813, R=6702) +ER@0.65: 1.179 (S=2617, C=5161, M=813, R=6702) +ER@0.70: 1.274 (S=3432, C=4346, M=813, R=6702) +ER@0.75: 1.385 (S=4384, C=3394, M=813, R=6702) +ER@0.80: 1.506 (S=5424, C=2354, M=813, R=6702) +ER@0.85: 1.602 (S=6250, C=1528, M=813, R=6702) +ER@0.90: 1.672 (S=6852, C=926, M=813, R=6702) +ER@0.95: 1.724 (S=7293, C=485, M=813, R=6702) +ER@1.00: 1.747 (S=7490, C=288, M=813, R=6702) +Evalulation: ego4d-narration_val_L4096_I10/stream/notalk0.4-maxlen_4k +Metrics: +missing_rate: 0.0946 +redundant_rate: 0.4628 +match_cost: 0.3248 +semantic_score: 0.7160 +mean_error_rate: 1.3767 +mean_error_rate_v2: 0.7734 +jaccard_index: 0.5086 +jaccard_index_v2: 0.2266 +AP: 0.2393 +AR: 0.4034 +Avg-F1: 0.3004 +num_matched: 7778.0000 +num_missed: 813.0000 +num_redundant: 6702.0000 +num_correct_5: 7121.0000 +Bleu_1: 0.4657 +Bleu_1_w: 0.2368 +Bleu_2: 0.3072 +Bleu_2_w: 0.1563 +Bleu_3: 0.2103 +Bleu_3_w: 0.1069 +Bleu_4: 0.1460 +Bleu_4_w: 0.0743 +CIDEr: 1.3982 +CIDEr_w: 0.7111 +METEOR: 0.2256 +METEOR_w: 0.1147 + +All Finished! Time: 9.98 minutes +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +Runs: +ego4d/narration_val_L4096_I10|stream|4k|0.2|summarize_and_drop +ego4d/narration_val_L4096_I10|stream|4k|0.3|summarize_and_drop +ego4d/narration_val_L4096_I10|stream|4k|0.4|summarize_and_drop +Traceback (most recent call last): + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 121, in + main(eval_args, slurm_args) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 110, in main + job.results() # wait for the job to finish + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in results + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 266, in result + r = self.results() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 294, in results + raise job_exception # pylint: disable=raising-bad-type +submitit.core.utils.FailedJobError: Job (task=0) failed during processing with trace: +---------------------- +Traceback (most recent call last): + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/submission.py", line 55, in process_job + result = delayed.result() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/utils.py", line 133, in result + self._result = self.function(*self.args, **self.kwargs) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 52, in run_eval + datasets = build_eval_datasets(eval_datasets=data_name, **args_dict) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/data/build.py", line 137, in build_eval_datasets + dataset = build_dataset(dataset_name, **kwargs) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/data/build.py", line 63, in build_dataset + return BaseDataset( + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/data/dataset.py", line 50, in __init__ + self.data = hf_datasets.Dataset.from_json(self.ann_file) + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/datasets/arrow_dataset.py", line 1178, in from_json + return JsonDatasetReader( + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/datasets/io/json.py", line 40, in __init__ + self.builder = Json( + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/datasets/builder.py", line 360, in __init__ + data_files = DataFilesDict.from_patterns( + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/datasets/data_files.py", line 721, in from_patterns + else DataFilesList.from_patterns( + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/datasets/data_files.py", line 624, in from_patterns + resolve_pattern( + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/datasets/data_files.py", line 411, in resolve_pattern + raise FileNotFoundError(error_msg) +FileNotFoundError: Unable to find '/fsx_0/user/imzyc/processed_data/ego4d/prepared/dialog_val_L0_L4096_I10.jsonl' + +---------------------- +You can check full logs with 'job.stderr(0)' and 'job.stdout(0)'or at paths: + - /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/13811/13811_0_log.err + - /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/13811/13811_0_log.out +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +{'assembly101/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.1}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}, + 'ego4d/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.05}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.1}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}, + 'egoexolearn/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.1}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}, + 'epickitchens/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.1}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}, + 'holoassist/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.1}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}, + 'wtag/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.1}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}} +Evaluation datasets: +* ego4d/dialog_val | num samples: 96 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.05 +ER@0.50: 0.874 (S=191, C=757, M=3866, R=149) +ER@0.55: 0.891 (S=276, C=672, M=3866, R=149) +ER@0.60: 0.908 (S=358, C=590, M=3866, R=149) +ER@0.65: 0.932 (S=473, C=475, M=3866, R=149) +ER@0.70: 0.954 (S=579, C=369, M=3866, R=149) +ER@0.75: 0.976 (S=682, C=266, M=3866, R=149) +ER@0.80: 0.993 (S=766, C=182, M=3866, R=149) +ER@0.85: 1.007 (S=832, C=116, M=3866, R=149) +ER@0.90: 1.018 (S=888, C=60, M=3866, R=149) +ER@0.95: 1.027 (S=927, C=21, M=3866, R=149) +ER@1.00: 1.031 (S=948, C=0, M=3866, R=149) +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.05-maxlen_4k +Metrics: +missing_rate: 0.8031 +redundant_rate: 0.1358 +match_cost: 0.3689 +semantic_score: 0.6455 +mean_error_rate: 0.9647 +mean_error_rate_v2: 0.9357 +jaccard_index: 0.1910 +jaccard_index_v2: 0.0643 +AP: 0.2907 +AR: 0.0662 +Avg-F1: 0.1079 +num_matched: 948.0000 +num_missed: 3866.0000 +num_redundant: 149.0000 +num_correct_5: 757.0000 +Bleu_1: 0.4054 +Bleu_1_w: 0.0774 +Bleu_2: 0.3015 +Bleu_2_w: 0.0576 +Bleu_3: 0.2369 +Bleu_3_w: 0.0452 +Bleu_4: 0.1922 +Bleu_4_w: 0.0367 +CIDEr: 1.1670 +CIDEr_w: 0.2229 +METEOR: 0.2061 +METEOR_w: 0.0394 + +Updating eval setup: not_talk_threshold: 0.05 -> 0.1 +ER@0.50: 0.907 (S=253, C=841, M=3720, R=395) +ER@0.55: 0.929 (S=356, C=738, M=3720, R=395) +ER@0.60: 0.951 (S=463, C=631, M=3720, R=395) +ER@0.65: 0.975 (S=580, C=514, M=3720, R=395) +ER@0.70: 1.002 (S=710, C=384, M=3720, R=395) +ER@0.75: 1.025 (S=821, C=273, M=3720, R=395) +ER@0.80: 1.045 (S=917, C=177, M=3720, R=395) +ER@0.85: 1.059 (S=983, C=111, M=3720, R=395) +ER@0.90: 1.069 (S=1029, C=65, M=3720, R=395) +ER@0.95: 1.078 (S=1074, C=20, M=3720, R=395) +ER@1.00: 1.082 (S=1094, C=0, M=3720, R=395) +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.1-maxlen_4k +Metrics: +missing_rate: 0.7727 +redundant_rate: 0.2653 +match_cost: 0.3965 +semantic_score: 0.6273 +mean_error_rate: 1.0112 +mean_error_rate_v2: 0.9345 +jaccard_index: 0.2100 +jaccard_index_v2: 0.0655 +AP: 0.2292 +AR: 0.0709 +Avg-F1: 0.1083 +num_matched: 1094.0000 +num_missed: 3720.0000 +num_redundant: 395.0000 +num_correct_5: 841.0000 +Bleu_1: 0.3939 +Bleu_1_w: 0.0827 +Bleu_2: 0.2859 +Bleu_2_w: 0.0601 +Bleu_3: 0.2210 +Bleu_3_w: 0.0464 +Bleu_4: 0.1773 +Bleu_4_w: 0.0372 +CIDEr: 1.0426 +CIDEr_w: 0.2190 +METEOR: 0.1988 +METEOR_w: 0.0418 + +Updating eval setup: not_talk_threshold: 0.1 -> 0.2 +ER@0.50: 1.008 (S=372, C=992, M=3450, R=1031) +ER@0.55: 1.036 (S=505, C=859, M=3450, R=1031) +ER@0.60: 1.063 (S=635, C=729, M=3450, R=1031) +ER@0.65: 1.092 (S=778, C=586, M=3450, R=1031) +ER@0.70: 1.124 (S=929, C=435, M=3450, R=1031) +ER@0.75: 1.149 (S=1052, C=312, M=3450, R=1031) +ER@0.80: 1.174 (S=1172, C=192, M=3450, R=1031) +ER@0.85: 1.190 (S=1246, C=118, M=3450, R=1031) +ER@0.90: 1.203 (S=1310, C=54, M=3450, R=1031) +ER@0.95: 1.210 (S=1344, C=20, M=3450, R=1031) +ER@1.00: 1.214 (S=1364, C=0, M=3450, R=1031) +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +missing_rate: 0.7167 +redundant_rate: 0.4305 +match_cost: 0.4306 +semantic_score: 0.6061 +mean_error_rate: 1.1330 +mean_error_rate_v2: 0.9332 +jaccard_index: 0.2334 +jaccard_index_v2: 0.0668 +AP: 0.1631 +AR: 0.0811 +Avg-F1: 0.1084 +num_matched: 1364.0000 +num_missed: 3450.0000 +num_redundant: 1031.0000 +num_correct_5: 992.0000 +Bleu_1: 0.3769 +Bleu_1_w: 0.0879 +Bleu_2: 0.2667 +Bleu_2_w: 0.0622 +Bleu_3: 0.2008 +Bleu_3_w: 0.0469 +Bleu_4: 0.1575 +Bleu_4_w: 0.0368 +CIDEr: 0.9491 +CIDEr_w: 0.2215 +METEOR: 0.1910 +METEOR_w: 0.0446 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +ER@0.50: 1.169 (S=522, C=1155, M=3137, R=1970) +ER@0.55: 1.201 (S=677, C=1000, M=3137, R=1970) +ER@0.60: 1.238 (S=851, C=826, M=3137, R=1970) +ER@0.65: 1.279 (S=1049, C=628, M=3137, R=1970) +ER@0.70: 1.310 (S=1199, C=478, M=3137, R=1970) +ER@0.75: 1.344 (S=1361, C=316, M=3137, R=1970) +ER@0.80: 1.367 (S=1473, C=204, M=3137, R=1970) +ER@0.85: 1.383 (S=1551, C=126, M=3137, R=1970) +ER@0.90: 1.397 (S=1620, C=57, M=3137, R=1970) +ER@0.95: 1.406 (S=1661, C=16, M=3137, R=1970) +ER@1.00: 1.409 (S=1677, C=0, M=3137, R=1970) +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +missing_rate: 0.6516 +redundant_rate: 0.5402 +match_cost: 0.4533 +semantic_score: 0.5915 +mean_error_rate: 1.3185 +mean_error_rate_v2: 0.9356 +jaccard_index: 0.2472 +jaccard_index_v2: 0.0644 +AP: 0.1198 +AR: 0.0908 +Avg-F1: 0.1033 +num_matched: 1677.0000 +num_missed: 3137.0000 +num_redundant: 1970.0000 +num_correct_5: 1155.0000 +Bleu_1: 0.3657 +Bleu_1_w: 0.0904 +Bleu_2: 0.2545 +Bleu_2_w: 0.0629 +Bleu_3: 0.1897 +Bleu_3_w: 0.0469 +Bleu_4: 0.1486 +Bleu_4_w: 0.0367 +CIDEr: 0.8309 +CIDEr_w: 0.2054 +METEOR: 0.1817 +METEOR_w: 0.0449 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +ER@0.50: 1.381 (S=708, C=1394, M=2712, R=3228) +ER@0.55: 1.425 (S=918, C=1184, M=2712, R=3228) +ER@0.60: 1.472 (S=1147, C=955, M=2712, R=3228) +ER@0.65: 1.519 (S=1373, C=729, M=2712, R=3228) +ER@0.70: 1.559 (S=1566, C=536, M=2712, R=3228) +ER@0.75: 1.593 (S=1728, C=374, M=2712, R=3228) +ER@0.80: 1.621 (S=1863, C=239, M=2712, R=3228) +ER@0.85: 1.642 (S=1965, C=137, M=2712, R=3228) +ER@0.90: 1.656 (S=2032, C=70, M=2712, R=3228) +ER@0.95: 1.666 (S=2079, C=23, M=2712, R=3228) +ER@1.00: 1.671 (S=2102, C=0, M=2712, R=3228) +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +missing_rate: 0.5634 +redundant_rate: 0.6056 +match_cost: 0.4721 +semantic_score: 0.5789 +mean_error_rate: 1.5640 +mean_error_rate_v2: 0.9362 +jaccard_index: 0.2614 +jaccard_index_v2: 0.0638 +AP: 0.0962 +AR: 0.1065 +Avg-F1: 0.1011 +num_matched: 2102.0000 +num_missed: 2712.0000 +num_redundant: 3228.0000 +num_correct_5: 1394.0000 +Bleu_1: 0.3599 +Bleu_1_w: 0.0941 +Bleu_2: 0.2469 +Bleu_2_w: 0.0645 +Bleu_3: 0.1820 +Bleu_3_w: 0.0476 +Bleu_4: 0.1414 +Bleu_4_w: 0.0369 +CIDEr: 0.8052 +CIDEr_w: 0.2105 +METEOR: 0.1766 +METEOR_w: 0.0461 + +Evaluation datasets: +* holoassist/dialog_val | num samples: 291 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.1 +ER@0.50: 0.854 (S=810, C=2319, M=12132, R=88) +ER@0.55: 0.872 (S=1081, C=2048, M=12132, R=88) +ER@0.60: 0.892 (S=1386, C=1743, M=12132, R=88) +ER@0.65: 0.912 (S=1698, C=1431, M=12132, R=88) +ER@0.70: 0.933 (S=2013, C=1116, M=12132, R=88) +ER@0.75: 0.952 (S=2313, C=816, M=12132, R=88) +ER@0.80: 0.972 (S=2616, C=513, M=12132, R=88) +ER@0.85: 0.986 (S=2828, C=301, M=12132, R=88) +ER@0.90: 0.998 (S=3008, C=121, M=12132, R=88) +ER@0.95: 1.004 (S=3097, C=32, M=12132, R=88) +ER@1.00: 1.006 (S=3125, C=4, M=12132, R=88) +Evalulation: holoassist-dialog_val_L0_I10/stream/notalk0.1-maxlen_4k +Metrics: +missing_rate: 0.7950 +redundant_rate: 0.0274 +match_cost: 0.4018 +semantic_score: 0.6163 +mean_error_rate: 0.9436 +mean_error_rate_v2: 0.9381 +jaccard_index: 0.2039 +jaccard_index_v2: 0.0619 +AP: 0.2951 +AR: 0.0622 +Avg-F1: 0.1028 +num_matched: 3129.0000 +num_missed: 12132.0000 +num_redundant: 88.0000 +num_correct_5: 2319.0000 +Bleu_1: 0.4397 +Bleu_1_w: 0.0896 +Bleu_2: 0.3219 +Bleu_2_w: 0.0656 +Bleu_3: 0.2470 +Bleu_3_w: 0.0503 +Bleu_4: 0.1951 +Bleu_4_w: 0.0398 +CIDEr: 1.1901 +CIDEr_w: 0.2426 +METEOR: 0.2116 +METEOR_w: 0.0431 + +Updating eval setup: not_talk_threshold: 0.1 -> 0.2 +ER@0.50: 0.775 (S=1607, C=3824, M=9830, R=384) +ER@0.55: 0.805 (S=2068, C=3363, M=9830, R=384) +ER@0.60: 0.835 (S=2533, C=2898, M=9830, R=384) +ER@0.65: 0.868 (S=3037, C=2394, M=9830, R=384) +ER@0.70: 0.903 (S=3561, C=1870, M=9830, R=384) +ER@0.75: 0.937 (S=4091, C=1340, M=9830, R=384) +ER@0.80: 0.970 (S=4584, C=847, M=9830, R=384) +ER@0.85: 0.994 (S=4948, C=483, M=9830, R=384) +ER@0.90: 1.012 (S=5237, C=194, M=9830, R=384) +ER@0.95: 1.021 (S=5371, C=60, M=9830, R=384) +ER@1.00: 1.025 (S=5427, C=4, M=9830, R=384) +Evalulation: holoassist-dialog_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +missing_rate: 0.6441 +redundant_rate: 0.0660 +match_cost: 0.4213 +semantic_score: 0.6035 +mean_error_rate: 0.9222 +mean_error_rate_v2: 0.8996 +jaccard_index: 0.3471 +jaccard_index_v2: 0.1004 +AP: 0.2701 +AR: 0.1029 +Avg-F1: 0.1490 +num_matched: 5431.0000 +num_missed: 9830.0000 +num_redundant: 384.0000 +num_correct_5: 3824.0000 +Bleu_1: 0.4309 +Bleu_1_w: 0.1496 +Bleu_2: 0.3129 +Bleu_2_w: 0.1086 +Bleu_3: 0.2392 +Bleu_3_w: 0.0830 +Bleu_4: 0.1882 +Bleu_4_w: 0.0653 +CIDEr: 1.1481 +CIDEr_w: 0.3986 +METEOR: 0.2081 +METEOR_w: 0.0723 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +ER@0.50: 0.750 (S=2047, C=4543, M=8671, R=733) +ER@0.55: 0.787 (S=2601, C=3989, M=8671, R=733) +ER@0.60: 0.827 (S=3223, C=3367, M=8671, R=733) +ER@0.65: 0.866 (S=3809, C=2781, M=8671, R=733) +ER@0.70: 0.909 (S=4462, C=2128, M=8671, R=733) +ER@0.75: 0.949 (S=5083, C=1507, M=8671, R=733) +ER@0.80: 0.986 (S=5642, C=948, M=8671, R=733) +ER@0.85: 1.012 (S=6045, C=545, M=8671, R=733) +ER@0.90: 1.033 (S=6360, C=230, M=8671, R=733) +ER@0.95: 1.043 (S=6516, C=74, M=8671, R=733) +ER@1.00: 1.048 (S=6583, C=7, M=8671, R=733) +Evalulation: holoassist-dialog_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +missing_rate: 0.5682 +redundant_rate: 0.1001 +match_cost: 0.4364 +semantic_score: 0.5939 +mean_error_rate: 0.9282 +mean_error_rate_v2: 0.8856 +jaccard_index: 0.4120 +jaccard_index_v2: 0.1144 +AP: 0.2498 +AR: 0.1198 +Avg-F1: 0.1620 +num_matched: 6590.0000 +num_missed: 8671.0000 +num_redundant: 733.0000 +num_correct_5: 4543.0000 +Bleu_1: 0.4238 +Bleu_1_w: 0.1746 +Bleu_2: 0.3048 +Bleu_2_w: 0.1256 +Bleu_3: 0.2324 +Bleu_3_w: 0.0958 +Bleu_4: 0.1826 +Bleu_4_w: 0.0752 +CIDEr: 1.0930 +CIDEr_w: 0.4504 +METEOR: 0.2035 +METEOR_w: 0.0839 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +ER@0.50: 0.832 (S=3074, C=5641, M=6546, R=3077) +ER@0.55: 0.879 (S=3797, C=4918, M=6546, R=3077) +ER@0.60: 0.927 (S=4529, C=4186, M=6546, R=3077) +ER@0.65: 0.982 (S=5365, C=3350, M=6546, R=3077) +ER@0.70: 1.035 (S=6179, C=2536, M=6546, R=3077) +ER@0.75: 1.086 (S=6946, C=1769, M=6546, R=3077) +ER@0.80: 1.129 (S=7607, C=1108, M=6546, R=3077) +ER@0.85: 1.162 (S=8103, C=612, M=6546, R=3077) +ER@0.90: 1.184 (S=8442, C=273, M=6546, R=3077) +ER@0.95: 1.196 (S=8636, C=79, M=6546, R=3077) +ER@1.00: 1.201 (S=8711, C=4, M=6546, R=3077) +Evalulation: holoassist-dialog_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +missing_rate: 0.4289 +redundant_rate: 0.2609 +match_cost: 0.4650 +semantic_score: 0.5744 +mean_error_rate: 1.0558 +mean_error_rate_v2: 0.8787 +jaccard_index: 0.4752 +jaccard_index_v2: 0.1213 +AP: 0.1887 +AR: 0.1458 +Avg-F1: 0.1645 +num_matched: 8715.0000 +num_missed: 6546.0000 +num_redundant: 3077.0000 +num_correct_5: 5641.0000 +Bleu_1: 0.4142 +Bleu_1_w: 0.1968 +Bleu_2: 0.2937 +Bleu_2_w: 0.1396 +Bleu_3: 0.2207 +Bleu_3_w: 0.1049 +Bleu_4: 0.1712 +Bleu_4_w: 0.0814 +CIDEr: 1.0113 +CIDEr_w: 0.4806 +METEOR: 0.1989 +METEOR_w: 0.0945 + +Evaluation datasets: +* epickitchens/dialog_val | num samples: 150 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.1 +ER@0.50: 0.863 (S=694, C=1213, M=4525, R=332) +ER@0.55: 0.897 (S=915, C=992, M=4525, R=332) +ER@0.60: 0.926 (S=1102, C=805, M=4525, R=332) +ER@0.65: 0.954 (S=1279, C=628, M=4525, R=332) +ER@0.70: 0.983 (S=1464, C=443, M=4525, R=332) +ER@0.75: 1.003 (S=1592, C=315, M=4525, R=332) +ER@0.80: 1.021 (S=1710, C=197, M=4525, R=332) +ER@0.85: 1.034 (S=1793, C=114, M=4525, R=332) +ER@0.90: 1.042 (S=1844, C=63, M=4525, R=332) +ER@0.95: 1.048 (S=1884, C=23, M=4525, R=332) +ER@1.00: 1.052 (S=1907, C=0, M=4525, R=332) +Evalulation: epickitchens-dialog_val_L0_I10/stream/notalk0.1-maxlen_4k +Metrics: +missing_rate: 0.7035 +redundant_rate: 0.1483 +match_cost: 0.4678 +semantic_score: 0.5649 +mean_error_rate: 0.9839 +mean_error_rate_v2: 0.9356 +jaccard_index: 0.2819 +jaccard_index_v2: 0.0644 +AP: 0.1946 +AR: 0.0677 +Avg-F1: 0.1005 +num_matched: 1907.0000 +num_missed: 4525.0000 +num_redundant: 332.0000 +num_correct_5: 1213.0000 +Bleu_1: 0.3985 +Bleu_1_w: 0.1123 +Bleu_2: 0.2848 +Bleu_2_w: 0.0803 +Bleu_3: 0.2174 +Bleu_3_w: 0.0613 +Bleu_4: 0.1727 +Bleu_4_w: 0.0487 +CIDEr: 1.2020 +CIDEr_w: 0.3389 +METEOR: 0.2031 +METEOR_w: 0.0573 + +Updating eval setup: not_talk_threshold: 0.1 -> 0.2 +ER@0.50: 0.899 (S=1056, C=1496, M=3880, R=846) +ER@0.55: 0.939 (S=1313, C=1239, M=3880, R=846) +ER@0.60: 0.980 (S=1577, C=975, M=3880, R=846) +ER@0.65: 1.014 (S=1797, C=755, M=3880, R=846) +ER@0.70: 1.048 (S=2014, C=538, M=3880, R=846) +ER@0.75: 1.074 (S=2179, C=373, M=3880, R=846) +ER@0.80: 1.097 (S=2333, C=219, M=3880, R=846) +ER@0.85: 1.110 (S=2415, C=137, M=3880, R=846) +ER@0.90: 1.120 (S=2481, C=71, M=3880, R=846) +ER@0.95: 1.128 (S=2527, C=25, M=3880, R=846) +ER@1.00: 1.131 (S=2551, C=1, M=3880, R=846) +Evalulation: epickitchens-dialog_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +missing_rate: 0.6032 +redundant_rate: 0.2490 +match_cost: 0.5024 +semantic_score: 0.5445 +mean_error_rate: 1.0491 +mean_error_rate_v2: 0.9272 +jaccard_index: 0.3506 +jaccard_index_v2: 0.0728 +AP: 0.1559 +AR: 0.0824 +Avg-F1: 0.1078 +num_matched: 2552.0000 +num_missed: 3880.0000 +num_redundant: 846.0000 +num_correct_5: 1496.0000 +Bleu_1: 0.3924 +Bleu_1_w: 0.1376 +Bleu_2: 0.2719 +Bleu_2_w: 0.0954 +Bleu_3: 0.2013 +Bleu_3_w: 0.0706 +Bleu_4: 0.1561 +Bleu_4_w: 0.0547 +CIDEr: 1.0380 +CIDEr_w: 0.3640 +METEOR: 0.1930 +METEOR_w: 0.0677 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +ER@0.50: 0.981 (S=1547, C=1751, M=3134, R=1628) +ER@0.55: 1.030 (S=1864, C=1434, M=3134, R=1628) +ER@0.60: 1.077 (S=2165, C=1133, M=3134, R=1628) +ER@0.65: 1.122 (S=2452, C=846, M=3134, R=1628) +ER@0.70: 1.164 (S=2726, C=572, M=3134, R=1628) +ER@0.75: 1.197 (S=2934, C=364, M=3134, R=1628) +ER@0.80: 1.219 (S=3081, C=217, M=3134, R=1628) +ER@0.85: 1.232 (S=3160, C=138, M=3134, R=1628) +ER@0.90: 1.243 (S=3234, C=64, M=3134, R=1628) +ER@0.95: 1.250 (S=3278, C=20, M=3134, R=1628) +ER@1.00: 1.253 (S=3298, C=0, M=3134, R=1628) +Evalulation: epickitchens-dialog_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +missing_rate: 0.4873 +redundant_rate: 0.3305 +match_cost: 0.5340 +semantic_score: 0.5245 +mean_error_rate: 1.1607 +mean_error_rate_v2: 0.9262 +jaccard_index: 0.4092 +jaccard_index_v2: 0.0738 +AP: 0.1207 +AR: 0.0924 +Avg-F1: 0.1047 +num_matched: 3298.0000 +num_missed: 3134.0000 +num_redundant: 1628.0000 +num_correct_5: 1751.0000 +Bleu_1: 0.3753 +Bleu_1_w: 0.1536 +Bleu_2: 0.2541 +Bleu_2_w: 0.1040 +Bleu_3: 0.1830 +Bleu_3_w: 0.0749 +Bleu_4: 0.1385 +Bleu_4_w: 0.0567 +CIDEr: 0.9440 +CIDEr_w: 0.3863 +METEOR: 0.1835 +METEOR_w: 0.0751 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +ER@0.50: 1.155 (S=1923, C=2080, M=2429, R=3076) +ER@0.55: 1.222 (S=2352, C=1651, M=2429, R=3076) +ER@0.60: 1.281 (S=2737, C=1266, M=2429, R=3076) +ER@0.65: 1.337 (S=3093, C=910, M=2429, R=3076) +ER@0.70: 1.382 (S=3385, C=618, M=2429, R=3076) +ER@0.75: 1.416 (S=3605, C=398, M=2429, R=3076) +ER@0.80: 1.442 (S=3767, C=236, M=2429, R=3076) +ER@0.85: 1.458 (S=3870, C=133, M=2429, R=3076) +ER@0.90: 1.468 (S=3938, C=65, M=2429, R=3076) +ER@0.95: 1.476 (S=3988, C=15, M=2429, R=3076) +ER@1.00: 1.478 (S=4002, C=1, M=2429, R=3076) +Evalulation: epickitchens-dialog_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +missing_rate: 0.3776 +redundant_rate: 0.4345 +match_cost: 0.5442 +semantic_score: 0.5143 +mean_error_rate: 1.3740 +mean_error_rate_v2: 0.9295 +jaccard_index: 0.4210 +jaccard_index_v2: 0.0705 +AP: 0.0947 +AR: 0.1042 +Avg-F1: 0.0992 +num_matched: 4003.0000 +num_missed: 2429.0000 +num_redundant: 3076.0000 +num_correct_5: 2080.0000 +Bleu_1: 0.3605 +Bleu_1_w: 0.1518 +Bleu_2: 0.2399 +Bleu_2_w: 0.1010 +Bleu_3: 0.1703 +Bleu_3_w: 0.0717 +Bleu_4: 0.1268 +Bleu_4_w: 0.0534 +CIDEr: 0.8436 +CIDEr_w: 0.3552 +METEOR: 0.1800 +METEOR_w: 0.0758 + +Evaluation datasets: +* egoexolearn/dialog_val | num samples: 123 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.1 +ER@0.50: 0.913 (S=642, C=1681, M=9668, R=640) +ER@0.55: 0.933 (S=876, C=1447, M=9668, R=640) +ER@0.60: 0.952 (S=1107, C=1216, M=9668, R=640) +ER@0.65: 0.973 (S=1363, C=960, M=9668, R=640) +ER@0.70: 0.993 (S=1601, C=722, M=9668, R=640) +ER@0.75: 1.010 (S=1808, C=515, M=9668, R=640) +ER@0.80: 1.026 (S=1998, C=325, M=9668, R=640) +ER@0.85: 1.037 (S=2127, C=196, M=9668, R=640) +ER@0.90: 1.047 (S=2241, C=82, M=9668, R=640) +ER@0.95: 1.051 (S=2299, C=24, M=9668, R=640) +ER@1.00: 1.053 (S=2323, C=0, M=9668, R=640) +Evalulation: egoexolearn-dialog_val_L0_I10/stream/notalk0.1-maxlen_4k +Metrics: +missing_rate: 0.8063 +redundant_rate: 0.2160 +match_cost: 0.4180 +semantic_score: 0.6056 +mean_error_rate: 0.9990 +mean_error_rate_v2: 0.9484 +jaccard_index: 0.1839 +jaccard_index_v2: 0.0516 +AP: 0.2199 +AR: 0.0543 +Avg-F1: 0.0872 +num_matched: 2323.0000 +num_missed: 9668.0000 +num_redundant: 640.0000 +num_correct_5: 1681.0000 +Bleu_1: 0.4194 +Bleu_1_w: 0.0771 +Bleu_2: 0.2979 +Bleu_2_w: 0.0548 +Bleu_3: 0.2264 +Bleu_3_w: 0.0416 +Bleu_4: 0.1779 +Bleu_4_w: 0.0327 +CIDEr: 1.0127 +CIDEr_w: 0.1862 +METEOR: 0.1980 +METEOR_w: 0.0364 + +Updating eval setup: not_talk_threshold: 0.1 -> 0.2 +ER@0.50: 0.955 (S=907, C=1864, M=9220, R=1324) +ER@0.55: 0.979 (S=1193, C=1578, M=9220, R=1324) +ER@0.60: 1.003 (S=1487, C=1284, M=9220, R=1324) +ER@0.65: 1.026 (S=1754, C=1017, M=9220, R=1324) +ER@0.70: 1.046 (S=2004, C=767, M=9220, R=1324) +ER@0.75: 1.064 (S=2216, C=555, M=9220, R=1324) +ER@0.80: 1.081 (S=2416, C=355, M=9220, R=1324) +ER@0.85: 1.093 (S=2568, C=203, M=9220, R=1324) +ER@0.90: 1.102 (S=2676, C=95, M=9220, R=1324) +ER@0.95: 1.108 (S=2745, C=26, M=9220, R=1324) +ER@1.00: 1.110 (S=2771, C=0, M=9220, R=1324) +Evalulation: egoexolearn-dialog_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +missing_rate: 0.7689 +redundant_rate: 0.3233 +match_cost: 0.4512 +semantic_score: 0.5857 +mean_error_rate: 1.0517 +mean_error_rate_v2: 0.9471 +jaccard_index: 0.2081 +jaccard_index_v2: 0.0529 +AP: 0.1719 +AR: 0.0587 +Avg-F1: 0.0875 +num_matched: 2771.0000 +num_missed: 9220.0000 +num_redundant: 1324.0000 +num_correct_5: 1864.0000 +Bleu_1: 0.4145 +Bleu_1_w: 0.0863 +Bleu_2: 0.2929 +Bleu_2_w: 0.0610 +Bleu_3: 0.2210 +Bleu_3_w: 0.0460 +Bleu_4: 0.1731 +Bleu_4_w: 0.0360 +CIDEr: 0.9804 +CIDEr_w: 0.2040 +METEOR: 0.1935 +METEOR_w: 0.0403 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +ER@0.50: 1.014 (S=1319, C=2197, M=8475, R=2369) +ER@0.55: 1.046 (S=1701, C=1815, M=8475, R=2369) +ER@0.60: 1.076 (S=2053, C=1463, M=8475, R=2369) +ER@0.65: 1.105 (S=2404, C=1112, M=8475, R=2369) +ER@0.70: 1.130 (S=2704, C=812, M=8475, R=2369) +ER@0.75: 1.149 (S=2933, C=583, M=8475, R=2369) +ER@0.80: 1.166 (S=3140, C=376, M=8475, R=2369) +ER@0.85: 1.180 (S=3306, C=210, M=8475, R=2369) +ER@0.90: 1.190 (S=3427, C=89, M=8475, R=2369) +ER@0.95: 1.196 (S=3494, C=22, M=8475, R=2369) +ER@1.00: 1.198 (S=3516, C=0, M=8475, R=2369) +Evalulation: egoexolearn-dialog_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +missing_rate: 0.7068 +redundant_rate: 0.4025 +match_cost: 0.4859 +semantic_score: 0.5658 +mean_error_rate: 1.1318 +mean_error_rate_v2: 0.9451 +jaccard_index: 0.2448 +jaccard_index_v2: 0.0549 +AP: 0.1341 +AR: 0.0658 +Avg-F1: 0.0883 +num_matched: 3516.0000 +num_missed: 8475.0000 +num_redundant: 2369.0000 +num_correct_5: 2197.0000 +Bleu_1: 0.4004 +Bleu_1_w: 0.0980 +Bleu_2: 0.2768 +Bleu_2_w: 0.0678 +Bleu_3: 0.2048 +Bleu_3_w: 0.0501 +Bleu_4: 0.1573 +Bleu_4_w: 0.0385 +CIDEr: 0.8837 +CIDEr_w: 0.2164 +METEOR: 0.1863 +METEOR_w: 0.0456 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +ER@0.50: 1.173 (S=2058, C=2853, M=7080, R=4929) +ER@0.55: 1.221 (S=2637, C=2274, M=7080, R=4929) +ER@0.60: 1.262 (S=3120, C=1791, M=7080, R=4929) +ER@0.65: 1.299 (S=3563, C=1348, M=7080, R=4929) +ER@0.70: 1.329 (S=3930, C=981, M=7080, R=4929) +ER@0.75: 1.354 (S=4221, C=690, M=7080, R=4929) +ER@0.80: 1.377 (S=4500, C=411, M=7080, R=4929) +ER@0.85: 1.392 (S=4678, C=233, M=7080, R=4929) +ER@0.90: 1.402 (S=4804, C=107, M=7080, R=4929) +ER@0.95: 1.408 (S=4879, C=32, M=7080, R=4929) +ER@1.00: 1.411 (S=4911, C=0, M=7080, R=4929) +Evalulation: egoexolearn-dialog_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +missing_rate: 0.5904 +redundant_rate: 0.5009 +match_cost: 0.5158 +semantic_score: 0.5457 +mean_error_rate: 1.3298 +mean_error_rate_v2: 0.9424 +jaccard_index: 0.2902 +jaccard_index_v2: 0.0576 +AP: 0.0990 +AR: 0.0813 +Avg-F1: 0.0893 +num_matched: 4911.0000 +num_missed: 7080.0000 +num_redundant: 4929.0000 +num_correct_5: 2853.0000 +Bleu_1: 0.3893 +Bleu_1_w: 0.1130 +Bleu_2: 0.2633 +Bleu_2_w: 0.0764 +Bleu_3: 0.1904 +Bleu_3_w: 0.0553 +Bleu_4: 0.1434 +Bleu_4_w: 0.0416 +CIDEr: 0.7911 +CIDEr_w: 0.2296 +METEOR: 0.1761 +METEOR_w: 0.0511 + +Evaluation datasets: +* wtag/dialog_val | num samples: 21 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.1 +ER@0.50: 0.968 (S=169, C=137, M=767, R=103) +ER@0.55: 0.986 (S=188, C=118, M=767, R=103) +ER@0.60: 1.007 (S=210, C=96, M=767, R=103) +ER@0.65: 1.030 (S=235, C=71, M=767, R=103) +ER@0.70: 1.054 (S=261, C=45, M=767, R=103) +ER@0.75: 1.073 (S=281, C=25, M=767, R=103) +ER@0.80: 1.082 (S=291, C=15, M=767, R=103) +ER@0.85: 1.089 (S=299, C=7, M=767, R=103) +ER@0.90: 1.091 (S=301, C=5, M=767, R=103) +ER@0.95: 1.093 (S=303, C=3, M=767, R=103) +ER@1.00: 1.095 (S=305, C=1, M=767, R=103) +Evalulation: wtag-dialog_val_L0_I10/stream/notalk0.1-maxlen_4k +Metrics: +missing_rate: 0.7148 +redundant_rate: 0.2518 +match_cost: 0.5805 +semantic_score: 0.4627 +mean_error_rate: 1.0517 +mean_error_rate_v2: 0.9596 +jaccard_index: 0.2602 +jaccard_index_v2: 0.0404 +AP: 0.1162 +AR: 0.0443 +Avg-F1: 0.0642 +num_matched: 306.0000 +num_missed: 767.0000 +num_redundant: 103.0000 +num_correct_5: 137.0000 +Bleu_1: 0.3341 +Bleu_1_w: 0.0869 +Bleu_2: 0.2402 +Bleu_2_w: 0.0625 +Bleu_3: 0.1742 +Bleu_3_w: 0.0453 +Bleu_4: 0.1260 +Bleu_4_w: 0.0328 +CIDEr: 0.8764 +CIDEr_w: 0.2281 +METEOR: 0.1963 +METEOR_w: 0.0511 + +Updating eval setup: not_talk_threshold: 0.1 -> 0.2 +ER@0.50: 0.918 (S=306, C=254, M=513, R=166) +ER@0.55: 0.950 (S=340, C=220, M=513, R=166) +ER@0.60: 0.986 (S=379, C=181, M=513, R=166) +ER@0.65: 1.029 (S=425, C=135, M=513, R=166) +ER@0.70: 1.064 (S=463, C=97, M=513, R=166) +ER@0.75: 1.094 (S=495, C=65, M=513, R=166) +ER@0.80: 1.116 (S=519, C=41, M=513, R=166) +ER@0.85: 1.135 (S=539, C=21, M=513, R=166) +ER@0.90: 1.142 (S=546, C=14, M=513, R=166) +ER@0.95: 1.149 (S=554, C=6, M=513, R=166) +ER@1.00: 1.155 (S=560, C=0, M=513, R=166) +Evalulation: wtag-dialog_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +missing_rate: 0.4781 +redundant_rate: 0.2287 +match_cost: 0.5656 +semantic_score: 0.4702 +mean_error_rate: 1.0671 +mean_error_rate_v2: 0.9241 +jaccard_index: 0.4520 +jaccard_index_v2: 0.0759 +AP: 0.1295 +AR: 0.0876 +Avg-F1: 0.1045 +num_matched: 560.0000 +num_missed: 513.0000 +num_redundant: 166.0000 +num_correct_5: 254.0000 +Bleu_1: 0.3608 +Bleu_1_w: 0.1631 +Bleu_2: 0.2556 +Bleu_2_w: 0.1155 +Bleu_3: 0.1876 +Bleu_3_w: 0.0848 +Bleu_4: 0.1411 +Bleu_4_w: 0.0638 +CIDEr: 0.9510 +CIDEr_w: 0.4298 +METEOR: 0.2020 +METEOR_w: 0.0913 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +ER@0.50: 1.007 (S=330, C=270, M=473, R=277) +ER@0.55: 1.041 (S=367, C=233, M=473, R=277) +ER@0.60: 1.068 (S=396, C=204, M=473, R=277) +ER@0.65: 1.128 (S=460, C=140, M=473, R=277) +ER@0.70: 1.168 (S=503, C=97, M=473, R=277) +ER@0.75: 1.201 (S=539, C=61, M=473, R=277) +ER@0.80: 1.223 (S=562, C=38, M=473, R=277) +ER@0.85: 1.239 (S=579, C=21, M=473, R=277) +ER@0.90: 1.246 (S=587, C=13, M=473, R=277) +ER@0.95: 1.251 (S=592, C=8, M=473, R=277) +ER@1.00: 1.258 (S=600, C=0, M=473, R=277) +Evalulation: wtag-dialog_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +missing_rate: 0.4408 +redundant_rate: 0.3158 +match_cost: 0.5648 +semantic_score: 0.4752 +mean_error_rate: 1.1662 +mean_error_rate_v2: 0.9269 +jaccard_index: 0.4444 +jaccard_index_v2: 0.0731 +AP: 0.1125 +AR: 0.0919 +Avg-F1: 0.1012 +num_matched: 600.0000 +num_missed: 473.0000 +num_redundant: 277.0000 +num_correct_5: 270.0000 +Bleu_1: 0.3666 +Bleu_1_w: 0.1629 +Bleu_2: 0.2588 +Bleu_2_w: 0.1150 +Bleu_3: 0.1879 +Bleu_3_w: 0.0835 +Bleu_4: 0.1402 +Bleu_4_w: 0.0623 +CIDEr: 0.9763 +CIDEr_w: 0.4339 +METEOR: 0.2013 +METEOR_w: 0.0895 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +ER@0.50: 1.036 (S=363, C=297, M=413, R=336) +ER@0.55: 1.087 (S=417, C=243, M=413, R=336) +ER@0.60: 1.126 (S=459, C=201, M=413, R=336) +ER@0.65: 1.170 (S=506, C=154, M=413, R=336) +ER@0.70: 1.208 (S=547, C=113, M=413, R=336) +ER@0.75: 1.240 (S=582, C=78, M=413, R=336) +ER@0.80: 1.270 (S=614, C=46, M=413, R=336) +ER@0.85: 1.286 (S=631, C=29, M=413, R=336) +ER@0.90: 1.297 (S=643, C=17, M=413, R=336) +ER@0.95: 1.305 (S=651, C=9, M=413, R=336) +ER@1.00: 1.313 (S=660, C=0, M=413, R=336) +Evalulation: wtag-dialog_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +missing_rate: 0.3849 +redundant_rate: 0.3373 +match_cost: 0.5662 +semantic_score: 0.4736 +mean_error_rate: 1.2126 +mean_error_rate_v2: 0.9234 +jaccard_index: 0.4684 +jaccard_index_v2: 0.0766 +AP: 0.1083 +AR: 0.1006 +Avg-F1: 0.1043 +num_matched: 660.0000 +num_missed: 413.0000 +num_redundant: 336.0000 +num_correct_5: 297.0000 +Bleu_1: 0.3455 +Bleu_1_w: 0.1619 +Bleu_2: 0.2362 +Bleu_2_w: 0.1106 +Bleu_3: 0.1679 +Bleu_3_w: 0.0787 +Bleu_4: 0.1226 +Bleu_4_w: 0.0574 +CIDEr: 0.9134 +CIDEr_w: 0.4278 +METEOR: 0.1930 +METEOR_w: 0.0904 + +Evaluation datasets: +* assembly101/dialog_val | num samples: 336 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.1 +ER@0.50: 0.822 (S=727, C=1735, M=5856, R=256) +ER@0.55: 0.846 (S=928, C=1534, M=5856, R=256) +ER@0.60: 0.874 (S=1161, C=1301, M=5856, R=256) +ER@0.65: 0.903 (S=1398, C=1064, M=5856, R=256) +ER@0.70: 0.928 (S=1608, C=854, M=5856, R=256) +ER@0.75: 0.953 (S=1818, C=644, M=5856, R=256) +ER@0.80: 0.975 (S=1999, C=463, M=5856, R=256) +ER@0.85: 0.995 (S=2167, C=295, M=5856, R=256) +ER@0.90: 1.013 (S=2314, C=148, M=5856, R=256) +ER@0.95: 1.024 (S=2406, C=56, M=5856, R=256) +ER@1.00: 1.031 (S=2460, C=2, M=5856, R=256) +Evalulation: assembly101-dialog_val_L0_I10/stream/notalk0.1-maxlen_4k +Metrics: +missing_rate: 0.7040 +redundant_rate: 0.0942 +match_cost: 0.4114 +semantic_score: 0.6103 +mean_error_rate: 0.9423 +mean_error_rate_v2: 0.9142 +jaccard_index: 0.2871 +jaccard_index_v2: 0.0858 +AP: 0.2708 +AR: 0.0885 +Avg-F1: 0.1334 +num_matched: 2462.0000 +num_missed: 5856.0000 +num_redundant: 256.0000 +num_correct_5: 1735.0000 +Bleu_1: 0.4510 +Bleu_1_w: 0.1295 +Bleu_2: 0.3427 +Bleu_2_w: 0.0984 +Bleu_3: 0.2705 +Bleu_3_w: 0.0777 +Bleu_4: 0.2207 +Bleu_4_w: 0.0634 +CIDEr: 1.2046 +CIDEr_w: 0.3459 +METEOR: 0.2255 +METEOR_w: 0.0648 + +Updating eval setup: not_talk_threshold: 0.1 -> 0.2 +ER@0.50: 0.850 (S=1334, C=2257, M=4727, R=1007) +ER@0.55: 0.892 (S=1685, C=1906, M=4727, R=1007) +ER@0.60: 0.933 (S=2030, C=1561, M=4727, R=1007) +ER@0.65: 0.970 (S=2338, C=1253, M=4727, R=1007) +ER@0.70: 1.001 (S=2594, C=997, M=4727, R=1007) +ER@0.75: 1.031 (S=2840, C=751, M=4727, R=1007) +ER@0.80: 1.061 (S=3091, C=500, M=4727, R=1007) +ER@0.85: 1.083 (S=3272, C=319, M=4727, R=1007) +ER@0.90: 1.102 (S=3433, C=158, M=4727, R=1007) +ER@0.95: 1.114 (S=3536, C=55, M=4727, R=1007) +ER@1.00: 1.120 (S=3586, C=5, M=4727, R=1007) +Evalulation: assembly101-dialog_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +missing_rate: 0.5683 +redundant_rate: 0.2190 +match_cost: 0.4643 +semantic_score: 0.5757 +mean_error_rate: 1.0144 +mean_error_rate_v2: 0.9048 +jaccard_index: 0.3851 +jaccard_index_v2: 0.0952 +AP: 0.1930 +AR: 0.1067 +Avg-F1: 0.1374 +num_matched: 3591.0000 +num_missed: 4727.0000 +num_redundant: 1007.0000 +num_correct_5: 2257.0000 +Bleu_1: 0.4320 +Bleu_1_w: 0.1664 +Bleu_2: 0.3209 +Bleu_2_w: 0.1236 +Bleu_3: 0.2477 +Bleu_3_w: 0.0954 +Bleu_4: 0.1984 +Bleu_4_w: 0.0764 +CIDEr: 1.0423 +CIDEr_w: 0.4014 +METEOR: 0.2126 +METEOR_w: 0.0819 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +ER@0.50: 0.915 (S=1895, C=2816, M=3607, R=2106) +ER@0.55: 0.977 (S=2414, C=2297, M=3607, R=2106) +ER@0.60: 1.032 (S=2869, C=1842, M=3607, R=2106) +ER@0.65: 1.080 (S=3272, C=1439, M=3607, R=2106) +ER@0.70: 1.120 (S=3606, C=1105, M=3607, R=2106) +ER@0.75: 1.155 (S=3896, C=815, M=3607, R=2106) +ER@0.80: 1.187 (S=4163, C=548, M=3607, R=2106) +ER@0.85: 1.213 (S=4375, C=336, M=3607, R=2106) +ER@0.90: 1.235 (S=4561, C=150, M=3607, R=2106) +ER@0.95: 1.246 (S=4654, C=57, M=3607, R=2106) +ER@1.00: 1.252 (S=4704, C=7, M=3607, R=2106) +Evalulation: assembly101-dialog_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +missing_rate: 0.4336 +redundant_rate: 0.3089 +match_cost: 0.4947 +semantic_score: 0.5574 +mean_error_rate: 1.1285 +mean_error_rate_v2: 0.9005 +jaccard_index: 0.4519 +jaccard_index_v2: 0.0995 +AP: 0.1522 +AR: 0.1247 +Avg-F1: 0.1371 +num_matched: 4711.0000 +num_missed: 3607.0000 +num_redundant: 2106.0000 +num_correct_5: 2816.0000 +Bleu_1: 0.4094 +Bleu_1_w: 0.1850 +Bleu_2: 0.2955 +Bleu_2_w: 0.1336 +Bleu_3: 0.2229 +Bleu_3_w: 0.1007 +Bleu_4: 0.1751 +Bleu_4_w: 0.0791 +CIDEr: 0.8843 +CIDEr_w: 0.3996 +METEOR: 0.1972 +METEOR_w: 0.0891 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +ER@0.50: 1.077 (S=2462, C=3334, M=2522, R=3971) +ER@0.55: 1.160 (S=3152, C=2644, M=2522, R=3971) +ER@0.60: 1.231 (S=3747, C=2049, M=2522, R=3971) +ER@0.65: 1.290 (S=4234, C=1562, M=2522, R=3971) +ER@0.70: 1.333 (S=4593, C=1203, M=2522, R=3971) +ER@0.75: 1.375 (S=4943, C=853, M=2522, R=3971) +ER@0.80: 1.411 (S=5240, C=556, M=2522, R=3971) +ER@0.85: 1.437 (S=5462, C=334, M=2522, R=3971) +ER@0.90: 1.460 (S=5648, C=148, M=2522, R=3971) +ER@0.95: 1.471 (S=5744, C=52, M=2522, R=3971) +ER@1.00: 1.476 (S=5787, C=9, M=2522, R=3971) +Evalulation: assembly101-dialog_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +missing_rate: 0.3032 +redundant_rate: 0.4066 +match_cost: 0.5143 +semantic_score: 0.5435 +mean_error_rate: 1.3381 +mean_error_rate_v2: 0.9057 +jaccard_index: 0.4716 +jaccard_index_v2: 0.0943 +AP: 0.1186 +AR: 0.1393 +Avg-F1: 0.1281 +num_matched: 5796.0000 +num_missed: 2522.0000 +num_redundant: 3971.0000 +num_correct_5: 3334.0000 +Bleu_1: 0.4019 +Bleu_1_w: 0.1896 +Bleu_2: 0.2875 +Bleu_2_w: 0.1356 +Bleu_3: 0.2158 +Bleu_3_w: 0.1018 +Bleu_4: 0.1694 +Bleu_4_w: 0.0799 +CIDEr: 0.8355 +CIDEr_w: 0.3941 +METEOR: 0.1900 +METEOR_w: 0.0896 + +All Finished! Time: 83.57 minutes +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +Runs: +ego4d/dialog_val_L0_I10|stream|4k|0.05|summarize_and_drop +ego4d/dialog_val_L0_I10|stream|4k|0.1|summarize_and_drop +holoassist/dialog_val_L0_I10|stream|4k|0.1|summarize_and_drop +epickitchens/dialog_val_L0_I10|stream|4k|0.1|summarize_and_drop +egoexolearn/dialog_val_L0_I10|stream|4k|0.1|summarize_and_drop +wtag/dialog_val_L0_I10|stream|4k|0.1|summarize_and_drop +assembly101/dialog_val_L0_I10|stream|4k|0.1|summarize_and_drop +ego4d/dialog_val_L0_I10|stream|4k|0.2|summarize_and_drop +holoassist/dialog_val_L0_I10|stream|4k|0.2|summarize_and_drop +epickitchens/dialog_val_L0_I10|stream|4k|0.2|summarize_and_drop +egoexolearn/dialog_val_L0_I10|stream|4k|0.2|summarize_and_drop +wtag/dialog_val_L0_I10|stream|4k|0.2|summarize_and_drop +assembly101/dialog_val_L0_I10|stream|4k|0.2|summarize_and_drop +ego4d/dialog_val_L0_I10|stream|4k|0.3|summarize_and_drop +holoassist/dialog_val_L0_I10|stream|4k|0.3|summarize_and_drop +epickitchens/dialog_val_L0_I10|stream|4k|0.3|summarize_and_drop +egoexolearn/dialog_val_L0_I10|stream|4k|0.3|summarize_and_drop +wtag/dialog_val_L0_I10|stream|4k|0.3|summarize_and_drop +assembly101/dialog_val_L0_I10|stream|4k|0.3|summarize_and_drop +ego4d/dialog_val_L0_I10|stream|4k|0.4|summarize_and_drop +holoassist/dialog_val_L0_I10|stream|4k|0.4|summarize_and_drop +epickitchens/dialog_val_L0_I10|stream|4k|0.4|summarize_and_drop +egoexolearn/dialog_val_L0_I10|stream|4k|0.4|summarize_and_drop +wtag/dialog_val_L0_I10|stream|4k|0.4|summarize_and_drop +assembly101/dialog_val_L0_I10|stream|4k|0.4|summarize_and_drop +Traceback (most recent call last): + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 121, in + main(eval_args, slurm_args) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 110, in main + job.results() # wait for the job to finish + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in results + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 266, in result + r = self.results() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 289, in results + outcome, result = self._get_outcome_and_result() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 384, in _get_outcome_and_result + raise utils.UncompletedJobError("\n".join(message)) +submitit.core.utils.UncompletedJobError: Job 13998 (task: 0) with path /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/13998/13998_0_result.pkl +has not produced any output (state: CANCELLED by 649731) +Error stream produced: +---------------------------------------- + +Loading checkpoint shards: 0%| | 0/4 [00:00 stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.05 +ER@0.50: 0.874 (S=191, C=757, M=3866, R=149) +ER@0.55: 0.891 (S=276, C=672, M=3866, R=149) +ER@0.60: 0.908 (S=358, C=590, M=3866, R=149) +ER@0.65: 0.932 (S=473, C=475, M=3866, R=149) +ER@0.70: 0.954 (S=579, C=369, M=3866, R=149) +ER@0.75: 0.976 (S=682, C=266, M=3866, R=149) +ER@0.80: 0.993 (S=766, C=182, M=3866, R=149) +ER@0.85: 1.007 (S=832, C=116, M=3866, R=149) +ER@0.90: 1.018 (S=888, C=60, M=3866, R=149) +ER@0.95: 1.027 (S=927, C=21, M=3866, R=149) +ER@1.00: 1.031 (S=948, C=0, M=3866, R=149) +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.05-maxlen_4k +Metrics: +missing_rate: 0.8031 +redundant_rate: 0.1358 +match_cost: 0.3689 +semantic_score: 0.6455 +mean_error_rate: 0.9647 +mean_error_rate_v2: 0.9357 +jaccard_index: 0.1910 +jaccard_index_v2: 0.0643 +AP: 0.2907 +AR: 0.0662 +Avg-F1: 0.1079 +num_matched: 948.0000 +num_missed: 3866.0000 +num_redundant: 149.0000 +num_correct_5: 757.0000 +Bleu_1: 0.4054 +Bleu_1_w: 0.0774 +Bleu_2: 0.3015 +Bleu_2_w: 0.0576 +Bleu_3: 0.2369 +Bleu_3_w: 0.0452 +Bleu_4: 0.1922 +Bleu_4_w: 0.0367 +CIDEr: 1.1670 +CIDEr_w: 0.2229 +METEOR: 0.2061 +METEOR_w: 0.0394 + +Updating eval setup: not_talk_threshold: 0.05 -> 0.5 +ER@0.50: 1.727 (S=887, C=1687, M=2240, R=5188) +ER@0.55: 1.781 (S=1145, C=1429, M=2240, R=5188) +ER@0.60: 1.841 (S=1436, C=1138, M=2240, R=5188) +ER@0.65: 1.895 (S=1694, C=880, M=2240, R=5188) +ER@0.70: 1.947 (S=1947, C=627, M=2240, R=5188) +ER@0.75: 1.989 (S=2146, C=428, M=2240, R=5188) +ER@0.80: 2.025 (S=2318, C=256, M=2240, R=5188) +ER@0.85: 2.048 (S=2431, C=143, M=2240, R=5188) +ER@0.90: 2.064 (S=2509, C=65, M=2240, R=5188) +ER@0.95: 2.074 (S=2558, C=16, M=2240, R=5188) +ER@1.00: 2.078 (S=2574, C=0, M=2240, R=5188) +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +missing_rate: 0.4653 +redundant_rate: 0.6684 +match_cost: 0.4851 +semantic_score: 0.5699 +mean_error_rate: 1.9518 +mean_error_rate_v2: 0.9394 +jaccard_index: 0.2573 +jaccard_index_v2: 0.0606 +AP: 0.0781 +AR: 0.1259 +Avg-F1: 0.0964 +num_matched: 2574.0000 +num_missed: 2240.0000 +num_redundant: 5188.0000 +num_correct_5: 1687.0000 +Bleu_1: 0.3517 +Bleu_1_w: 0.0905 +Bleu_2: 0.2362 +Bleu_2_w: 0.0608 +Bleu_3: 0.1698 +Bleu_3_w: 0.0437 +Bleu_4: 0.1282 +Bleu_4_w: 0.0330 +CIDEr: 0.7115 +CIDEr_w: 0.1831 +METEOR: 0.1725 +METEOR_w: 0.0444 + +Evaluation datasets: +* holoassist/dialog_val | num samples: 291 + +Updating eval setup: inference_runner_type: None -> stream +ER@0.50: 1.404 (S=4285, C=6927, M=4049, R=13086) +ER@0.55: 1.467 (S=5258, C=5954, M=4049, R=13086) +ER@0.60: 1.532 (S=6241, C=4971, M=4049, R=13086) +ER@0.65: 1.601 (S=7305, C=3907, M=4049, R=13086) +ER@0.70: 1.665 (S=8278, C=2934, M=4049, R=13086) +ER@0.75: 1.726 (S=9211, C=2001, M=4049, R=13086) +ER@0.80: 1.777 (S=9989, C=1223, M=4049, R=13086) +ER@0.85: 1.814 (S=10554, C=658, M=4049, R=13086) +ER@0.90: 1.840 (S=10941, C=271, M=4049, R=13086) +ER@0.95: 1.851 (S=11117, C=95, M=4049, R=13086) +ER@1.00: 1.857 (S=11209, C=3, M=4049, R=13086) +Evalulation: holoassist-dialog_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +missing_rate: 0.2653 +redundant_rate: 0.5386 +match_cost: 0.4802 +semantic_score: 0.5589 +mean_error_rate: 1.6851 +mean_error_rate_v2: 0.9072 +jaccard_index: 0.3955 +jaccard_index_v2: 0.0928 +AP: 0.1083 +AR: 0.1724 +Avg-F1: 0.1330 +num_matched: 11212.0000 +num_missed: 4049.0000 +num_redundant: 13086.0000 +num_correct_5: 6927.0000 +Bleu_1: 0.3981 +Bleu_1_w: 0.1575 +Bleu_2: 0.2759 +Bleu_2_w: 0.1091 +Bleu_3: 0.2042 +Bleu_3_w: 0.0808 +Bleu_4: 0.1561 +Bleu_4_w: 0.0617 +CIDEr: 0.9008 +CIDEr_w: 0.3563 +METEOR: 0.1892 +METEOR_w: 0.0748 + +Evaluation datasets: +* epickitchens/dialog_val | num samples: 150 + +Updating eval setup: inference_runner_type: None -> stream +ER@0.50: 1.540 (S=2377, C=2349, M=1706, R=5824) +ER@0.55: 1.618 (S=2874, C=1852, M=1706, R=5824) +ER@0.60: 1.695 (S=3370, C=1356, M=1706, R=5824) +ER@0.65: 1.755 (S=3761, C=965, M=1706, R=5824) +ER@0.70: 1.805 (S=4080, C=646, M=1706, R=5824) +ER@0.75: 1.840 (S=4304, C=422, M=1706, R=5824) +ER@0.80: 1.867 (S=4476, C=250, M=1706, R=5824) +ER@0.85: 1.886 (S=4602, C=124, M=1706, R=5824) +ER@0.90: 1.896 (S=4666, C=60, M=1706, R=5824) +ER@0.95: 1.903 (S=4709, C=17, M=1706, R=5824) +ER@1.00: 1.905 (S=4725, C=1, M=1706, R=5824) +Evalulation: epickitchens-dialog_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +missing_rate: 0.2652 +redundant_rate: 0.5520 +match_cost: 0.5525 +semantic_score: 0.5049 +mean_error_rate: 1.7918 +mean_error_rate_v2: 0.9403 +jaccard_index: 0.3856 +jaccard_index_v2: 0.0597 +AP: 0.0693 +AR: 0.1137 +Avg-F1: 0.0861 +num_matched: 4726.0000 +num_missed: 1706.0000 +num_redundant: 5824.0000 +num_correct_5: 2349.0000 +Bleu_1: 0.3553 +Bleu_1_w: 0.1370 +Bleu_2: 0.2323 +Bleu_2_w: 0.0896 +Bleu_3: 0.1632 +Bleu_3_w: 0.0629 +Bleu_4: 0.1211 +Bleu_4_w: 0.0467 +CIDEr: 0.7920 +CIDEr_w: 0.3054 +METEOR: 0.1752 +METEOR_w: 0.0675 + +Evaluation datasets: +* egoexolearn/dialog_val | num samples: 123 + +Updating eval setup: inference_runner_type: None -> stream +ER@0.50: 1.734 (S=3194, C=3736, M=5061, R=12537) +ER@0.55: 1.798 (S=3960, C=2970, M=5061, R=12537) +ER@0.60: 1.857 (S=4675, C=2255, M=5061, R=12537) +ER@0.65: 1.909 (S=5292, C=1638, M=5061, R=12537) +ER@0.70: 1.950 (S=5781, C=1149, M=5061, R=12537) +ER@0.75: 1.981 (S=6158, C=772, M=5061, R=12537) +ER@0.80: 2.007 (S=6467, C=463, M=5061, R=12537) +ER@0.85: 2.024 (S=6673, C=257, M=5061, R=12537) +ER@0.90: 2.036 (S=6818, C=112, M=5061, R=12537) +ER@0.95: 2.043 (S=6897, C=33, M=5061, R=12537) +ER@1.00: 2.045 (S=6929, C=1, M=5061, R=12537) +Evalulation: egoexolearn-dialog_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +missing_rate: 0.4221 +redundant_rate: 0.6440 +match_cost: 0.5339 +semantic_score: 0.5291 +mean_error_rate: 1.9440 +mean_error_rate_v2: 0.9504 +jaccard_index: 0.2825 +jaccard_index_v2: 0.0496 +AP: 0.0625 +AR: 0.1015 +Avg-F1: 0.0774 +num_matched: 6930.0000 +num_missed: 5061.0000 +num_redundant: 12537.0000 +num_correct_5: 3736.0000 +Bleu_1: 0.3791 +Bleu_1_w: 0.1071 +Bleu_2: 0.2509 +Bleu_2_w: 0.0709 +Bleu_3: 0.1770 +Bleu_3_w: 0.0500 +Bleu_4: 0.1306 +Bleu_4_w: 0.0369 +CIDEr: 0.6850 +CIDEr_w: 0.1935 +METEOR: 0.1682 +METEOR_w: 0.0475 + +Evaluation datasets: +* wtag/dialog_val | num samples: 21 + +Updating eval setup: inference_runner_type: None -> stream +ER@0.50: 1.171 (S=409, C=307, M=357, R=491) +ER@0.55: 1.212 (S=453, C=263, M=357, R=491) +ER@0.60: 1.255 (S=499, C=217, M=357, R=491) +ER@0.65: 1.308 (S=556, C=160, M=357, R=491) +ER@0.70: 1.345 (S=595, C=121, M=357, R=491) +ER@0.75: 1.387 (S=640, C=76, M=357, R=491) +ER@0.80: 1.409 (S=664, C=52, M=357, R=491) +ER@0.85: 1.429 (S=685, C=31, M=357, R=491) +ER@0.90: 1.436 (S=693, C=23, M=357, R=491) +ER@0.95: 1.447 (S=705, C=11, M=357, R=491) +ER@1.00: 1.458 (S=716, C=0, M=357, R=491) +Evalulation: wtag-dialog_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +missing_rate: 0.3327 +redundant_rate: 0.4068 +match_cost: 0.5748 +semantic_score: 0.4648 +mean_error_rate: 1.3508 +mean_error_rate_v2: 0.9267 +jaccard_index: 0.4578 +jaccard_index_v2: 0.0733 +AP: 0.0950 +AR: 0.1068 +Avg-F1: 0.1006 +num_matched: 716.0000 +num_missed: 357.0000 +num_redundant: 491.0000 +num_correct_5: 307.0000 +Bleu_1: 0.3541 +Bleu_1_w: 0.1621 +Bleu_2: 0.2480 +Bleu_2_w: 0.1136 +Bleu_3: 0.1815 +Bleu_3_w: 0.0831 +Bleu_4: 0.1368 +Bleu_4_w: 0.0626 +CIDEr: 0.9979 +CIDEr_w: 0.4568 +METEOR: 0.1988 +METEOR_w: 0.0910 + +Evaluation datasets: +* assembly101/dialog_val | num samples: 336 + +Updating eval setup: inference_runner_type: None -> stream +ER@0.50: 1.394 (S=2962, C=3604, M=1752, R=6883) +ER@0.55: 1.483 (S=3698, C=2868, M=1752, R=6883) +ER@0.60: 1.557 (S=4313, C=2253, M=1752, R=6883) +ER@0.65: 1.624 (S=4871, C=1695, M=1752, R=6883) +ER@0.70: 1.679 (S=5331, C=1235, M=1752, R=6883) +ER@0.75: 1.722 (S=5692, C=874, M=1752, R=6883) +ER@0.80: 1.757 (S=5980, C=586, M=1752, R=6883) +ER@0.85: 1.785 (S=6216, C=350, M=1752, R=6883) +ER@0.90: 1.808 (S=6403, C=163, M=1752, R=6883) +ER@0.95: 1.820 (S=6507, C=59, M=1752, R=6883) +ER@1.00: 1.827 (S=6560, C=6, M=1752, R=6883) +Evalulation: assembly101-dialog_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +missing_rate: 0.2106 +redundant_rate: 0.5118 +match_cost: 0.5226 +semantic_score: 0.5341 +mean_error_rate: 1.6778 +mean_error_rate_v2: 0.9181 +jaccard_index: 0.4319 +jaccard_index_v2: 0.0819 +AP: 0.0926 +AR: 0.1497 +Avg-F1: 0.1144 +num_matched: 6566.0000 +num_missed: 1752.0000 +num_redundant: 6883.0000 +num_correct_5: 3604.0000 +Bleu_1: 0.3910 +Bleu_1_w: 0.1689 +Bleu_2: 0.2759 +Bleu_2_w: 0.1192 +Bleu_3: 0.2045 +Bleu_3_w: 0.0883 +Bleu_4: 0.1583 +Bleu_4_w: 0.0684 +CIDEr: 0.7943 +CIDEr_w: 0.3431 +METEOR: 0.1861 +METEOR_w: 0.0804 + +All Finished! Time: 11.55 minutes +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +Runs: +ego4d/dialog_val_L0_I10|stream|4k|0.05|summarize_and_drop +ego4d/dialog_val_L0_I10|stream|4k|0.5|summarize_and_drop +holoassist/dialog_val_L0_I10|stream|4k|0.5|summarize_and_drop +epickitchens/dialog_val_L0_I10|stream|4k|0.5|summarize_and_drop +egoexolearn/dialog_val_L0_I10|stream|4k|0.5|summarize_and_drop +wtag/dialog_val_L0_I10|stream|4k|0.5|summarize_and_drop +assembly101/dialog_val_L0_I10|stream|4k|0.5|summarize_and_drop +Traceback (most recent call last): + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 144, in + main(eval_args, slurm_args) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 133, in main + job.results() # wait for the job to finish + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in results + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 266, in result + r = self.results() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 289, in results + outcome, result = self._get_outcome_and_result() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 384, in _get_outcome_and_result + raise utils.UncompletedJobError("\n".join(message)) +submitit.core.utils.UncompletedJobError: Job 14356 (task: 0) with path /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/14356/14356_0_result.pkl +has not produced any output (state: CANCELLED by 636977) +No output/error stream produced ! Check: /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/14356/14356_0_log.out +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +{'assembly101/dialog-klg_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.5}]}, + 'ego4d/dialog-klg_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.5}]}, + 'ego4d/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.05}]}, + 'egoexolearn/dialog-klg_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.5}]}, + 'epickitchens/dialog-klg_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.5}]}, + 'holoassist/dialog-klg_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.5}]}, + 'wtag/dialog-klg_val_L0_I10': {'stream': [{'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}, + {'context_handling_method': 'summarize_and_drop', + 'eval_max_seq_len': 4096, + 'eval_max_seq_len_str': '4k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.5}]}} +Evaluation datasets: +* ego4d/dialog_val | num samples: 96 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.05 +Evalulation: ego4d-dialog_val_L0_I10/stream/notalk0.05-maxlen_4k +Metrics: +jaccard_index: 0.1525 +missing_rate: 0.8031 +redundant_rate: 0.1358 +semantic_score: 0.7066 +time_diff: 0.1618 +precision: 0.6901 +recall: 0.1572 +F1: 0.2561 +num_matched: 757.0000 +num_mismatched: 191.0000 +num_missed: 3866.0000 +num_redundant: 149.0000 +Bleu_1: 0.4054 +Bleu_1_w: 0.0618 +Bleu_2: 0.3015 +Bleu_2_w: 0.0460 +Bleu_3: 0.2369 +Bleu_3_w: 0.0361 +Bleu_4: 0.1922 +Bleu_4_w: 0.0293 +CIDEr: 1.1670 +CIDEr_w: 0.1780 +METEOR: 0.2061 +METEOR_w: 0.0314 + +Evaluation datasets: +* ego4d/dialog-klg_val | num samples: 96 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +Evalulation: ego4d-dialog-klg_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +jaccard_index: 0.2372 +missing_rate: 0.5825 +redundant_rate: 0.4179 +semantic_score: 0.7012 +time_diff: 1.4353 +precision: 0.4298 +recall: 0.3083 +F1: 0.3590 +num_matched: 1484.0000 +num_mismatched: 526.0000 +num_missed: 2804.0000 +num_redundant: 1443.0000 +Bleu_1: 0.4006 +Bleu_1_w: 0.0950 +Bleu_2: 0.2831 +Bleu_2_w: 0.0671 +Bleu_3: 0.2126 +Bleu_3_w: 0.0504 +Bleu_4: 0.1669 +Bleu_4_w: 0.0396 +CIDEr: 0.9483 +CIDEr_w: 0.2249 +METEOR: 0.1914 +METEOR_w: 0.0454 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +Evalulation: ego4d-dialog-klg_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +jaccard_index: 0.2593 +missing_rate: 0.4703 +redundant_rate: 0.4871 +semantic_score: 0.6905 +time_diff: 1.6317 +precision: 0.3773 +recall: 0.3897 +F1: 0.3834 +num_matched: 1876.0000 +num_mismatched: 674.0000 +num_missed: 2264.0000 +num_redundant: 2422.0000 +Bleu_1: 0.3825 +Bleu_1_w: 0.0992 +Bleu_2: 0.2616 +Bleu_2_w: 0.0678 +Bleu_3: 0.1904 +Bleu_3_w: 0.0494 +Bleu_4: 0.1449 +Bleu_4_w: 0.0376 +CIDEr: 0.8305 +CIDEr_w: 0.2153 +METEOR: 0.1791 +METEOR_w: 0.0464 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +Evalulation: ego4d-dialog-klg_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +jaccard_index: 0.2494 +missing_rate: 0.3752 +redundant_rate: 0.5589 +semantic_score: 0.6898 +time_diff: 1.6627 +precision: 0.3154 +recall: 0.4468 +F1: 0.3698 +num_matched: 2151.0000 +num_mismatched: 857.0000 +num_missed: 1806.0000 +num_redundant: 3811.0000 +Bleu_1: 0.3782 +Bleu_1_w: 0.0943 +Bleu_2: 0.2583 +Bleu_2_w: 0.0644 +Bleu_3: 0.1866 +Bleu_3_w: 0.0465 +Bleu_4: 0.1415 +Bleu_4_w: 0.0353 +CIDEr: 0.7857 +CIDEr_w: 0.1959 +METEOR: 0.1773 +METEOR_w: 0.0442 + +Updating eval setup: not_talk_threshold: 0.4 -> 0.5 +Evalulation: ego4d-dialog-klg_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +jaccard_index: 0.2068 +missing_rate: 0.2713 +redundant_rate: 0.6746 +semantic_score: 0.6920 +time_diff: 1.6411 +precision: 0.2318 +recall: 0.5191 +F1: 0.3205 +num_matched: 2499.0000 +num_mismatched: 1009.0000 +num_missed: 1306.0000 +num_redundant: 7273.0000 +Bleu_1: 0.3764 +Bleu_1_w: 0.0778 +Bleu_2: 0.2569 +Bleu_2_w: 0.0531 +Bleu_3: 0.1855 +Bleu_3_w: 0.0383 +Bleu_4: 0.1405 +Bleu_4_w: 0.0290 +CIDEr: 0.8067 +CIDEr_w: 0.1668 +METEOR: 0.1762 +METEOR_w: 0.0364 + +Evaluation datasets: +* holoassist/dialog-klg_val | num samples: 291 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +Evalulation: holoassist-dialog-klg_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +jaccard_index: 0.2401 +missing_rate: 0.6556 +redundant_rate: 0.1063 +semantic_score: 0.7072 +time_diff: 0.2639 +precision: 0.6485 +recall: 0.2499 +F1: 0.3608 +num_matched: 3814.0000 +num_mismatched: 1442.0000 +num_missed: 10005.0000 +num_redundant: 625.0000 +Bleu_1: 0.4497 +Bleu_1_w: 0.1080 +Bleu_2: 0.3331 +Bleu_2_w: 0.0800 +Bleu_3: 0.2596 +Bleu_3_w: 0.0623 +Bleu_4: 0.2075 +Bleu_4_w: 0.0498 +CIDEr: 1.2794 +CIDEr_w: 0.3072 +METEOR: 0.2181 +METEOR_w: 0.0524 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +Evalulation: holoassist-dialog-klg_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +jaccard_index: 0.2856 +missing_rate: 0.5639 +redundant_rate: 0.1503 +semantic_score: 0.7084 +time_diff: 0.3184 +precision: 0.5993 +recall: 0.3076 +F1: 0.4065 +num_matched: 4694.0000 +num_mismatched: 1962.0000 +num_missed: 8605.0000 +num_redundant: 1177.0000 +Bleu_1: 0.4467 +Bleu_1_w: 0.1275 +Bleu_2: 0.3288 +Bleu_2_w: 0.0939 +Bleu_3: 0.2549 +Bleu_3_w: 0.0728 +Bleu_4: 0.2027 +Bleu_4_w: 0.0579 +CIDEr: 1.2508 +CIDEr_w: 0.3572 +METEOR: 0.2162 +METEOR_w: 0.0618 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +Evalulation: holoassist-dialog-klg_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +jaccard_index: 0.3081 +missing_rate: 0.4152 +redundant_rate: 0.3046 +semantic_score: 0.7003 +time_diff: 0.4211 +precision: 0.4601 +recall: 0.3870 +F1: 0.4204 +num_matched: 5906.0000 +num_mismatched: 3019.0000 +num_missed: 6336.0000 +num_redundant: 3910.0000 +Bleu_1: 0.4307 +Bleu_1_w: 0.1327 +Bleu_2: 0.3123 +Bleu_2_w: 0.0962 +Bleu_3: 0.2399 +Bleu_3_w: 0.0739 +Bleu_4: 0.1894 +Bleu_4_w: 0.0583 +CIDEr: 1.1535 +CIDEr_w: 0.3554 +METEOR: 0.2069 +METEOR_w: 0.0638 + +Updating eval setup: not_talk_threshold: 0.4 -> 0.5 +Evalulation: holoassist-dialog-klg_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +jaccard_index: 0.2242 +missing_rate: 0.2352 +redundant_rate: 0.5908 +semantic_score: 0.6887 +time_diff: 0.4799 +precision: 0.2524 +recall: 0.4717 +F1: 0.3289 +num_matched: 7199.0000 +num_mismatched: 4472.0000 +num_missed: 3590.0000 +num_redundant: 16848.0000 +Bleu_1: 0.4083 +Bleu_1_w: 0.0916 +Bleu_2: 0.2891 +Bleu_2_w: 0.0648 +Bleu_3: 0.2179 +Bleu_3_w: 0.0489 +Bleu_4: 0.1687 +Bleu_4_w: 0.0378 +CIDEr: 0.9804 +CIDEr_w: 0.2198 +METEOR: 0.1935 +METEOR_w: 0.0434 + +Evaluation datasets: +* epickitchens/dialog-klg_val | num samples: 150 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +Evalulation: epickitchens-dialog-klg_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +jaccard_index: 0.2231 +missing_rate: 0.5463 +redundant_rate: 0.3338 +semantic_score: 0.6717 +time_diff: 0.5417 +precision: 0.4021 +recall: 0.2738 +F1: 0.3257 +num_matched: 1761.0000 +num_mismatched: 1157.0000 +num_missed: 3514.0000 +num_redundant: 1462.0000 +Bleu_1: 0.3952 +Bleu_1_w: 0.0882 +Bleu_2: 0.2765 +Bleu_2_w: 0.0617 +Bleu_3: 0.2043 +Bleu_3_w: 0.0456 +Bleu_4: 0.1577 +Bleu_4_w: 0.0352 +CIDEr: 1.0958 +CIDEr_w: 0.2444 +METEOR: 0.1939 +METEOR_w: 0.0433 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +Evalulation: epickitchens-dialog-klg_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +jaccard_index: 0.2339 +missing_rate: 0.4400 +redundant_rate: 0.4010 +semantic_score: 0.6655 +time_diff: 0.6095 +precision: 0.3439 +recall: 0.3215 +F1: 0.3323 +num_matched: 2068.0000 +num_mismatched: 1534.0000 +num_missed: 2830.0000 +num_redundant: 2411.0000 +Bleu_1: 0.3857 +Bleu_1_w: 0.0902 +Bleu_2: 0.2652 +Bleu_2_w: 0.0620 +Bleu_3: 0.1936 +Bleu_3_w: 0.0453 +Bleu_4: 0.1480 +Bleu_4_w: 0.0346 +CIDEr: 1.0297 +CIDEr_w: 0.2408 +METEOR: 0.1902 +METEOR_w: 0.0445 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +Evalulation: epickitchens-dialog-klg_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +jaccard_index: 0.2174 +missing_rate: 0.3392 +redundant_rate: 0.4994 +semantic_score: 0.6623 +time_diff: 0.6207 +precision: 0.2733 +recall: 0.3607 +F1: 0.3110 +num_matched: 2320.0000 +num_mismatched: 1930.0000 +num_missed: 2182.0000 +num_redundant: 4240.0000 +Bleu_1: 0.3760 +Bleu_1_w: 0.0817 +Bleu_2: 0.2538 +Bleu_2_w: 0.0552 +Bleu_3: 0.1819 +Bleu_3_w: 0.0395 +Bleu_4: 0.1370 +Bleu_4_w: 0.0298 +CIDEr: 0.9654 +CIDEr_w: 0.2099 +METEOR: 0.1854 +METEOR_w: 0.0403 + +Updating eval setup: not_talk_threshold: 0.4 -> 0.5 +Evalulation: epickitchens-dialog-klg_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +jaccard_index: 0.1807 +missing_rate: 0.2418 +redundant_rate: 0.6272 +semantic_score: 0.6557 +time_diff: 0.6182 +precision: 0.2021 +recall: 0.4111 +F1: 0.2710 +num_matched: 2644.0000 +num_mismatched: 2233.0000 +num_missed: 1555.0000 +num_redundant: 8204.0000 +Bleu_1: 0.3654 +Bleu_1_w: 0.0660 +Bleu_2: 0.2422 +Bleu_2_w: 0.0438 +Bleu_3: 0.1712 +Bleu_3_w: 0.0309 +Bleu_4: 0.1276 +Bleu_4_w: 0.0231 +CIDEr: 0.9017 +CIDEr_w: 0.1629 +METEOR: 0.1772 +METEOR_w: 0.0320 + +Evaluation datasets: +* egoexolearn/dialog-klg_val | num samples: 123 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +Evalulation: egoexolearn-dialog-klg_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +jaccard_index: 0.1475 +missing_rate: 0.7452 +redundant_rate: 0.3679 +semantic_score: 0.6786 +time_diff: 0.4626 +precision: 0.4202 +recall: 0.1694 +F1: 0.2414 +num_matched: 2031.0000 +num_mismatched: 1024.0000 +num_missed: 8936.0000 +num_redundant: 1778.0000 +Bleu_1: 0.4156 +Bleu_1_w: 0.0613 +Bleu_2: 0.2908 +Bleu_2_w: 0.0429 +Bleu_3: 0.2165 +Bleu_3_w: 0.0319 +Bleu_4: 0.1676 +Bleu_4_w: 0.0247 +CIDEr: 0.9464 +CIDEr_w: 0.1396 +METEOR: 0.1912 +METEOR_w: 0.0282 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +Evalulation: egoexolearn-dialog-klg_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +jaccard_index: 0.1675 +missing_rate: 0.6552 +redundant_rate: 0.4350 +semantic_score: 0.6711 +time_diff: 0.6654 +precision: 0.3474 +recall: 0.2120 +F1: 0.2633 +num_matched: 2542.0000 +num_mismatched: 1593.0000 +num_missed: 7856.0000 +num_redundant: 3183.0000 +Bleu_1: 0.4045 +Bleu_1_w: 0.0678 +Bleu_2: 0.2780 +Bleu_2_w: 0.0466 +Bleu_3: 0.2029 +Bleu_3_w: 0.0340 +Bleu_4: 0.1540 +Bleu_4_w: 0.0258 +CIDEr: 0.8411 +CIDEr_w: 0.1409 +METEOR: 0.1841 +METEOR_w: 0.0308 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +Evalulation: egoexolearn-dialog-klg_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +jaccard_index: 0.1726 +missing_rate: 0.4949 +redundant_rate: 0.5574 +semantic_score: 0.6555 +time_diff: 0.7835 +precision: 0.2474 +recall: 0.2824 +F1: 0.2637 +num_matched: 3386.0000 +num_mismatched: 2671.0000 +num_missed: 5934.0000 +num_redundant: 7629.0000 +Bleu_1: 0.3833 +Bleu_1_w: 0.0661 +Bleu_2: 0.2566 +Bleu_2_w: 0.0443 +Bleu_3: 0.1823 +Bleu_3_w: 0.0315 +Bleu_4: 0.1352 +Bleu_4_w: 0.0233 +CIDEr: 0.7185 +CIDEr_w: 0.1240 +METEOR: 0.1707 +METEOR_w: 0.0295 + +Updating eval setup: not_talk_threshold: 0.4 -> 0.5 +Evalulation: egoexolearn-dialog-klg_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +jaccard_index: 0.1458 +missing_rate: 0.3311 +redundant_rate: 0.6993 +semantic_score: 0.6466 +time_diff: 0.8158 +precision: 0.1675 +recall: 0.3726 +F1: 0.2311 +num_matched: 4468.0000 +num_mismatched: 3553.0000 +num_missed: 3970.0000 +num_redundant: 18656.0000 +Bleu_1: 0.3690 +Bleu_1_w: 0.0538 +Bleu_2: 0.2378 +Bleu_2_w: 0.0347 +Bleu_3: 0.1627 +Bleu_3_w: 0.0237 +Bleu_4: 0.1171 +Bleu_4_w: 0.0171 +CIDEr: 0.6205 +CIDEr_w: 0.0905 +METEOR: 0.1609 +METEOR_w: 0.0235 + +Evaluation datasets: +* wtag/dialog-klg_val | num samples: 21 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +Evalulation: wtag-dialog-klg_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +jaccard_index: 0.1995 +missing_rate: 0.5797 +redundant_rate: 0.1586 +semantic_score: 0.6858 +time_diff: 1.0368 +precision: 0.4310 +recall: 0.2153 +F1: 0.2871 +num_matched: 231.0000 +num_mismatched: 220.0000 +num_missed: 622.0000 +num_redundant: 85.0000 +Bleu_1: 0.4241 +Bleu_1_w: 0.0846 +Bleu_2: 0.3137 +Bleu_2_w: 0.0626 +Bleu_3: 0.2420 +Bleu_3_w: 0.0483 +Bleu_4: 0.1929 +Bleu_4_w: 0.0385 +CIDEr: 1.3310 +CIDEr_w: 0.2655 +METEOR: 0.2173 +METEOR_w: 0.0433 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +Evalulation: wtag-dialog-klg_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +jaccard_index: 0.2289 +missing_rate: 0.5126 +redundant_rate: 0.2182 +semantic_score: 0.7003 +time_diff: 1.2849 +precision: 0.4170 +recall: 0.2600 +F1: 0.3203 +num_matched: 279.0000 +num_mismatched: 244.0000 +num_missed: 550.0000 +num_redundant: 146.0000 +Bleu_1: 0.4273 +Bleu_1_w: 0.0978 +Bleu_2: 0.3186 +Bleu_2_w: 0.0729 +Bleu_3: 0.2478 +Bleu_3_w: 0.0567 +Bleu_4: 0.1991 +Bleu_4_w: 0.0456 +CIDEr: 1.4870 +CIDEr_w: 0.3403 +METEOR: 0.2197 +METEOR_w: 0.0503 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +Evalulation: wtag-dialog-klg_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +jaccard_index: 0.2426 +missing_rate: 0.4101 +redundant_rate: 0.2482 +semantic_score: 0.6882 +time_diff: 1.1640 +precision: 0.3694 +recall: 0.2898 +F1: 0.3248 +num_matched: 311.0000 +num_mismatched: 322.0000 +num_missed: 440.0000 +num_redundant: 209.0000 +Bleu_1: 0.4172 +Bleu_1_w: 0.1012 +Bleu_2: 0.3102 +Bleu_2_w: 0.0752 +Bleu_3: 0.2410 +Bleu_3_w: 0.0585 +Bleu_4: 0.1934 +Bleu_4_w: 0.0469 +CIDEr: 1.3370 +CIDEr_w: 0.3243 +METEOR: 0.2097 +METEOR_w: 0.0509 + +Updating eval setup: not_talk_threshold: 0.4 -> 0.5 +Evalulation: wtag-dialog-klg_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +jaccard_index: 0.2348 +missing_rate: 0.3793 +redundant_rate: 0.3033 +semantic_score: 0.6903 +time_diff: 0.9484 +precision: 0.3347 +recall: 0.2982 +F1: 0.3154 +num_matched: 320.0000 +num_mismatched: 346.0000 +num_missed: 407.0000 +num_redundant: 290.0000 +Bleu_1: 0.4214 +Bleu_1_w: 0.0989 +Bleu_2: 0.3118 +Bleu_2_w: 0.0732 +Bleu_3: 0.2368 +Bleu_3_w: 0.0556 +Bleu_4: 0.1839 +Bleu_4_w: 0.0432 +CIDEr: 1.2866 +CIDEr_w: 0.3021 +METEOR: 0.2111 +METEOR_w: 0.0496 + +Evaluation datasets: +* assembly101/dialog-klg_val | num samples: 336 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +Evalulation: assembly101-dialog-klg_val_L0_I10/stream/notalk0.2-maxlen_4k +Metrics: +jaccard_index: 0.3023 +missing_rate: 0.5042 +redundant_rate: 0.2580 +semantic_score: 0.7120 +time_diff: 0.6545 +precision: 0.5304 +recall: 0.3544 +F1: 0.4249 +num_matched: 2948.0000 +num_mismatched: 1176.0000 +num_missed: 4194.0000 +num_redundant: 1434.0000 +Bleu_1: 0.4448 +Bleu_1_w: 0.1345 +Bleu_2: 0.3359 +Bleu_2_w: 0.1015 +Bleu_3: 0.2638 +Bleu_3_w: 0.0797 +Bleu_4: 0.2141 +Bleu_4_w: 0.0647 +CIDEr: 1.1841 +CIDEr_w: 0.3580 +METEOR: 0.2179 +METEOR_w: 0.0659 + +Updating eval setup: not_talk_threshold: 0.2 -> 0.3 +Evalulation: assembly101-dialog-klg_val_L0_I10/stream/notalk0.3-maxlen_4k +Metrics: +jaccard_index: 0.3249 +missing_rate: 0.3729 +redundant_rate: 0.3298 +semantic_score: 0.7075 +time_diff: 0.7448 +precision: 0.4545 +recall: 0.4252 +F1: 0.4394 +num_matched: 3537.0000 +num_mismatched: 1679.0000 +num_missed: 3102.0000 +num_redundant: 2567.0000 +Bleu_1: 0.4329 +Bleu_1_w: 0.1407 +Bleu_2: 0.3224 +Bleu_2_w: 0.1048 +Bleu_3: 0.2507 +Bleu_3_w: 0.0815 +Bleu_4: 0.2020 +Bleu_4_w: 0.0656 +CIDEr: 1.0949 +CIDEr_w: 0.3558 +METEOR: 0.2097 +METEOR_w: 0.0681 + +Updating eval setup: not_talk_threshold: 0.3 -> 0.4 +Evalulation: assembly101-dialog-klg_val_L0_I10/stream/notalk0.4-maxlen_4k +Metrics: +jaccard_index: 0.3096 +missing_rate: 0.2691 +redundant_rate: 0.4068 +semantic_score: 0.6989 +time_diff: 0.8355 +precision: 0.3772 +recall: 0.4648 +F1: 0.4164 +num_matched: 3866.0000 +num_mismatched: 2214.0000 +num_missed: 2238.0000 +num_redundant: 4170.0000 +Bleu_1: 0.4176 +Bleu_1_w: 0.1293 +Bleu_2: 0.3049 +Bleu_2_w: 0.0944 +Bleu_3: 0.2324 +Bleu_3_w: 0.0719 +Bleu_4: 0.1842 +Bleu_4_w: 0.0570 +CIDEr: 1.0039 +CIDEr_w: 0.3108 +METEOR: 0.2002 +METEOR_w: 0.0620 + +Updating eval setup: not_talk_threshold: 0.4 -> 0.5 +Evalulation: assembly101-dialog-klg_val_L0_I10/stream/notalk0.5-maxlen_4k +Metrics: +jaccard_index: 0.2635 +missing_rate: 0.1862 +redundant_rate: 0.5217 +semantic_score: 0.6895 +time_diff: 0.8252 +precision: 0.2924 +recall: 0.4975 +F1: 0.3683 +num_matched: 4138.0000 +num_mismatched: 2631.0000 +num_missed: 1549.0000 +num_redundant: 7384.0000 +Bleu_1: 0.4049 +Bleu_1_w: 0.1067 +Bleu_2: 0.2918 +Bleu_2_w: 0.0769 +Bleu_3: 0.2195 +Bleu_3_w: 0.0579 +Bleu_4: 0.1724 +Bleu_4_w: 0.0454 +CIDEr: 0.9029 +CIDEr_w: 0.2379 +METEOR: 0.1914 +METEOR_w: 0.0504 + +All Finished! Time: 185.48 minutes +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +Runs: +ego4d/dialog_val_L0_I10|stream|4k|0.05|summarize_and_drop +ego4d/dialog-klg_val_L0_I10|stream|4k|0.2|summarize_and_drop +holoassist/dialog-klg_val_L0_I10|stream|4k|0.2|summarize_and_drop +epickitchens/dialog-klg_val_L0_I10|stream|4k|0.2|summarize_and_drop +egoexolearn/dialog-klg_val_L0_I10|stream|4k|0.2|summarize_and_drop +wtag/dialog-klg_val_L0_I10|stream|4k|0.2|summarize_and_drop +assembly101/dialog-klg_val_L0_I10|stream|4k|0.2|summarize_and_drop +ego4d/dialog-klg_val_L0_I10|stream|4k|0.3|summarize_and_drop +holoassist/dialog-klg_val_L0_I10|stream|4k|0.3|summarize_and_drop +epickitchens/dialog-klg_val_L0_I10|stream|4k|0.3|summarize_and_drop +egoexolearn/dialog-klg_val_L0_I10|stream|4k|0.3|summarize_and_drop +wtag/dialog-klg_val_L0_I10|stream|4k|0.3|summarize_and_drop +assembly101/dialog-klg_val_L0_I10|stream|4k|0.3|summarize_and_drop +ego4d/dialog-klg_val_L0_I10|stream|4k|0.4|summarize_and_drop +holoassist/dialog-klg_val_L0_I10|stream|4k|0.4|summarize_and_drop +epickitchens/dialog-klg_val_L0_I10|stream|4k|0.4|summarize_and_drop +egoexolearn/dialog-klg_val_L0_I10|stream|4k|0.4|summarize_and_drop +wtag/dialog-klg_val_L0_I10|stream|4k|0.4|summarize_and_drop +assembly101/dialog-klg_val_L0_I10|stream|4k|0.4|summarize_and_drop +ego4d/dialog-klg_val_L0_I10|stream|4k|0.5|summarize_and_drop +holoassist/dialog-klg_val_L0_I10|stream|4k|0.5|summarize_and_drop +epickitchens/dialog-klg_val_L0_I10|stream|4k|0.5|summarize_and_drop +egoexolearn/dialog-klg_val_L0_I10|stream|4k|0.5|summarize_and_drop +wtag/dialog-klg_val_L0_I10|stream|4k|0.5|summarize_and_drop +assembly101/dialog-klg_val_L0_I10|stream|4k|0.5|summarize_and_drop +Traceback (most recent call last): + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 170, in + main(eval_args, slurm_args) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 159, in main + job.results() # wait for the job to finish + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in results + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 266, in result + r = self.results() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 289, in results + outcome, result = self._get_outcome_and_result() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 384, in _get_outcome_and_result + raise utils.UncompletedJobError("\n".join(message)) +submitit.core.utils.UncompletedJobError: Job 14439 (task: 0) with path /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/14439/14439_0_result.pkl +has not produced any output (state: NODE_FAIL) +Error stream produced: +---------------------------------------- +slurmstepd: error: *** JOB 14439 ON h100-st-p548xlarge-2 CANCELLED AT 2024-08-22T21:55:20 DUE TO NODE FAILURE, SEE SLURMCTLD LOG FOR DETAILS *** + +Traceback (most recent call last): + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 166, in + eval_args, slurm_args = parser.parse_args_into_dataclasses() + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 155, in main + job = executor.submit(run_eval, eval_args, "slurm_inference", verbose=True) + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in results + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 266, in result + r = self.results() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 289, in results + outcome, result = self._get_outcome_and_result() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 384, in _get_outcome_and_result + raise utils.UncompletedJobError("\n".join(message)) +submitit.core.utils.UncompletedJobError: Job 14647 (task: 0) with path /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/14647/14647_0_result.pkl +has not produced any output (state: CANCELLED by 656171) +Error stream produced: +---------------------------------------- +slurmstepd: error: *** JOB 14647 ON h100-st-p548xlarge-129 CANCELLED AT 2024-08-23T04:29:36 DUE TO NODE FAILURE, SEE SLURMCTLD LOG FOR DETAILS *** + +Traceback (most recent call last): + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 167, in + main(eval_args, slurm_args) + File "/opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/mmassist/eval/eval.py", line 156, in main + job.results() # wait for the job to finish + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in results + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 287, in + return [tp.cast(R, sub_job.result()) for sub_job in self._sub_jobs] + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 266, in result + r = self.results() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 289, in results + outcome, result = self._get_outcome_and_result() + File "/data/home/imzyc/miniconda3/envs/mm/lib/python3.10/site-packages/submitit/core/core.py", line 384, in _get_outcome_and_result + raise utils.UncompletedJobError("\n".join(message)) +submitit.core.utils.UncompletedJobError: Job 14655 (task: 0) with path /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/14655/14655_0_result.pkl +has not produced any output (state: NODE_FAIL) +No output/error stream produced ! Check: /opt/hpcaas/.mounts/fs-036153e63d56f4dc2/home/imzyc/project/proactive-assist/slurm_logs/14655/14655_0_log.out +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +{'assembly101/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'drop_middle', + 'eval_max_seq_len': 32768, + 'eval_max_seq_len_str': '32k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.3}]}, + 'egoexolearn/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'drop_middle', + 'eval_max_seq_len': 32768, + 'eval_max_seq_len_str': '32k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}, + 'epickitchens/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'drop_middle', + 'eval_max_seq_len': 32768, + 'eval_max_seq_len_str': '32k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.2}]}, + 'holoassist/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'drop_middle', + 'eval_max_seq_len': 32768, + 'eval_max_seq_len_str': '32k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}, + 'wtag/dialog_val_L0_I10': {'stream': [{'context_handling_method': 'drop_middle', + 'eval_max_seq_len': 32768, + 'eval_max_seq_len_str': '32k', + 'inference_runner_type': 'stream', + 'not_talk_threshold': 0.4}]}} +Evaluation datasets: +* epickitchens/dialog_val | num samples: 150 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: eval_max_seq_len_str: 4k -> 32k +Updating eval setup: eval_max_seq_len: 4096 -> 32768 +Updating eval setup: not_talk_threshold: 0.5 -> 0.2 +Updating eval setup: context_handling_method: summarize_and_drop -> drop_middle +Evalulation: epickitchens-dialog_val_L0_I10/stream/notalk0.2-maxlen_32k-drop_middle +Metrics: +jaccard_index: 0.1588 +missing_rate: 0.5362 +redundant_rate: 0.5902 +semantic_score: 0.6553 +time_diff: 0.4651 +precision: 0.2341 +recall: 0.2649 +F1: 0.2486 +num_matched: 1704.0000 +num_mismatched: 1279.0000 +num_missed: 3449.0000 +num_redundant: 4296.0000 +Bleu_1: 0.3641 +Bleu_1_w: 0.0578 +Bleu_2: 0.2427 +Bleu_2_w: 0.0386 +Bleu_3: 0.1741 +Bleu_3_w: 0.0277 +Bleu_4: 0.1317 +Bleu_4_w: 0.0209 +CIDEr: 0.8875 +CIDEr_w: 0.1410 +METEOR: 0.1807 +METEOR_w: 0.0287 + +Evaluation datasets: +* holoassist/dialog_val | num samples: 291 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: eval_max_seq_len_str: 4k -> 32k +Updating eval setup: eval_max_seq_len: 4096 -> 32768 +Updating eval setup: not_talk_threshold: 0.5 -> 0.4 +Updating eval setup: context_handling_method: summarize_and_drop -> drop_middle +Evalulation: holoassist-dialog_val_L0_I10/stream/notalk0.4-maxlen_32k-drop_middle +Metrics: +jaccard_index: 0.1546 +missing_rate: 0.2483 +redundant_rate: 0.7170 +semantic_score: 0.6805 +time_diff: 0.4487 +precision: 0.1691 +recall: 0.4491 +F1: 0.2457 +num_matched: 6854.0000 +num_mismatched: 4617.0000 +num_missed: 3790.0000 +num_redundant: 29061.0000 +Bleu_1: 0.3892 +Bleu_1_w: 0.0602 +Bleu_2: 0.2702 +Bleu_2_w: 0.0418 +Bleu_3: 0.2002 +Bleu_3_w: 0.0310 +Bleu_4: 0.1530 +Bleu_4_w: 0.0237 +CIDEr: 0.8604 +CIDEr_w: 0.1331 +METEOR: 0.1885 +METEOR_w: 0.0291 + +Evaluation datasets: +* egoexolearn/dialog_val | num samples: 123 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: eval_max_seq_len_str: 4k -> 32k +Updating eval setup: eval_max_seq_len: 4096 -> 32768 +Updating eval setup: not_talk_threshold: 0.5 -> 0.4 +Updating eval setup: context_handling_method: summarize_and_drop -> drop_middle +Evalulation: egoexolearn-dialog_val_L0_I10/stream/notalk0.4-maxlen_32k-drop_middle +Metrics: +jaccard_index: 0.0782 +missing_rate: 0.3061 +redundant_rate: 0.8457 +semantic_score: 0.6425 +time_diff: 0.6191 +precision: 0.0836 +recall: 0.3759 +F1: 0.1367 +num_matched: 4507.0000 +num_mismatched: 3813.0000 +num_missed: 3671.0000 +num_redundant: 45615.0000 +Bleu_1: 0.3620 +Bleu_1_w: 0.0283 +Bleu_2: 0.2327 +Bleu_2_w: 0.0182 +Bleu_3: 0.1599 +Bleu_3_w: 0.0125 +Bleu_4: 0.1151 +Bleu_4_w: 0.0090 +CIDEr: 0.5858 +CIDEr_w: 0.0458 +METEOR: 0.1635 +METEOR_w: 0.0128 + +Evaluation datasets: +* assembly101/dialog_val | num samples: 336 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: eval_max_seq_len_str: 4k -> 32k +Updating eval setup: eval_max_seq_len: 4096 -> 32768 +Updating eval setup: not_talk_threshold: 0.5 -> 0.3 +Updating eval setup: context_handling_method: summarize_and_drop -> drop_middle +Evalulation: assembly101-dialog_val_L0_I10/stream/notalk0.3-maxlen_32k-drop_middle +Metrics: +jaccard_index: 0.1655 +missing_rate: 0.3450 +redundant_rate: 0.6649 +semantic_score: 0.6726 +time_diff: 0.7135 +precision: 0.1947 +recall: 0.3806 +F1: 0.2576 +num_matched: 3166.0000 +num_mismatched: 2282.0000 +num_missed: 2870.0000 +num_redundant: 10810.0000 +Bleu_1: 0.3941 +Bleu_1_w: 0.0652 +Bleu_2: 0.2816 +Bleu_2_w: 0.0466 +Bleu_3: 0.2109 +Bleu_3_w: 0.0349 +Bleu_4: 0.1648 +Bleu_4_w: 0.0273 +CIDEr: 0.8206 +CIDEr_w: 0.1358 +METEOR: 0.1914 +METEOR_w: 0.0317 + +Evaluation datasets: +* wtag/dialog_val | num samples: 21 + +Updating eval setup: inference_runner_type: None -> stream +Updating eval setup: eval_max_seq_len_str: 4k -> 32k +Updating eval setup: eval_max_seq_len: 4096 -> 32768 +Updating eval setup: not_talk_threshold: 0.5 -> 0.4 +Updating eval setup: context_handling_method: summarize_and_drop -> drop_middle +Evalulation: wtag-dialog_val_L0_I10/stream/notalk0.4-maxlen_32k-drop_middle +Metrics: +jaccard_index: 0.0647 +missing_rate: 0.1240 +redundant_rate: 0.8470 +semantic_score: 0.6565 +time_diff: 0.9101 +precision: 0.0661 +recall: 0.3784 +F1: 0.1125 +num_matched: 406.0000 +num_mismatched: 534.0000 +num_missed: 133.0000 +num_redundant: 5203.0000 +Bleu_1: 0.3100 +Bleu_1_w: 0.0201 +Bleu_2: 0.2060 +Bleu_2_w: 0.0133 +Bleu_3: 0.1422 +Bleu_3_w: 0.0092 +Bleu_4: 0.1003 +Bleu_4_w: 0.0065 +CIDEr: 0.7078 +CIDEr_w: 0.0458 +METEOR: 0.1850 +METEOR_w: 0.0120 + +All Finished! Time: 122.71 minutes +Model: /fsx_0/user/imzyc/proact_exps/20240821-L4096-I10-ep4-NOSEP-nr0.1-klgmix-1s-lora-bs512 +Runs: +epickitchens/dialog_val_L0_I10|stream|32k|0.2|drop_middle +holoassist/dialog_val_L0_I10|stream|32k|0.4|drop_middle +egoexolearn/dialog_val_L0_I10|stream|32k|0.4|drop_middle +assembly101/dialog_val_L0_I10|stream|32k|0.3|drop_middle +wtag/dialog_val_L0_I10|stream|32k|0.4|drop_middle