DavidNguyen commited on
Commit
25e2860
·
verified ·
1 Parent(s): 47fc5a4

dd1d6df5f4902c9b8cd21b3b129e2168962f1cd5bd125680152a8b6e5914d5e2

Browse files
sft/1M3/Full_smoe_tcmoe/logs/0629_0427_llava_v1.5_pope_llava_model_args_59313d/rank2_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 2 eval done
sft/1M3/Full_smoe_tcmoe/logs/0629_0427_llava_v1.5_pope_llava_model_args_59313d/rank3_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 3 eval done
sft/1M3/Full_smoe_tcmoe/logs/0629_0427_llava_v1.5_pope_llava_model_args_59313d/results.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "pope": {
4
+ "pope_accuracy,none": 0.8478888888888889,
5
+ "pope_accuracy_stderr,none": "N/A",
6
+ "pope_precision,none": 0.8689720065866855,
7
+ "pope_precision_stderr,none": "N/A",
8
+ "pope_recall,none": 0.8208888888888889,
9
+ "pope_recall_stderr,none": "N/A",
10
+ "pope_f1_score,none": 0.8442463718432178,
11
+ "pope_f1_score_stderr,none": "N/A",
12
+ "pope_yes_ratio,none": 0.5,
13
+ "pope_yes_ratio_stderr,none": "N/A",
14
+ "alias": "pope"
15
+ }
16
+ },
17
+ "configs": {
18
+ "pope": {
19
+ "task": "pope",
20
+ "dataset_path": "lmms-lab/POPE",
21
+ "dataset_kwargs": {
22
+ "token": true
23
+ },
24
+ "test_split": "test",
25
+ "doc_to_visual": "<function pope_doc_to_visual at 0x7feae07b48b0>",
26
+ "doc_to_text": "<function pope_doc_to_text at 0x7feae07b4e50>",
27
+ "doc_to_target": "answer",
28
+ "process_results": "<function pope_process_results at 0x7feae07bb430>",
29
+ "description": "",
30
+ "target_delimiter": " ",
31
+ "fewshot_delimiter": "\n\n",
32
+ "metric_list": [
33
+ {
34
+ "metric": "pope_accuracy",
35
+ "aggregation": "<function pope_aggregate_accuracy at 0x7feae07bb9d0>",
36
+ "higher_is_better": true
37
+ },
38
+ {
39
+ "metric": "pope_precision",
40
+ "aggregation": "<function pope_aggregate_precision at 0x7feae07bbf70>",
41
+ "higher_is_better": true
42
+ },
43
+ {
44
+ "metric": "pope_recall",
45
+ "aggregation": "<function pope_aggregate_recall at 0x7feae07bd550>",
46
+ "higher_is_better": true
47
+ },
48
+ {
49
+ "metric": "pope_f1_score",
50
+ "aggregation": "<function pope_aggregate_f1_score at 0x7feae07bdaf0>",
51
+ "higher_is_better": true
52
+ },
53
+ {
54
+ "metric": "pope_yes_ratio",
55
+ "aggregation": "<function pope_aggregate_yes_ratio at 0x7feae07bf0d0>",
56
+ "higher_is_better": true
57
+ }
58
+ ],
59
+ "output_type": "generate_until",
60
+ "generation_kwargs": {
61
+ "max_new_tokens": 128,
62
+ "temperature": 0.0,
63
+ "top_p": 1.0,
64
+ "num_beams": 1,
65
+ "do_sample": false,
66
+ "until": [
67
+ "\n\n"
68
+ ]
69
+ },
70
+ "repeats": 1,
71
+ "should_decontaminate": false,
72
+ "metadata": [
73
+ {
74
+ "version": 0.0
75
+ }
76
+ ]
77
+ }
78
+ },
79
+ "versions": {
80
+ "pope": "Yaml"
81
+ },
82
+ "n-shot": {
83
+ "pope": 0
84
+ },
85
+ "model_configs": {
86
+ "model": "llava",
87
+ "model_args": "pretrained=/cm/archive/namnv78/checkpoints/Xphi35-siglip224/SMOE/1M3/Full_smoe_tcmoe,conv_template=phi35",
88
+ "batch_size": "1",
89
+ "device": null,
90
+ "limit": null,
91
+ "bootstrap_iters": 100000,
92
+ "gen_kwargs": ""
93
+ },
94
+ "git_hash": "289c7fe5"
95
+ }
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/gpt_response/hallusion_output_vd_model.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/gpt_response/hallusion_output_vs_model.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/hallusion_bench_image.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/rank2_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 2 eval done
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/rank3_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 3 eval done
sft/1M3/Full_smoe_tcmoe/logs/0629_0506_llava...image_llava_model_args_59313d/results.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "hallusion_bench_image": {
4
+ "aAcc,none": 45.6362,
5
+ "aAcc_stderr,none": "N/A",
6
+ "fAcc,none": 21.3873,
7
+ "fAcc_stderr,none": "N/A",
8
+ "qAcc,none": 16.7033,
9
+ "qAcc_stderr,none": "N/A",
10
+ "alias": "hallusion_bench_image"
11
+ }
12
+ },
13
+ "configs": {
14
+ "hallusion_bench_image": {
15
+ "task": "hallusion_bench_image",
16
+ "dataset_path": "lmms-lab/HallusionBench",
17
+ "dataset_kwargs": {
18
+ "token": true
19
+ },
20
+ "test_split": "image",
21
+ "doc_to_visual": "<function hb_doc_to_visual at 0x7f63de76f280>",
22
+ "doc_to_text": "<function hb_doc_to_text at 0x7f63de76f9d0>",
23
+ "doc_to_target": "gt_answer_details",
24
+ "process_results": "<function hb_process_results at 0x7f63de78c1f0>",
25
+ "description": "",
26
+ "target_delimiter": " ",
27
+ "fewshot_delimiter": "\n\n",
28
+ "metric_list": [
29
+ {
30
+ "metric": "aAcc",
31
+ "aggregation": "<function hb_aggregation_result_aAcc at 0x7f63de78cb80>",
32
+ "higher_is_better": true
33
+ },
34
+ {
35
+ "metric": "qAcc",
36
+ "aggregation": "<function hb_aggregation_result_qAcc at 0x7f63de69a1f0>",
37
+ "higher_is_better": true
38
+ },
39
+ {
40
+ "metric": "fAcc",
41
+ "aggregation": "<function hb_aggregation_result_fAcc at 0x7f63de69a9d0>",
42
+ "higher_is_better": true
43
+ }
44
+ ],
45
+ "output_type": "generate_until",
46
+ "generation_kwargs": {
47
+ "max_new_tokens": 128,
48
+ "temperature": 0.0,
49
+ "top_p": 1.0,
50
+ "num_beams": 1,
51
+ "do_sample": false,
52
+ "until": [
53
+ "\n\n"
54
+ ]
55
+ },
56
+ "repeats": 1,
57
+ "should_decontaminate": false,
58
+ "metadata": [
59
+ {
60
+ "version": 0.0
61
+ }
62
+ ],
63
+ "model_specific_prompt_kwargs": {
64
+ "default": {
65
+ "pre_prompt": "",
66
+ "post_prompt": ""
67
+ }
68
+ }
69
+ }
70
+ },
71
+ "versions": {
72
+ "hallusion_bench_image": "Yaml"
73
+ },
74
+ "n-shot": {
75
+ "hallusion_bench_image": 0
76
+ },
77
+ "model_configs": {
78
+ "model": "llava",
79
+ "model_args": "pretrained=/cm/archive/namnv78/checkpoints/Xphi35-siglip224/SMOE/1M3/Full_smoe_tcmoe,conv_template=phi35",
80
+ "batch_size": "1",
81
+ "device": null,
82
+ "limit": null,
83
+ "bootstrap_iters": 100000,
84
+ "gen_kwargs": ""
85
+ },
86
+ "git_hash": "289c7fe5"
87
+ }