Upload folder using huggingface_hub

#822
This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitattributes +21 -0
  2. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2142_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt +1 -0
  3. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt +1 -0
  4. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt +1 -0
  5. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt +1 -0
  6. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/mmmu_val.json +3 -0
  7. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt +1 -0
  8. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt +1 -0
  9. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt +1 -0
  10. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt +1 -0
  11. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/results.json +66 -0
  12. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mathvista_testmini.json +3 -0
  13. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mme.json +3 -0
  14. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmmu_val.json +3 -0
  15. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmstar.json +3 -0
  16. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank0_metric_eval_done.txt +1 -0
  17. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank1_metric_eval_done.txt +1 -0
  18. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/results.json +285 -0
  19. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/submissions/mathvista_testmini_scores.json +0 -0
  20. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mathvista_testmini.json +3 -0
  21. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mme.json +3 -0
  22. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmmu_val.json +3 -0
  23. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmstar.json +3 -0
  24. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank0_metric_eval_done.txt +1 -0
  25. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank1_metric_eval_done.txt +1 -0
  26. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/results.json +285 -0
  27. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/submissions/mathvista_testmini_scores.json +0 -0
  28. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mathvista_testmini.json +3 -0
  29. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mme.json +3 -0
  30. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmmu_val.json +3 -0
  31. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmstar.json +3 -0
  32. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank0_metric_eval_done.txt +1 -0
  33. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank1_metric_eval_done.txt +1 -0
  34. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/results.json +285 -0
  35. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/submissions/mathvista_testmini_scores.json +0 -0
  36. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mathvista_testmini.json +3 -0
  37. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mme.json +3 -0
  38. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmmu_val.json +3 -0
  39. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmstar.json +3 -0
  40. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank0_metric_eval_done.txt +1 -0
  41. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank1_metric_eval_done.txt +1 -0
  42. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/results.json +285 -0
  43. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/submissions/mathvista_testmini_scores.json +0 -0
  44. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mathvista_testmini.json +3 -0
  45. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mme.json +3 -0
  46. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmmu_val.json +3 -0
  47. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmstar.json +3 -0
  48. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank0_metric_eval_done.txt +1 -0
  49. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank1_metric_eval_done.txt +1 -0
  50. sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/results.json +285 -0
.gitattributes CHANGED
@@ -359,3 +359,24 @@ sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mm
359
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/mmerealworld_lite.json filter=lfs diff=lfs merge=lfs -text
360
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/submissions/mmbench_en_dev_results.xlsx filter=lfs diff=lfs merge=lfs -text
361
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/textvqa_val.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/mmerealworld_lite.json filter=lfs diff=lfs merge=lfs -text
360
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/submissions/mmbench_en_dev_results.xlsx filter=lfs diff=lfs merge=lfs -text
361
  sft/665K36/revise_Full_smoe_sharev3/checkpoint-12477/logs/0717_2000_llava...l_mme_llava_model_args_82420a/textvqa_val.json filter=lfs diff=lfs merge=lfs -text
362
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/mmmu_val.json filter=lfs diff=lfs merge=lfs -text
363
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text
364
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mme.json filter=lfs diff=lfs merge=lfs -text
365
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmmu_val.json filter=lfs diff=lfs merge=lfs -text
366
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmstar.json filter=lfs diff=lfs merge=lfs -text
367
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text
368
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mme.json filter=lfs diff=lfs merge=lfs -text
369
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmmu_val.json filter=lfs diff=lfs merge=lfs -text
370
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmstar.json filter=lfs diff=lfs merge=lfs -text
371
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text
372
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mme.json filter=lfs diff=lfs merge=lfs -text
373
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmmu_val.json filter=lfs diff=lfs merge=lfs -text
374
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmstar.json filter=lfs diff=lfs merge=lfs -text
375
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text
376
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mme.json filter=lfs diff=lfs merge=lfs -text
377
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmmu_val.json filter=lfs diff=lfs merge=lfs -text
378
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmstar.json filter=lfs diff=lfs merge=lfs -text
379
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mathvista_testmini.json filter=lfs diff=lfs merge=lfs -text
380
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mme.json filter=lfs diff=lfs merge=lfs -text
381
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmmu_val.json filter=lfs diff=lfs merge=lfs -text
382
+ sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmstar.json filter=lfs diff=lfs merge=lfs -text
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2142_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 2 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2143_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 3 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/mmmu_val.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3bab9cc5ed7c089d9dc6fd7b61bbefdf945d5ef635cdf5121500ac690836c9e
3
+ size 36750400
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank2_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 2 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/rank3_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 3 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2159_llava...u_val_llava_model_args_861273/results.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mmmu_val": {
4
+ "mmmu_acc,none": 0.39667,
5
+ "mmmu_acc_stderr,none": "N/A",
6
+ "alias": "mmmu_val"
7
+ }
8
+ },
9
+ "configs": {
10
+ "mmmu_val": {
11
+ "task": "mmmu_val",
12
+ "dataset_path": "lmms-lab/MMMU",
13
+ "test_split": "validation",
14
+ "doc_to_visual": "<function mmmu_doc_to_visual at 0x7f91978c0790>",
15
+ "doc_to_text": "<function mmmu_doc_to_text at 0x7f91977604c0>",
16
+ "doc_to_target": "answer",
17
+ "process_results": "<function mmmu_process_results at 0x7f91976f73a0>",
18
+ "description": "",
19
+ "target_delimiter": " ",
20
+ "fewshot_delimiter": "\n\n",
21
+ "metric_list": [
22
+ {
23
+ "metric": "mmmu_acc",
24
+ "aggregation": "<function mmmu_aggregate_results at 0x7f91975f7310>",
25
+ "higher_is_better": true
26
+ }
27
+ ],
28
+ "output_type": "generate_until",
29
+ "generation_kwargs": {
30
+ "max_new_tokens": 128,
31
+ "until": [
32
+ "\n\n"
33
+ ],
34
+ "image_aspect_ratio": "original"
35
+ },
36
+ "repeats": 1,
37
+ "should_decontaminate": false,
38
+ "metadata": [
39
+ {
40
+ "version": 0.0
41
+ }
42
+ ],
43
+ "model_specific_generation_kwargs": {
44
+ "llava": {
45
+ "image_aspect_ratio": "original"
46
+ }
47
+ }
48
+ }
49
+ },
50
+ "versions": {
51
+ "mmmu_val": "Yaml"
52
+ },
53
+ "n-shot": {
54
+ "mmmu_val": 0
55
+ },
56
+ "model_configs": {
57
+ "model": "llava",
58
+ "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-4159,conv_template=phi35",
59
+ "batch_size": "1",
60
+ "device": null,
61
+ "limit": null,
62
+ "bootstrap_iters": 100000,
63
+ "gen_kwargs": ""
64
+ },
65
+ "git_hash": "289c7fe5"
66
+ }
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mathvista_testmini.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dea24a08322e3d4261f46343ad938d9cee0240e86f63937df3a56af0f35b31db
3
+ size 45280348
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mme.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ff23a00ca67cbf25c3913c38b6130c53471ff8ede1639ac638afab6dd079a0
3
+ size 94629687
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmmu_val.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6066f4915e0fe06637da76ce56045cb5c220d5f17687c9fa4c81f604f31555ad
3
+ size 36750432
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/mmstar.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a59eae9c9fd88fdf94ee4899dba7f649d4538bce9a912b884dcfe62d72d81e9f
3
+ size 60426711
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/results.json ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mathvista_testmini": {
4
+ "gpt_eval_score,none": 24.4,
5
+ "gpt_eval_score_stderr,none": "N/A",
6
+ "alias": "mathvista_testmini"
7
+ },
8
+ "mme": {
9
+ "mme_cognition_score,none": 300.3571428571429,
10
+ "mme_cognition_score_stderr,none": "N/A",
11
+ "mme_percetion_score,none": 1333.4614845938377,
12
+ "mme_percetion_score_stderr,none": "N/A",
13
+ "alias": "mme"
14
+ },
15
+ "mmmu_val": {
16
+ "mmmu_acc,none": 0.39667,
17
+ "mmmu_acc_stderr,none": "N/A",
18
+ "alias": "mmmu_val"
19
+ },
20
+ "mmstar": {
21
+ "coarse perception,none": 0.6777537264839942,
22
+ "coarse perception_stderr,none": "N/A",
23
+ "fine-grained perception,none": 0.3289425202652911,
24
+ "fine-grained perception_stderr,none": "N/A",
25
+ "instance reasoning,none": 0.5032956116682105,
26
+ "instance reasoning_stderr,none": "N/A",
27
+ "logical reasoning,none": 0.37952872210297955,
28
+ "logical reasoning_stderr,none": "N/A",
29
+ "math,none": 0.2650193798449612,
30
+ "math_stderr,none": "N/A",
31
+ "science & technology,none": 0.2725281195018929,
32
+ "science & technology_stderr,none": "N/A",
33
+ "alias": "mmstar"
34
+ }
35
+ },
36
+ "configs": {
37
+ "mathvista_testmini": {
38
+ "task": "mathvista_testmini",
39
+ "dataset_path": "AI4Math/MathVista",
40
+ "dataset_kwargs": {
41
+ "token": true
42
+ },
43
+ "test_split": "testmini",
44
+ "doc_to_visual": "<function mathvista_doc_to_visual at 0x7f7d7e34c700>",
45
+ "doc_to_text": "<function mathvista_doc_to_text at 0x7f7d7e354dc0>",
46
+ "doc_to_target": "answer",
47
+ "process_results": "<function mathvista_process_results at 0x7f7d7e3614c0>",
48
+ "description": "",
49
+ "target_delimiter": " ",
50
+ "fewshot_delimiter": "\n\n",
51
+ "metric_list": [
52
+ {
53
+ "metric": "gpt_eval_score",
54
+ "aggregation": "<function mathvista_aggregate_results at 0x7f7d7e36db80>",
55
+ "higher_is_better": true
56
+ }
57
+ ],
58
+ "output_type": "generate_until",
59
+ "generation_kwargs": {
60
+ "until": [
61
+ "ASSISTANT:"
62
+ ],
63
+ "max_new_tokens": 1024,
64
+ "temperature": 0.0,
65
+ "top_p": 1.0,
66
+ "num_beams": 1,
67
+ "do_sample": false,
68
+ "image_aspect_ratio": "original"
69
+ },
70
+ "repeats": 1,
71
+ "should_decontaminate": false,
72
+ "model_specific_prompt_kwargs": {
73
+ "default": {
74
+ "shot_type": "format-prompt",
75
+ "shot": 0,
76
+ "use_caption": false,
77
+ "use_ocr": false
78
+ },
79
+ "phi3v": {
80
+ "shot_type": "solution"
81
+ }
82
+ },
83
+ "model_specific_generation_kwargs": {
84
+ "llava": {
85
+ "image_aspect_ratio": "original"
86
+ }
87
+ }
88
+ },
89
+ "mme": {
90
+ "task": "mme",
91
+ "dataset_path": "lmms-lab/MME",
92
+ "dataset_kwargs": {
93
+ "token": false
94
+ },
95
+ "test_split": "test",
96
+ "doc_to_visual": "<function mme_doc_to_visual at 0x7f7ddf039940>",
97
+ "doc_to_text": "<function mme_doc_to_text at 0x7f7ddd5dd160>",
98
+ "doc_to_target": "answer",
99
+ "process_results": "<function mme_process_results at 0x7f7ddd5dd700>",
100
+ "description": "",
101
+ "target_delimiter": " ",
102
+ "fewshot_delimiter": "\n\n",
103
+ "metric_list": [
104
+ {
105
+ "metric": "mme_percetion_score",
106
+ "aggregation": "<function mme_aggregate_results at 0x7f7ddd5ddc10>",
107
+ "higher_is_better": true
108
+ },
109
+ {
110
+ "metric": "mme_cognition_score",
111
+ "aggregation": "<function mme_aggregate_results at 0x7f7ddd5e60d0>",
112
+ "higher_is_better": true
113
+ }
114
+ ],
115
+ "output_type": "generate_until",
116
+ "generation_kwargs": {
117
+ "max_new_tokens": 16,
118
+ "temperature": 0.0,
119
+ "top_p": 1.0,
120
+ "num_beams": 1,
121
+ "do_sample": false,
122
+ "until": [
123
+ "\n\n"
124
+ ]
125
+ },
126
+ "repeats": 1,
127
+ "should_decontaminate": false,
128
+ "metadata": [
129
+ {
130
+ "version": 0.0
131
+ }
132
+ ],
133
+ "model_specific_prompt_kwargs": {
134
+ "default": {
135
+ "pre_prompt": "",
136
+ "post_prompt": "\nAnswer the question using a single word or phrase."
137
+ },
138
+ "gpt4v": {
139
+ "pre_prompt": "",
140
+ "post_prompt": "\nAnswer the question with Yes or No."
141
+ },
142
+ "qwen_vl": {
143
+ "pre_prompt": "",
144
+ "post_prompt": " Answer:"
145
+ },
146
+ "otterhd": {
147
+ "pre_prompt": "",
148
+ "post_prompt": " Answer:"
149
+ },
150
+ "xcomposer2_4khd": {
151
+ "pre_prompt": "[UNUSED_TOKEN_146]user\n",
152
+ "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
153
+ }
154
+ }
155
+ },
156
+ "mmmu_val": {
157
+ "task": "mmmu_val",
158
+ "dataset_path": "lmms-lab/MMMU",
159
+ "test_split": "validation",
160
+ "doc_to_visual": "<function mmmu_doc_to_visual at 0x7f7d90842700>",
161
+ "doc_to_text": "<function mmmu_doc_to_text at 0x7f7d907b1430>",
162
+ "doc_to_target": "answer",
163
+ "process_results": "<function mmmu_process_results at 0x7f7d9067f310>",
164
+ "description": "",
165
+ "target_delimiter": " ",
166
+ "fewshot_delimiter": "\n\n",
167
+ "metric_list": [
168
+ {
169
+ "metric": "mmmu_acc",
170
+ "aggregation": "<function mmmu_aggregate_results at 0x7f7d90579280>",
171
+ "higher_is_better": true
172
+ }
173
+ ],
174
+ "output_type": "generate_until",
175
+ "generation_kwargs": {
176
+ "max_new_tokens": 128,
177
+ "until": [
178
+ "\n\n"
179
+ ],
180
+ "image_aspect_ratio": "original"
181
+ },
182
+ "repeats": 1,
183
+ "should_decontaminate": false,
184
+ "metadata": [
185
+ {
186
+ "version": 0.0
187
+ }
188
+ ],
189
+ "model_specific_generation_kwargs": {
190
+ "llava": {
191
+ "image_aspect_ratio": "original"
192
+ }
193
+ }
194
+ },
195
+ "mmstar": {
196
+ "task": "mmstar",
197
+ "dataset_path": "Lin-Chen/MMStar",
198
+ "dataset_kwargs": {
199
+ "token": true
200
+ },
201
+ "test_split": "val",
202
+ "doc_to_visual": "<function mmstar_doc_to_visual at 0x7f7d7e27bd30>",
203
+ "doc_to_text": "<function mmstar_doc_to_text at 0x7f7d7e2841f0>",
204
+ "doc_to_target": "answer",
205
+ "process_results": "<function mmstar_process_results at 0x7f7d7e284700>",
206
+ "description": "",
207
+ "target_delimiter": " ",
208
+ "fewshot_delimiter": "\n\n",
209
+ "metric_list": [
210
+ {
211
+ "metric": "coarse perception",
212
+ "aggregation": "<function mmstar_aggregate_results at 0x7f7d7e284b80>",
213
+ "higher_is_better": true
214
+ },
215
+ {
216
+ "metric": "fine-grained perception",
217
+ "aggregation": "<function mmstar_aggregate_results at 0x7f7d7e284f70>",
218
+ "higher_is_better": true
219
+ },
220
+ {
221
+ "metric": "instance reasoning",
222
+ "aggregation": "<function mmstar_aggregate_results at 0x7f7d7e20a3a0>",
223
+ "higher_is_better": true
224
+ },
225
+ {
226
+ "metric": "logical reasoning",
227
+ "aggregation": "<function mmstar_aggregate_results at 0x7f7d7e20a790>",
228
+ "higher_is_better": true
229
+ },
230
+ {
231
+ "metric": "science & technology",
232
+ "aggregation": "<function mmstar_aggregate_results at 0x7f7d7e20ab80>",
233
+ "higher_is_better": true
234
+ },
235
+ {
236
+ "metric": "math",
237
+ "aggregation": "<function mmstar_aggregate_results at 0x7f7d7e20af70>",
238
+ "higher_is_better": true
239
+ }
240
+ ],
241
+ "output_type": "generate_until",
242
+ "generation_kwargs": {
243
+ "until": [
244
+ "\n\n"
245
+ ],
246
+ "do_sample": false
247
+ },
248
+ "repeats": 1,
249
+ "should_decontaminate": false,
250
+ "metadata": [
251
+ {
252
+ "version": 0.0
253
+ }
254
+ ],
255
+ "model_specific_prompt_kwargs": {
256
+ "default": {
257
+ "pre_prompt": "",
258
+ "post_prompt": "\nAnswer with the option's letter from the given choices directly"
259
+ }
260
+ }
261
+ }
262
+ },
263
+ "versions": {
264
+ "mathvista_testmini": "Yaml",
265
+ "mme": "Yaml",
266
+ "mmmu_val": "Yaml",
267
+ "mmstar": "Yaml"
268
+ },
269
+ "n-shot": {
270
+ "mathvista_testmini": 0,
271
+ "mme": 0,
272
+ "mmmu_val": 0,
273
+ "mmstar": 0
274
+ },
275
+ "model_configs": {
276
+ "model": "llava",
277
+ "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-4159,conv_template=phi35",
278
+ "batch_size": "1",
279
+ "device": null,
280
+ "limit": null,
281
+ "bootstrap_iters": 100000,
282
+ "gen_kwargs": ""
283
+ },
284
+ "git_hash": "289c7fe5"
285
+ }
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2213_llava...mstar_llava_model_args_861273/submissions/mathvista_testmini_scores.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mathvista_testmini.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ba84a4cbb0576f85800446579c3dcd431f37dd7139cdb6f7db6663823358fd2
3
+ size 45272253
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mme.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:011e40f78ee9008a61b460145cf49396d68e89126511bce959ec55b38e3f0158
3
+ size 94631375
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmmu_val.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f900a5b0df3eef25875d5833f0bc4b017bf4b6013ba00120ee1b519f1f2f05
3
+ size 36750618
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/mmstar.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0937b93e04b8698a8f641913eb0ae96114a2bdf93057c229af5a93cf653bd2
3
+ size 60427356
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/results.json ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mathvista_testmini": {
4
+ "gpt_eval_score,none": 23.2,
5
+ "gpt_eval_score_stderr,none": "N/A",
6
+ "alias": "mathvista_testmini"
7
+ },
8
+ "mme": {
9
+ "mme_cognition_score,none": 314.2857142857143,
10
+ "mme_cognition_score_stderr,none": "N/A",
11
+ "mme_percetion_score,none": 1367.7411964785915,
12
+ "mme_percetion_score_stderr,none": "N/A",
13
+ "alias": "mme"
14
+ },
15
+ "mmmu_val": {
16
+ "mmmu_acc,none": 0.41333,
17
+ "mmmu_acc_stderr,none": "N/A",
18
+ "alias": "mmmu_val"
19
+ },
20
+ "mmstar": {
21
+ "coarse perception,none": 0.676844470255589,
22
+ "coarse perception_stderr,none": "N/A",
23
+ "fine-grained perception,none": 0.3550294767870302,
24
+ "fine-grained perception_stderr,none": "N/A",
25
+ "instance reasoning,none": 0.5366379757463484,
26
+ "instance reasoning_stderr,none": "N/A",
27
+ "logical reasoning,none": 0.3494995653411495,
28
+ "logical reasoning_stderr,none": "N/A",
29
+ "math,none": 0.31132495767620066,
30
+ "math_stderr,none": "N/A",
31
+ "science & technology,none": 0.2568647183257686,
32
+ "science & technology_stderr,none": "N/A",
33
+ "alias": "mmstar"
34
+ }
35
+ },
36
+ "configs": {
37
+ "mathvista_testmini": {
38
+ "task": "mathvista_testmini",
39
+ "dataset_path": "AI4Math/MathVista",
40
+ "dataset_kwargs": {
41
+ "token": true
42
+ },
43
+ "test_split": "testmini",
44
+ "doc_to_visual": "<function mathvista_doc_to_visual at 0x7f92e5d69700>",
45
+ "doc_to_text": "<function mathvista_doc_to_text at 0x7f92e5d72dc0>",
46
+ "doc_to_target": "answer",
47
+ "process_results": "<function mathvista_process_results at 0x7f92e5d804c0>",
48
+ "description": "",
49
+ "target_delimiter": " ",
50
+ "fewshot_delimiter": "\n\n",
51
+ "metric_list": [
52
+ {
53
+ "metric": "gpt_eval_score",
54
+ "aggregation": "<function mathvista_aggregate_results at 0x7f92e5d8bb80>",
55
+ "higher_is_better": true
56
+ }
57
+ ],
58
+ "output_type": "generate_until",
59
+ "generation_kwargs": {
60
+ "until": [
61
+ "ASSISTANT:"
62
+ ],
63
+ "max_new_tokens": 1024,
64
+ "temperature": 0.0,
65
+ "top_p": 1.0,
66
+ "num_beams": 1,
67
+ "do_sample": false,
68
+ "image_aspect_ratio": "original"
69
+ },
70
+ "repeats": 1,
71
+ "should_decontaminate": false,
72
+ "model_specific_prompt_kwargs": {
73
+ "default": {
74
+ "shot_type": "format-prompt",
75
+ "shot": 0,
76
+ "use_caption": false,
77
+ "use_ocr": false
78
+ },
79
+ "phi3v": {
80
+ "shot_type": "solution"
81
+ }
82
+ },
83
+ "model_specific_generation_kwargs": {
84
+ "llava": {
85
+ "image_aspect_ratio": "original"
86
+ }
87
+ }
88
+ },
89
+ "mme": {
90
+ "task": "mme",
91
+ "dataset_path": "lmms-lab/MME",
92
+ "dataset_kwargs": {
93
+ "token": false
94
+ },
95
+ "test_split": "test",
96
+ "doc_to_visual": "<function mme_doc_to_visual at 0x7f9346a62940>",
97
+ "doc_to_text": "<function mme_doc_to_text at 0x7f9345006160>",
98
+ "doc_to_target": "answer",
99
+ "process_results": "<function mme_process_results at 0x7f9345006700>",
100
+ "description": "",
101
+ "target_delimiter": " ",
102
+ "fewshot_delimiter": "\n\n",
103
+ "metric_list": [
104
+ {
105
+ "metric": "mme_percetion_score",
106
+ "aggregation": "<function mme_aggregate_results at 0x7f9345006c10>",
107
+ "higher_is_better": true
108
+ },
109
+ {
110
+ "metric": "mme_cognition_score",
111
+ "aggregation": "<function mme_aggregate_results at 0x7f934500e0d0>",
112
+ "higher_is_better": true
113
+ }
114
+ ],
115
+ "output_type": "generate_until",
116
+ "generation_kwargs": {
117
+ "max_new_tokens": 16,
118
+ "temperature": 0.0,
119
+ "top_p": 1.0,
120
+ "num_beams": 1,
121
+ "do_sample": false,
122
+ "until": [
123
+ "\n\n"
124
+ ]
125
+ },
126
+ "repeats": 1,
127
+ "should_decontaminate": false,
128
+ "metadata": [
129
+ {
130
+ "version": 0.0
131
+ }
132
+ ],
133
+ "model_specific_prompt_kwargs": {
134
+ "default": {
135
+ "pre_prompt": "",
136
+ "post_prompt": "\nAnswer the question using a single word or phrase."
137
+ },
138
+ "gpt4v": {
139
+ "pre_prompt": "",
140
+ "post_prompt": "\nAnswer the question with Yes or No."
141
+ },
142
+ "qwen_vl": {
143
+ "pre_prompt": "",
144
+ "post_prompt": " Answer:"
145
+ },
146
+ "otterhd": {
147
+ "pre_prompt": "",
148
+ "post_prompt": " Answer:"
149
+ },
150
+ "xcomposer2_4khd": {
151
+ "pre_prompt": "[UNUSED_TOKEN_146]user\n",
152
+ "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
153
+ }
154
+ }
155
+ },
156
+ "mmmu_val": {
157
+ "task": "mmmu_val",
158
+ "dataset_path": "lmms-lab/MMMU",
159
+ "test_split": "validation",
160
+ "doc_to_visual": "<function mmmu_doc_to_visual at 0x7f92f886c700>",
161
+ "doc_to_text": "<function mmmu_doc_to_text at 0x7f92f8837430>",
162
+ "doc_to_target": "answer",
163
+ "process_results": "<function mmmu_process_results at 0x7f92f876a310>",
164
+ "description": "",
165
+ "target_delimiter": " ",
166
+ "fewshot_delimiter": "\n\n",
167
+ "metric_list": [
168
+ {
169
+ "metric": "mmmu_acc",
170
+ "aggregation": "<function mmmu_aggregate_results at 0x7f92f8652280>",
171
+ "higher_is_better": true
172
+ }
173
+ ],
174
+ "output_type": "generate_until",
175
+ "generation_kwargs": {
176
+ "max_new_tokens": 128,
177
+ "until": [
178
+ "\n\n"
179
+ ],
180
+ "image_aspect_ratio": "original"
181
+ },
182
+ "repeats": 1,
183
+ "should_decontaminate": false,
184
+ "metadata": [
185
+ {
186
+ "version": 0.0
187
+ }
188
+ ],
189
+ "model_specific_generation_kwargs": {
190
+ "llava": {
191
+ "image_aspect_ratio": "original"
192
+ }
193
+ }
194
+ },
195
+ "mmstar": {
196
+ "task": "mmstar",
197
+ "dataset_path": "Lin-Chen/MMStar",
198
+ "dataset_kwargs": {
199
+ "token": true
200
+ },
201
+ "test_split": "val",
202
+ "doc_to_visual": "<function mmstar_doc_to_visual at 0x7f92e5c99d30>",
203
+ "doc_to_text": "<function mmstar_doc_to_text at 0x7f92e5ca21f0>",
204
+ "doc_to_target": "answer",
205
+ "process_results": "<function mmstar_process_results at 0x7f92e5ca2700>",
206
+ "description": "",
207
+ "target_delimiter": " ",
208
+ "fewshot_delimiter": "\n\n",
209
+ "metric_list": [
210
+ {
211
+ "metric": "coarse perception",
212
+ "aggregation": "<function mmstar_aggregate_results at 0x7f92e5ca2b80>",
213
+ "higher_is_better": true
214
+ },
215
+ {
216
+ "metric": "fine-grained perception",
217
+ "aggregation": "<function mmstar_aggregate_results at 0x7f92e5ca2f70>",
218
+ "higher_is_better": true
219
+ },
220
+ {
221
+ "metric": "instance reasoning",
222
+ "aggregation": "<function mmstar_aggregate_results at 0x7f92e5c273a0>",
223
+ "higher_is_better": true
224
+ },
225
+ {
226
+ "metric": "logical reasoning",
227
+ "aggregation": "<function mmstar_aggregate_results at 0x7f92e5c27790>",
228
+ "higher_is_better": true
229
+ },
230
+ {
231
+ "metric": "science & technology",
232
+ "aggregation": "<function mmstar_aggregate_results at 0x7f92e5c27b80>",
233
+ "higher_is_better": true
234
+ },
235
+ {
236
+ "metric": "math",
237
+ "aggregation": "<function mmstar_aggregate_results at 0x7f92e5c27f70>",
238
+ "higher_is_better": true
239
+ }
240
+ ],
241
+ "output_type": "generate_until",
242
+ "generation_kwargs": {
243
+ "until": [
244
+ "\n\n"
245
+ ],
246
+ "do_sample": false
247
+ },
248
+ "repeats": 1,
249
+ "should_decontaminate": false,
250
+ "metadata": [
251
+ {
252
+ "version": 0.0
253
+ }
254
+ ],
255
+ "model_specific_prompt_kwargs": {
256
+ "default": {
257
+ "pre_prompt": "",
258
+ "post_prompt": "\nAnswer with the option's letter from the given choices directly"
259
+ }
260
+ }
261
+ }
262
+ },
263
+ "versions": {
264
+ "mathvista_testmini": "Yaml",
265
+ "mme": "Yaml",
266
+ "mmmu_val": "Yaml",
267
+ "mmstar": "Yaml"
268
+ },
269
+ "n-shot": {
270
+ "mathvista_testmini": 0,
271
+ "mme": 0,
272
+ "mmmu_val": 0,
273
+ "mmstar": 0
274
+ },
275
+ "model_configs": {
276
+ "model": "llava",
277
+ "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-8318,conv_template=phi35",
278
+ "batch_size": "1",
279
+ "device": null,
280
+ "limit": null,
281
+ "bootstrap_iters": 100000,
282
+ "gen_kwargs": ""
283
+ },
284
+ "git_hash": "289c7fe5"
285
+ }
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2225_llava...mstar_llava_model_args_fc3596/submissions/mathvista_testmini_scores.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mathvista_testmini.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843ba4f9777b2c69442aa0ea4e48e2845ed19bb9ed320b49c8647cc3da343c28
3
+ size 45272419
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mme.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5033ba470671eeca7bf50fb890fbf8716c3cb6e2ac150839a8111b765e658d
3
+ size 94631595
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmmu_val.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47bd208990c79ee8ec97895fccc9d91a4b18e85291f2892d986c4839972eac86
3
+ size 36750492
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/mmstar.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4829e00e1150da10de6554ddf43d0512f21f6ce92e6db3d896e4ca0cab1e669
3
+ size 60427313
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/results.json ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mathvista_testmini": {
4
+ "gpt_eval_score,none": 23.7,
5
+ "gpt_eval_score_stderr,none": "N/A",
6
+ "alias": "mathvista_testmini"
7
+ },
8
+ "mme": {
9
+ "mme_cognition_score,none": 321.7857142857143,
10
+ "mme_cognition_score_stderr,none": "N/A",
11
+ "mme_percetion_score,none": 1418.2278911564626,
12
+ "mme_percetion_score_stderr,none": "N/A",
13
+ "alias": "mme"
14
+ },
15
+ "mmmu_val": {
16
+ "mmmu_acc,none": 0.41222,
17
+ "mmmu_acc_stderr,none": "N/A",
18
+ "alias": "mmmu_val"
19
+ },
20
+ "mmstar": {
21
+ "coarse perception,none": 0.6918706627011363,
22
+ "coarse perception_stderr,none": "N/A",
23
+ "fine-grained perception,none": 0.3625644804716286,
24
+ "fine-grained perception_stderr,none": "N/A",
25
+ "instance reasoning,none": 0.5205089434882838,
26
+ "instance reasoning_stderr,none": "N/A",
27
+ "logical reasoning,none": 0.3660535284297661,
28
+ "logical reasoning_stderr,none": "N/A",
29
+ "math,none": 0.28080727078321305,
30
+ "math_stderr,none": "N/A",
31
+ "science & technology,none": 0.19842818316868963,
32
+ "science & technology_stderr,none": "N/A",
33
+ "alias": "mmstar"
34
+ }
35
+ },
36
+ "configs": {
37
+ "mathvista_testmini": {
38
+ "task": "mathvista_testmini",
39
+ "dataset_path": "AI4Math/MathVista",
40
+ "dataset_kwargs": {
41
+ "token": true
42
+ },
43
+ "test_split": "testmini",
44
+ "doc_to_visual": "<function mathvista_doc_to_visual at 0x7f00a1460790>",
45
+ "doc_to_text": "<function mathvista_doc_to_text at 0x7f00a1469e50>",
46
+ "doc_to_target": "answer",
47
+ "process_results": "<function mathvista_process_results at 0x7f00a1475550>",
48
+ "description": "",
49
+ "target_delimiter": " ",
50
+ "fewshot_delimiter": "\n\n",
51
+ "metric_list": [
52
+ {
53
+ "metric": "gpt_eval_score",
54
+ "aggregation": "<function mathvista_aggregate_results at 0x7f00a1481c10>",
55
+ "higher_is_better": true
56
+ }
57
+ ],
58
+ "output_type": "generate_until",
59
+ "generation_kwargs": {
60
+ "until": [
61
+ "ASSISTANT:"
62
+ ],
63
+ "max_new_tokens": 1024,
64
+ "temperature": 0.0,
65
+ "top_p": 1.0,
66
+ "num_beams": 1,
67
+ "do_sample": false,
68
+ "image_aspect_ratio": "original"
69
+ },
70
+ "repeats": 1,
71
+ "should_decontaminate": false,
72
+ "model_specific_prompt_kwargs": {
73
+ "default": {
74
+ "shot_type": "format-prompt",
75
+ "shot": 0,
76
+ "use_caption": false,
77
+ "use_ocr": false
78
+ },
79
+ "phi3v": {
80
+ "shot_type": "solution"
81
+ }
82
+ },
83
+ "model_specific_generation_kwargs": {
84
+ "llava": {
85
+ "image_aspect_ratio": "original"
86
+ }
87
+ }
88
+ },
89
+ "mme": {
90
+ "task": "mme",
91
+ "dataset_path": "lmms-lab/MME",
92
+ "dataset_kwargs": {
93
+ "token": false
94
+ },
95
+ "test_split": "test",
96
+ "doc_to_visual": "<function mme_doc_to_visual at 0x7f010216f940>",
97
+ "doc_to_text": "<function mme_doc_to_text at 0x7f0100713160>",
98
+ "doc_to_target": "answer",
99
+ "process_results": "<function mme_process_results at 0x7f0100713700>",
100
+ "description": "",
101
+ "target_delimiter": " ",
102
+ "fewshot_delimiter": "\n\n",
103
+ "metric_list": [
104
+ {
105
+ "metric": "mme_percetion_score",
106
+ "aggregation": "<function mme_aggregate_results at 0x7f0100713c10>",
107
+ "higher_is_better": true
108
+ },
109
+ {
110
+ "metric": "mme_cognition_score",
111
+ "aggregation": "<function mme_aggregate_results at 0x7f010071b0d0>",
112
+ "higher_is_better": true
113
+ }
114
+ ],
115
+ "output_type": "generate_until",
116
+ "generation_kwargs": {
117
+ "max_new_tokens": 16,
118
+ "temperature": 0.0,
119
+ "top_p": 1.0,
120
+ "num_beams": 1,
121
+ "do_sample": false,
122
+ "until": [
123
+ "\n\n"
124
+ ]
125
+ },
126
+ "repeats": 1,
127
+ "should_decontaminate": false,
128
+ "metadata": [
129
+ {
130
+ "version": 0.0
131
+ }
132
+ ],
133
+ "model_specific_prompt_kwargs": {
134
+ "default": {
135
+ "pre_prompt": "",
136
+ "post_prompt": "\nAnswer the question using a single word or phrase."
137
+ },
138
+ "gpt4v": {
139
+ "pre_prompt": "",
140
+ "post_prompt": "\nAnswer the question with Yes or No."
141
+ },
142
+ "qwen_vl": {
143
+ "pre_prompt": "",
144
+ "post_prompt": " Answer:"
145
+ },
146
+ "otterhd": {
147
+ "pre_prompt": "",
148
+ "post_prompt": " Answer:"
149
+ },
150
+ "xcomposer2_4khd": {
151
+ "pre_prompt": "[UNUSED_TOKEN_146]user\n",
152
+ "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
153
+ }
154
+ }
155
+ },
156
+ "mmmu_val": {
157
+ "task": "mmmu_val",
158
+ "dataset_path": "lmms-lab/MMMU",
159
+ "test_split": "validation",
160
+ "doc_to_visual": "<function mmmu_doc_to_visual at 0x7f00b4139790>",
161
+ "doc_to_text": "<function mmmu_doc_to_text at 0x7f00b40664c0>",
162
+ "doc_to_target": "answer",
163
+ "process_results": "<function mmmu_process_results at 0x7f00b40123a0>",
164
+ "description": "",
165
+ "target_delimiter": " ",
166
+ "fewshot_delimiter": "\n\n",
167
+ "metric_list": [
168
+ {
169
+ "metric": "mmmu_acc",
170
+ "aggregation": "<function mmmu_aggregate_results at 0x7f00b3f3a310>",
171
+ "higher_is_better": true
172
+ }
173
+ ],
174
+ "output_type": "generate_until",
175
+ "generation_kwargs": {
176
+ "max_new_tokens": 128,
177
+ "until": [
178
+ "\n\n"
179
+ ],
180
+ "image_aspect_ratio": "original"
181
+ },
182
+ "repeats": 1,
183
+ "should_decontaminate": false,
184
+ "metadata": [
185
+ {
186
+ "version": 0.0
187
+ }
188
+ ],
189
+ "model_specific_generation_kwargs": {
190
+ "llava": {
191
+ "image_aspect_ratio": "original"
192
+ }
193
+ }
194
+ },
195
+ "mmstar": {
196
+ "task": "mmstar",
197
+ "dataset_path": "Lin-Chen/MMStar",
198
+ "dataset_kwargs": {
199
+ "token": true
200
+ },
201
+ "test_split": "val",
202
+ "doc_to_visual": "<function mmstar_doc_to_visual at 0x7f00a138edc0>",
203
+ "doc_to_text": "<function mmstar_doc_to_text at 0x7f00a1398280>",
204
+ "doc_to_target": "answer",
205
+ "process_results": "<function mmstar_process_results at 0x7f00a1398790>",
206
+ "description": "",
207
+ "target_delimiter": " ",
208
+ "fewshot_delimiter": "\n\n",
209
+ "metric_list": [
210
+ {
211
+ "metric": "coarse perception",
212
+ "aggregation": "<function mmstar_aggregate_results at 0x7f00a1398c10>",
213
+ "higher_is_better": true
214
+ },
215
+ {
216
+ "metric": "fine-grained perception",
217
+ "aggregation": "<function mmstar_aggregate_results at 0x7f00a131f040>",
218
+ "higher_is_better": true
219
+ },
220
+ {
221
+ "metric": "instance reasoning",
222
+ "aggregation": "<function mmstar_aggregate_results at 0x7f00a131f430>",
223
+ "higher_is_better": true
224
+ },
225
+ {
226
+ "metric": "logical reasoning",
227
+ "aggregation": "<function mmstar_aggregate_results at 0x7f00a131f820>",
228
+ "higher_is_better": true
229
+ },
230
+ {
231
+ "metric": "science & technology",
232
+ "aggregation": "<function mmstar_aggregate_results at 0x7f00a131fc10>",
233
+ "higher_is_better": true
234
+ },
235
+ {
236
+ "metric": "math",
237
+ "aggregation": "<function mmstar_aggregate_results at 0x7f00a1324040>",
238
+ "higher_is_better": true
239
+ }
240
+ ],
241
+ "output_type": "generate_until",
242
+ "generation_kwargs": {
243
+ "until": [
244
+ "\n\n"
245
+ ],
246
+ "do_sample": false
247
+ },
248
+ "repeats": 1,
249
+ "should_decontaminate": false,
250
+ "metadata": [
251
+ {
252
+ "version": 0.0
253
+ }
254
+ ],
255
+ "model_specific_prompt_kwargs": {
256
+ "default": {
257
+ "pre_prompt": "",
258
+ "post_prompt": "\nAnswer with the option's letter from the given choices directly"
259
+ }
260
+ }
261
+ }
262
+ },
263
+ "versions": {
264
+ "mathvista_testmini": "Yaml",
265
+ "mme": "Yaml",
266
+ "mmmu_val": "Yaml",
267
+ "mmstar": "Yaml"
268
+ },
269
+ "n-shot": {
270
+ "mathvista_testmini": 0,
271
+ "mme": 0,
272
+ "mmmu_val": 0,
273
+ "mmstar": 0
274
+ },
275
+ "model_configs": {
276
+ "model": "llava",
277
+ "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-12477,conv_template=phi35",
278
+ "batch_size": "1",
279
+ "device": null,
280
+ "limit": null,
281
+ "bootstrap_iters": 100000,
282
+ "gen_kwargs": ""
283
+ },
284
+ "git_hash": "289c7fe5"
285
+ }
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2237_llava...mstar_llava_model_args_82420a/submissions/mathvista_testmini_scores.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mathvista_testmini.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c597f6e7990da8a6152fa835b8fb94f55ffdf7abd8c89e3f77272a9574aa9099
3
+ size 45275763
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mme.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d1013eec1a07465ff759dc3031c2921cb4081bcc386e98c43120cc47847ee69
3
+ size 94631509
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmmu_val.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4026a47ec71284e1d594a154cb844d5aeaba76ce68754ec9b86d52078f260021
3
+ size 36750611
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/mmstar.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ccf81f0fb7a589e721f551930e15431159b8a722590a0146130265df917a3ca
3
+ size 60427594
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/results.json ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mathvista_testmini": {
4
+ "gpt_eval_score,none": 24.0,
5
+ "gpt_eval_score_stderr,none": "N/A",
6
+ "alias": "mathvista_testmini"
7
+ },
8
+ "mme": {
9
+ "mme_cognition_score,none": 324.2857142857143,
10
+ "mme_cognition_score_stderr,none": "N/A",
11
+ "mme_percetion_score,none": 1410.4883953581434,
12
+ "mme_percetion_score_stderr,none": "N/A",
13
+ "alias": "mme"
14
+ },
15
+ "mmmu_val": {
16
+ "mmmu_acc,none": 0.42222,
17
+ "mmmu_acc_stderr,none": "N/A",
18
+ "alias": "mmmu_val"
19
+ },
20
+ "mmstar": {
21
+ "coarse perception,none": 0.6964404085200243,
22
+ "coarse perception_stderr,none": "N/A",
23
+ "fine-grained perception,none": 0.3732928027511668,
24
+ "fine-grained perception_stderr,none": "N/A",
25
+ "instance reasoning,none": 0.5295256540272851,
26
+ "instance reasoning_stderr,none": "N/A",
27
+ "logical reasoning,none": 0.375954518528776,
28
+ "logical reasoning_stderr,none": "N/A",
29
+ "math,none": 0.3063853247794707,
30
+ "math_stderr,none": "N/A",
31
+ "science & technology,none": 0.23852115038371227,
32
+ "science & technology_stderr,none": "N/A",
33
+ "alias": "mmstar"
34
+ }
35
+ },
36
+ "configs": {
37
+ "mathvista_testmini": {
38
+ "task": "mathvista_testmini",
39
+ "dataset_path": "AI4Math/MathVista",
40
+ "dataset_kwargs": {
41
+ "token": true
42
+ },
43
+ "test_split": "testmini",
44
+ "doc_to_visual": "<function mathvista_doc_to_visual at 0x7f0e528ca700>",
45
+ "doc_to_text": "<function mathvista_doc_to_text at 0x7f0e528d3dc0>",
46
+ "doc_to_target": "answer",
47
+ "process_results": "<function mathvista_process_results at 0x7f0e528e14c0>",
48
+ "description": "",
49
+ "target_delimiter": " ",
50
+ "fewshot_delimiter": "\n\n",
51
+ "metric_list": [
52
+ {
53
+ "metric": "gpt_eval_score",
54
+ "aggregation": "<function mathvista_aggregate_results at 0x7f0e528ebb80>",
55
+ "higher_is_better": true
56
+ }
57
+ ],
58
+ "output_type": "generate_until",
59
+ "generation_kwargs": {
60
+ "until": [
61
+ "ASSISTANT:"
62
+ ],
63
+ "max_new_tokens": 1024,
64
+ "temperature": 0.0,
65
+ "top_p": 1.0,
66
+ "num_beams": 1,
67
+ "do_sample": false,
68
+ "image_aspect_ratio": "original"
69
+ },
70
+ "repeats": 1,
71
+ "should_decontaminate": false,
72
+ "model_specific_prompt_kwargs": {
73
+ "default": {
74
+ "shot_type": "format-prompt",
75
+ "shot": 0,
76
+ "use_caption": false,
77
+ "use_ocr": false
78
+ },
79
+ "phi3v": {
80
+ "shot_type": "solution"
81
+ }
82
+ },
83
+ "model_specific_generation_kwargs": {
84
+ "llava": {
85
+ "image_aspect_ratio": "original"
86
+ }
87
+ }
88
+ },
89
+ "mme": {
90
+ "task": "mme",
91
+ "dataset_path": "lmms-lab/MME",
92
+ "dataset_kwargs": {
93
+ "token": false
94
+ },
95
+ "test_split": "test",
96
+ "doc_to_visual": "<function mme_doc_to_visual at 0x7f0eb35e6940>",
97
+ "doc_to_text": "<function mme_doc_to_text at 0x7f0eb1b8a160>",
98
+ "doc_to_target": "answer",
99
+ "process_results": "<function mme_process_results at 0x7f0eb1b8a700>",
100
+ "description": "",
101
+ "target_delimiter": " ",
102
+ "fewshot_delimiter": "\n\n",
103
+ "metric_list": [
104
+ {
105
+ "metric": "mme_percetion_score",
106
+ "aggregation": "<function mme_aggregate_results at 0x7f0eb1b8ac10>",
107
+ "higher_is_better": true
108
+ },
109
+ {
110
+ "metric": "mme_cognition_score",
111
+ "aggregation": "<function mme_aggregate_results at 0x7f0eb1b920d0>",
112
+ "higher_is_better": true
113
+ }
114
+ ],
115
+ "output_type": "generate_until",
116
+ "generation_kwargs": {
117
+ "max_new_tokens": 16,
118
+ "temperature": 0.0,
119
+ "top_p": 1.0,
120
+ "num_beams": 1,
121
+ "do_sample": false,
122
+ "until": [
123
+ "\n\n"
124
+ ]
125
+ },
126
+ "repeats": 1,
127
+ "should_decontaminate": false,
128
+ "metadata": [
129
+ {
130
+ "version": 0.0
131
+ }
132
+ ],
133
+ "model_specific_prompt_kwargs": {
134
+ "default": {
135
+ "pre_prompt": "",
136
+ "post_prompt": "\nAnswer the question using a single word or phrase."
137
+ },
138
+ "gpt4v": {
139
+ "pre_prompt": "",
140
+ "post_prompt": "\nAnswer the question with Yes or No."
141
+ },
142
+ "qwen_vl": {
143
+ "pre_prompt": "",
144
+ "post_prompt": " Answer:"
145
+ },
146
+ "otterhd": {
147
+ "pre_prompt": "",
148
+ "post_prompt": " Answer:"
149
+ },
150
+ "xcomposer2_4khd": {
151
+ "pre_prompt": "[UNUSED_TOKEN_146]user\n",
152
+ "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
153
+ }
154
+ }
155
+ },
156
+ "mmmu_val": {
157
+ "task": "mmmu_val",
158
+ "dataset_path": "lmms-lab/MMMU",
159
+ "test_split": "validation",
160
+ "doc_to_visual": "<function mmmu_doc_to_visual at 0x7f0e650c3700>",
161
+ "doc_to_text": "<function mmmu_doc_to_text at 0x7f0e6507a430>",
162
+ "doc_to_target": "answer",
163
+ "process_results": "<function mmmu_process_results at 0x7f0e64f7a310>",
164
+ "description": "",
165
+ "target_delimiter": " ",
166
+ "fewshot_delimiter": "\n\n",
167
+ "metric_list": [
168
+ {
169
+ "metric": "mmmu_acc",
170
+ "aggregation": "<function mmmu_aggregate_results at 0x7f0e64e31280>",
171
+ "higher_is_better": true
172
+ }
173
+ ],
174
+ "output_type": "generate_until",
175
+ "generation_kwargs": {
176
+ "max_new_tokens": 128,
177
+ "until": [
178
+ "\n\n"
179
+ ],
180
+ "image_aspect_ratio": "original"
181
+ },
182
+ "repeats": 1,
183
+ "should_decontaminate": false,
184
+ "metadata": [
185
+ {
186
+ "version": 0.0
187
+ }
188
+ ],
189
+ "model_specific_generation_kwargs": {
190
+ "llava": {
191
+ "image_aspect_ratio": "original"
192
+ }
193
+ }
194
+ },
195
+ "mmstar": {
196
+ "task": "mmstar",
197
+ "dataset_path": "Lin-Chen/MMStar",
198
+ "dataset_kwargs": {
199
+ "token": true
200
+ },
201
+ "test_split": "val",
202
+ "doc_to_visual": "<function mmstar_doc_to_visual at 0x7f0e527fcd30>",
203
+ "doc_to_text": "<function mmstar_doc_to_text at 0x7f0e528021f0>",
204
+ "doc_to_target": "answer",
205
+ "process_results": "<function mmstar_process_results at 0x7f0e52802700>",
206
+ "description": "",
207
+ "target_delimiter": " ",
208
+ "fewshot_delimiter": "\n\n",
209
+ "metric_list": [
210
+ {
211
+ "metric": "coarse perception",
212
+ "aggregation": "<function mmstar_aggregate_results at 0x7f0e52802b80>",
213
+ "higher_is_better": true
214
+ },
215
+ {
216
+ "metric": "fine-grained perception",
217
+ "aggregation": "<function mmstar_aggregate_results at 0x7f0e52802f70>",
218
+ "higher_is_better": true
219
+ },
220
+ {
221
+ "metric": "instance reasoning",
222
+ "aggregation": "<function mmstar_aggregate_results at 0x7f0e527893a0>",
223
+ "higher_is_better": true
224
+ },
225
+ {
226
+ "metric": "logical reasoning",
227
+ "aggregation": "<function mmstar_aggregate_results at 0x7f0e52789790>",
228
+ "higher_is_better": true
229
+ },
230
+ {
231
+ "metric": "science & technology",
232
+ "aggregation": "<function mmstar_aggregate_results at 0x7f0e52789b80>",
233
+ "higher_is_better": true
234
+ },
235
+ {
236
+ "metric": "math",
237
+ "aggregation": "<function mmstar_aggregate_results at 0x7f0e52789f70>",
238
+ "higher_is_better": true
239
+ }
240
+ ],
241
+ "output_type": "generate_until",
242
+ "generation_kwargs": {
243
+ "until": [
244
+ "\n\n"
245
+ ],
246
+ "do_sample": false
247
+ },
248
+ "repeats": 1,
249
+ "should_decontaminate": false,
250
+ "metadata": [
251
+ {
252
+ "version": 0.0
253
+ }
254
+ ],
255
+ "model_specific_prompt_kwargs": {
256
+ "default": {
257
+ "pre_prompt": "",
258
+ "post_prompt": "\nAnswer with the option's letter from the given choices directly"
259
+ }
260
+ }
261
+ }
262
+ },
263
+ "versions": {
264
+ "mathvista_testmini": "Yaml",
265
+ "mme": "Yaml",
266
+ "mmmu_val": "Yaml",
267
+ "mmstar": "Yaml"
268
+ },
269
+ "n-shot": {
270
+ "mathvista_testmini": 0,
271
+ "mme": 0,
272
+ "mmmu_val": 0,
273
+ "mmstar": 0
274
+ },
275
+ "model_configs": {
276
+ "model": "llava",
277
+ "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-16636,conv_template=phi35",
278
+ "batch_size": "1",
279
+ "device": null,
280
+ "limit": null,
281
+ "bootstrap_iters": 100000,
282
+ "gen_kwargs": ""
283
+ },
284
+ "git_hash": "289c7fe5"
285
+ }
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2249_llava...mstar_llava_model_args_fe2b4a/submissions/mathvista_testmini_scores.json ADDED
The diff for this file is too large to render. See raw diff
 
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mathvista_testmini.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86378d064486a95db78016ad5425190064d153e0f9021768efc7fa43820edd8c
3
+ size 45276045
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mme.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a82efa94480755a72c2a10eda7fb180405a023f189abefceb5ffb7063f342426
3
+ size 94631601
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmmu_val.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83c498ee36877cbd7e8a0700b0d8e55d660a8b97813d5104e13a420f69cd4e9b
3
+ size 36750667
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/mmstar.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5965c6560c55dd4db61a68e735a651305207df081f43f026b6c609641ccd2348
3
+ size 60427706
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank0_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 0 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/rank1_metric_eval_done.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ rank 1 eval done
sft/665K36/revise_Full_smoe_sharev3/analysts/0717_2301_llava...mstar_llava_model_args_fe4e53/results.json ADDED
@@ -0,0 +1,285 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "mathvista_testmini": {
4
+ "gpt_eval_score,none": 24.0,
5
+ "gpt_eval_score_stderr,none": "N/A",
6
+ "alias": "mathvista_testmini"
7
+ },
8
+ "mme": {
9
+ "mme_cognition_score,none": 305.3571428571429,
10
+ "mme_cognition_score_stderr,none": "N/A",
11
+ "mme_percetion_score,none": 1400.8141256502602,
12
+ "mme_percetion_score_stderr,none": "N/A",
13
+ "alias": "mme"
14
+ },
15
+ "mmmu_val": {
16
+ "mmmu_acc,none": 0.42444,
17
+ "mmmu_acc_stderr,none": "N/A",
18
+ "alias": "mmmu_val"
19
+ },
20
+ "mmstar": {
21
+ "coarse perception,none": 0.7007139127935286,
22
+ "coarse perception_stderr,none": "N/A",
23
+ "fine-grained perception,none": 0.36133628101203635,
24
+ "fine-grained perception_stderr,none": "N/A",
25
+ "instance reasoning,none": 0.5332709723793451,
26
+ "instance reasoning_stderr,none": "N/A",
27
+ "logical reasoning,none": 0.37952487556447956,
28
+ "logical reasoning_stderr,none": "N/A",
29
+ "math,none": 0.2904916243428673,
30
+ "math_stderr,none": "N/A",
31
+ "science & technology,none": 0.2396233390154238,
32
+ "science & technology_stderr,none": "N/A",
33
+ "alias": "mmstar"
34
+ }
35
+ },
36
+ "configs": {
37
+ "mathvista_testmini": {
38
+ "task": "mathvista_testmini",
39
+ "dataset_path": "AI4Math/MathVista",
40
+ "dataset_kwargs": {
41
+ "token": true
42
+ },
43
+ "test_split": "testmini",
44
+ "doc_to_visual": "<function mathvista_doc_to_visual at 0x7f60bd21f700>",
45
+ "doc_to_text": "<function mathvista_doc_to_text at 0x7f60bd228dc0>",
46
+ "doc_to_target": "answer",
47
+ "process_results": "<function mathvista_process_results at 0x7f60bd2364c0>",
48
+ "description": "",
49
+ "target_delimiter": " ",
50
+ "fewshot_delimiter": "\n\n",
51
+ "metric_list": [
52
+ {
53
+ "metric": "gpt_eval_score",
54
+ "aggregation": "<function mathvista_aggregate_results at 0x7f60bd241b80>",
55
+ "higher_is_better": true
56
+ }
57
+ ],
58
+ "output_type": "generate_until",
59
+ "generation_kwargs": {
60
+ "until": [
61
+ "ASSISTANT:"
62
+ ],
63
+ "max_new_tokens": 1024,
64
+ "temperature": 0.0,
65
+ "top_p": 1.0,
66
+ "num_beams": 1,
67
+ "do_sample": false,
68
+ "image_aspect_ratio": "original"
69
+ },
70
+ "repeats": 1,
71
+ "should_decontaminate": false,
72
+ "model_specific_prompt_kwargs": {
73
+ "default": {
74
+ "shot_type": "format-prompt",
75
+ "shot": 0,
76
+ "use_caption": false,
77
+ "use_ocr": false
78
+ },
79
+ "phi3v": {
80
+ "shot_type": "solution"
81
+ }
82
+ },
83
+ "model_specific_generation_kwargs": {
84
+ "llava": {
85
+ "image_aspect_ratio": "original"
86
+ }
87
+ }
88
+ },
89
+ "mme": {
90
+ "task": "mme",
91
+ "dataset_path": "lmms-lab/MME",
92
+ "dataset_kwargs": {
93
+ "token": false
94
+ },
95
+ "test_split": "test",
96
+ "doc_to_visual": "<function mme_doc_to_visual at 0x7f611df29940>",
97
+ "doc_to_text": "<function mme_doc_to_text at 0x7f611c4cd160>",
98
+ "doc_to_target": "answer",
99
+ "process_results": "<function mme_process_results at 0x7f611c4cd700>",
100
+ "description": "",
101
+ "target_delimiter": " ",
102
+ "fewshot_delimiter": "\n\n",
103
+ "metric_list": [
104
+ {
105
+ "metric": "mme_percetion_score",
106
+ "aggregation": "<function mme_aggregate_results at 0x7f611c4cdc10>",
107
+ "higher_is_better": true
108
+ },
109
+ {
110
+ "metric": "mme_cognition_score",
111
+ "aggregation": "<function mme_aggregate_results at 0x7f611c4d50d0>",
112
+ "higher_is_better": true
113
+ }
114
+ ],
115
+ "output_type": "generate_until",
116
+ "generation_kwargs": {
117
+ "max_new_tokens": 16,
118
+ "temperature": 0.0,
119
+ "top_p": 1.0,
120
+ "num_beams": 1,
121
+ "do_sample": false,
122
+ "until": [
123
+ "\n\n"
124
+ ]
125
+ },
126
+ "repeats": 1,
127
+ "should_decontaminate": false,
128
+ "metadata": [
129
+ {
130
+ "version": 0.0
131
+ }
132
+ ],
133
+ "model_specific_prompt_kwargs": {
134
+ "default": {
135
+ "pre_prompt": "",
136
+ "post_prompt": "\nAnswer the question using a single word or phrase."
137
+ },
138
+ "gpt4v": {
139
+ "pre_prompt": "",
140
+ "post_prompt": "\nAnswer the question with Yes or No."
141
+ },
142
+ "qwen_vl": {
143
+ "pre_prompt": "",
144
+ "post_prompt": " Answer:"
145
+ },
146
+ "otterhd": {
147
+ "pre_prompt": "",
148
+ "post_prompt": " Answer:"
149
+ },
150
+ "xcomposer2_4khd": {
151
+ "pre_prompt": "[UNUSED_TOKEN_146]user\n",
152
+ "post_prompt": " Answer this question briefly[UNUSED_TOKEN_145]\n[UNUSED_TOKEN_146]assistant\n"
153
+ }
154
+ }
155
+ },
156
+ "mmmu_val": {
157
+ "task": "mmmu_val",
158
+ "dataset_path": "lmms-lab/MMMU",
159
+ "test_split": "validation",
160
+ "doc_to_visual": "<function mmmu_doc_to_visual at 0x7f60cfa0b700>",
161
+ "doc_to_text": "<function mmmu_doc_to_text at 0x7f60cf9bb430>",
162
+ "doc_to_target": "answer",
163
+ "process_results": "<function mmmu_process_results at 0x7f60cf8b3310>",
164
+ "description": "",
165
+ "target_delimiter": " ",
166
+ "fewshot_delimiter": "\n\n",
167
+ "metric_list": [
168
+ {
169
+ "metric": "mmmu_acc",
170
+ "aggregation": "<function mmmu_aggregate_results at 0x7f60cf834280>",
171
+ "higher_is_better": true
172
+ }
173
+ ],
174
+ "output_type": "generate_until",
175
+ "generation_kwargs": {
176
+ "max_new_tokens": 128,
177
+ "until": [
178
+ "\n\n"
179
+ ],
180
+ "image_aspect_ratio": "original"
181
+ },
182
+ "repeats": 1,
183
+ "should_decontaminate": false,
184
+ "metadata": [
185
+ {
186
+ "version": 0.0
187
+ }
188
+ ],
189
+ "model_specific_generation_kwargs": {
190
+ "llava": {
191
+ "image_aspect_ratio": "original"
192
+ }
193
+ }
194
+ },
195
+ "mmstar": {
196
+ "task": "mmstar",
197
+ "dataset_path": "Lin-Chen/MMStar",
198
+ "dataset_kwargs": {
199
+ "token": true
200
+ },
201
+ "test_split": "val",
202
+ "doc_to_visual": "<function mmstar_doc_to_visual at 0x7f60bd150d30>",
203
+ "doc_to_text": "<function mmstar_doc_to_text at 0x7f60bd1581f0>",
204
+ "doc_to_target": "answer",
205
+ "process_results": "<function mmstar_process_results at 0x7f60bd158700>",
206
+ "description": "",
207
+ "target_delimiter": " ",
208
+ "fewshot_delimiter": "\n\n",
209
+ "metric_list": [
210
+ {
211
+ "metric": "coarse perception",
212
+ "aggregation": "<function mmstar_aggregate_results at 0x7f60bd158b80>",
213
+ "higher_is_better": true
214
+ },
215
+ {
216
+ "metric": "fine-grained perception",
217
+ "aggregation": "<function mmstar_aggregate_results at 0x7f60bd158f70>",
218
+ "higher_is_better": true
219
+ },
220
+ {
221
+ "metric": "instance reasoning",
222
+ "aggregation": "<function mmstar_aggregate_results at 0x7f60bd0de3a0>",
223
+ "higher_is_better": true
224
+ },
225
+ {
226
+ "metric": "logical reasoning",
227
+ "aggregation": "<function mmstar_aggregate_results at 0x7f60bd0de790>",
228
+ "higher_is_better": true
229
+ },
230
+ {
231
+ "metric": "science & technology",
232
+ "aggregation": "<function mmstar_aggregate_results at 0x7f60bd0deb80>",
233
+ "higher_is_better": true
234
+ },
235
+ {
236
+ "metric": "math",
237
+ "aggregation": "<function mmstar_aggregate_results at 0x7f60bd0def70>",
238
+ "higher_is_better": true
239
+ }
240
+ ],
241
+ "output_type": "generate_until",
242
+ "generation_kwargs": {
243
+ "until": [
244
+ "\n\n"
245
+ ],
246
+ "do_sample": false
247
+ },
248
+ "repeats": 1,
249
+ "should_decontaminate": false,
250
+ "metadata": [
251
+ {
252
+ "version": 0.0
253
+ }
254
+ ],
255
+ "model_specific_prompt_kwargs": {
256
+ "default": {
257
+ "pre_prompt": "",
258
+ "post_prompt": "\nAnswer with the option's letter from the given choices directly"
259
+ }
260
+ }
261
+ }
262
+ },
263
+ "versions": {
264
+ "mathvista_testmini": "Yaml",
265
+ "mme": "Yaml",
266
+ "mmmu_val": "Yaml",
267
+ "mmstar": "Yaml"
268
+ },
269
+ "n-shot": {
270
+ "mathvista_testmini": 0,
271
+ "mme": 0,
272
+ "mmmu_val": 0,
273
+ "mmstar": 0
274
+ },
275
+ "model_configs": {
276
+ "model": "llava",
277
+ "model_args": "pretrained=/cm/archive/namnv78_new/revise_checkpoints/Xphi35-siglip224/SMOE/665K36/revise_Full_smoe_sharev3/checkpoint-20791,conv_template=phi35",
278
+ "batch_size": "1",
279
+ "device": null,
280
+ "limit": null,
281
+ "bootstrap_iters": 100000,
282
+ "gen_kwargs": ""
283
+ },
284
+ "git_hash": "289c7fe5"
285
+ }