diff --git a/.gitattributes b/.gitattributes index 12ae3ac19ddf750be8eb15c631164db1098cc1f0..dafdfd192f82399957ed9ed8ade0162a8a06ae20 100644 --- a/.gitattributes +++ b/.gitattributes @@ -37,3 +37,13 @@ llava_base/llava_base_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge llava_base_v2/llava_base_v2_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_phi_c01/llava_phi_c01_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_phi_c02/llava_phi_c02_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_phi_c03/llava_phi_c03_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_phi_c05/llava_phi_c05_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_phi_c06/llava_phi_c06_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_phi_c09/llava_phi_c09_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +llava_phi_c10/llava_phi_c10_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text +vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG.xlsx filter=lfs diff=lfs merge=lfs -text +vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_openai_result.xlsx filter=lfs diff=lfs merge=lfs -text diff --git a/llava_base/.ipynb_checkpoints/llava_base_MME_score-checkpoint.csv b/llava_base/.ipynb_checkpoints/llava_base_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..72cd7fb8ad6cc80cc1e1c6c7bca03dd18794307b --- /dev/null +++ b/llava_base/.ipynb_checkpoints/llava_base_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1259.7040816326532","214.28571428571428","130.0","108.0","105.0","22.5","173.33333333333334","104.28571428571428","125.0","143.33333333333331","75.0","12.5","133.33333333333334","110.20408163265304","156.5","75.0" diff --git a/llava_base/.ipynb_checkpoints/llava_base_POPE_score-checkpoint.csv b/llava_base/.ipynb_checkpoints/llava_base_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..dec5b5ebf8f97ed2375ea39e8cd164bc2ef63b68 --- /dev/null +++ b/llava_base/.ipynb_checkpoints/llava_base_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","81.02643856920683","75.3","97.20149253731343","69.46666666666667" +"popular","81.12105877773453","74.0","97.47427502338635","69.46666666666667" +"random","81.59749412685983","74.6","98.86148007590133","69.46666666666667" +"adversarial","80.37022753567297","77.3","95.33394327538883","69.46666666666667" diff --git a/llava_base/.ipynb_checkpoints/llava_base_SEEDBench_IMG_acc-checkpoint.csv b/llava_base/.ipynb_checkpoints/llava_base_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..90418ad3a5eceeb66a54623132a3388c3c73e2e8 --- /dev/null +++ b/llava_base/.ipynb_checkpoints/llava_base_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5265598650927488","0.5091417509141751","0.590387766247952","0.6597938144329897","0.5040899795501023","0.27257866775643647","0.7178594046865104","0.4337899543378995","0.2857142857142857","0.743202416918429" diff --git a/llava_base/.ipynb_checkpoints/llava_base_ScienceQA_VAL_acc-checkpoint.csv b/llava_base/.ipynb_checkpoints/llava_base_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..265ee3e55d49b25a5ed9f65cb8e3f2bf3b9b887e --- /dev/null +++ b/llava_base/.ipynb_checkpoints/llava_base_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.5970433953266572","0.625","1.0","1.0","1.0","1.0","1.0","0.5869565217391305","0.6190476190476191","0.53125","1.0","0.125","0.5833333333333334","0.8723404255319149","1.0","0.21428571428571427","0.8","1.0","0.9032258064516129","0.6","0.0","0.5","1.0","0.52","0.7611940298507462","0.9811320754716981","0.1794871794871795","0.8","0.29411764705882354","0.1276595744680851","0.4225352112676056","1.0","1.0","0.8","0.23308270676691728","0.3387096774193548","0.646551724137931","0.5","0.5","0.19148936170212766","0.9","0.5","0.5147058823529411","0.5555555555555556","0.6046511627906976","0.6","0.8","0.75","1.0","0.926829268292683","0.5","0.8571428571428571","0.373134328358209","0.949685534591195","0.3333333333333333","0.8947368421052632","0.6666666666666666","0.13333333333333333","1.0","0.0","1.0","0.4444444444444444","0.19607843137254902","0.5555555555555556","1.0","0.34375","0.6666666666666666" diff --git a/llava_base/.ipynb_checkpoints/llava_base_TextVQA_VAL_acc-checkpoint.csv b/llava_base/.ipynb_checkpoints/llava_base_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..869088f6e6e914f7ee4b920a607db9593d1c7402 --- /dev/null +++ b/llava_base/.ipynb_checkpoints/llava_base_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"15.498000000000001" diff --git a/llava_base/llava_base_AI2D_TEST.xlsx b/llava_base/llava_base_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..108e62bda2679c9998d812ea379d360b9a62b803 Binary files /dev/null and b/llava_base/llava_base_AI2D_TEST.xlsx differ diff --git a/llava_base/llava_base_AI2D_TEST_acc.csv b/llava_base/llava_base_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..25db4d959b2143e4a7c36c5020afbf2844ad2301 --- /dev/null +++ b/llava_base/llava_base_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.4413860103626943","0.375","0.5806451612903226","0.5357142857142857","0.4750430292598967","0.37170263788968827","0.3249097472924188","0.5133470225872689","0.4807692307692308","0.379746835443038","0.34328358208955223","0.5121951219512195","0.6111111111111112","0.3848396501457726","0.5625","0.4090909090909091" diff --git a/llava_base/llava_base_AI2D_TEST_openai_result.pkl b/llava_base/llava_base_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..54888c204955c4cf42a59e11f5b6232c3fc9e8e9 --- /dev/null +++ b/llava_base/llava_base_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a91a41f4cd9c52ccdd9bc8e420b5d453663fc641f4e8c42089701d5608dbd5 +size 241186 diff --git a/llava_base/llava_base_AI2D_TEST_openai_result.xlsx b/llava_base/llava_base_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..cfcb1a8b9e8a1cf99a13feb797d80dc0d3508547 Binary files /dev/null and b/llava_base/llava_base_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_base/llava_base_HallusionBench.xlsx b/llava_base/llava_base_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..368fbf493f5e055348707f2683425dfa003fe1d6 Binary files /dev/null and b/llava_base/llava_base_HallusionBench.xlsx differ diff --git a/llava_base/llava_base_HallusionBench_PREV.pkl b/llava_base/llava_base_HallusionBench_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cdcf98536dace7a68351787598119e909feeb174 --- /dev/null +++ b/llava_base/llava_base_HallusionBench_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c368882dba8fcf9cd24d9d56b278a37bcee7b46e25e3419009d638c9f4bb119a +size 127440 diff --git a/llava_base/llava_base_HallusionBench_auxmatch.xlsx b/llava_base/llava_base_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d921d12aa20bf15257973c8297a3e061127148f1 Binary files /dev/null and b/llava_base/llava_base_HallusionBench_auxmatch.xlsx differ diff --git a/llava_base/llava_base_HallusionBench_score.csv b/llava_base/llava_base_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..5872903942569a40636febb09650fb3c67970b81 --- /dev/null +++ b/llava_base/llava_base_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","33.64879074658255","15.31791907514451","12.087912087912088" +"VD","38.40947546531303","21.304347826086957","15.884476534296029" +"VS","25.833333333333336","3.4482758620689653","6.179775280898876" +"VD_video","15.294117647058824","2.083333333333333","1.4492753623188406" +"VS_chart","22.30769230769231","0.0","9.210526315789473" +"VD_illusion","48.61111111111111","22.58064516129032","11.11111111111111" +"VD_figure","50.0","29.268292682926827","23.076923076923077" +"VS_table","23.214285714285715","0.0","0.0" +"VD_ocr","59.55056179775281","46.51162790697674","46.51162790697674" +"VS_map","29.6875","0.0","3.125" +"VS_ocr","35.18518518518518","15.384615384615385","11.11111111111111" +"VD_math","35.18518518518518","5.555555555555555","11.11111111111111" diff --git a/llava_base/llava_base_MME.xlsx b/llava_base/llava_base_MME.xlsx index b124fde839debc3752cf518b5e9dff41b5dfcf41..ebae6f39ba3fabb0f2a5e568e36205fd378751be 100644 Binary files a/llava_base/llava_base_MME.xlsx and b/llava_base/llava_base_MME.xlsx differ diff --git a/llava_base/llava_base_MME_auxmatch.xlsx b/llava_base/llava_base_MME_auxmatch.xlsx index 9054aa032eccdac4367fb209d88a36d287d05f68..df6ead2f351a44ccfeb309a0dd3a141663d638ee 100644 Binary files a/llava_base/llava_base_MME_auxmatch.xlsx and b/llava_base/llava_base_MME_auxmatch.xlsx differ diff --git a/llava_base/llava_base_POPE.xlsx b/llava_base/llava_base_POPE.xlsx index 05dd8560f5c963d50c0c07d95e29ed76a91e1c7a..e0b92d175e07ec3dec3cef46f5d6e8dc3366d32a 100644 Binary files a/llava_base/llava_base_POPE.xlsx and b/llava_base/llava_base_POPE.xlsx differ diff --git a/llava_base/llava_base_POPE_PREV.pkl b/llava_base/llava_base_POPE_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..36b3265f1093e4eefead28a880b93362fba3866e --- /dev/null +++ b/llava_base/llava_base_POPE_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2867aacdff31a0e6b1459610a9123dc7e91a3de36c37d1be73e8aff3a089fe +size 42809 diff --git a/llava_base/llava_base_POPE_auxmatch.xlsx b/llava_base/llava_base_POPE_auxmatch.xlsx index ca92fce653f6ad9f78cfd0aa5dd2545d9e70131d..cc922fb892fe15f0a9957facf4b83369df832330 100644 Binary files a/llava_base/llava_base_POPE_auxmatch.xlsx and b/llava_base/llava_base_POPE_auxmatch.xlsx differ diff --git a/llava_base/llava_base_POPE_score.csv b/llava_base/llava_base_POPE_score.csv index dfa7aaeddb79377758dcdd1af7981184f28dd9f6..6509bbabd462d4fa1ac3ecf875359253da090eb4 100644 --- a/llava_base/llava_base_POPE_score.csv +++ b/llava_base/llava_base_POPE_score.csv @@ -1,5 +1,5 @@ "split","Overall","acc","precision","recall" "Overall","81.02643856920683","75.3","97.20149253731343","69.46666666666667" -"adversarial","80.37022753567297","77.3","95.33394327538883","69.46666666666667" "popular","81.12105877773453","74.0","97.47427502338635","69.46666666666667" +"adversarial","80.37022753567297","77.3","95.33394327538883","69.46666666666667" "random","81.59749412685983","74.6","98.86148007590133","69.46666666666667" diff --git a/llava_base/llava_base_Q-Bench1_VAL.xlsx b/llava_base/llava_base_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6c496394103eb768f00f1d08be2c76b6f32d2936 Binary files /dev/null and b/llava_base/llava_base_Q-Bench1_VAL.xlsx differ diff --git a/llava_base/llava_base_Q-Bench1_VAL_acc.csv b/llava_base/llava_base_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..66f9d8c3961178310297cd97805d7976c3fe15ba --- /dev/null +++ b/llava_base/llava_base_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5351170568561873","0.526595744680851","0.5485714285714286","0.5128205128205128","0.6571428571428571","0.3","0.6696428571428571","0.53","0.6","0.5113636363636364","0.5793103448275863","0.4942528735632184","0.6470588235294118" diff --git a/llava_base/llava_base_Q-Bench1_VAL_openai_result.pkl b/llava_base/llava_base_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..987b6f962822dc3aa0dbc306a291648261d47319 --- /dev/null +++ b/llava_base/llava_base_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28991b05fdf580881649ed2e8e681aca2cc72490b747178deefa9b7d37952698 +size 93585 diff --git a/llava_base/llava_base_Q-Bench1_VAL_openai_result.xlsx b/llava_base/llava_base_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e719a3871fe92d79cda6cc4415f1ae6fb51f3f33 Binary files /dev/null and b/llava_base/llava_base_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_base/llava_base_RealWorldQA.xlsx b/llava_base/llava_base_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e745328586ef9066feac62a218b2e231e0263849 Binary files /dev/null and b/llava_base/llava_base_RealWorldQA.xlsx differ diff --git a/llava_base/llava_base_RealWorldQA_PREV.pkl b/llava_base/llava_base_RealWorldQA_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3857d1781d6f8e8d923614d6122be543042e2599 --- /dev/null +++ b/llava_base/llava_base_RealWorldQA_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:053811597f0f10d8022535ce9ec32b6e3ab8daeb44f8694c5b7932e2083e60e3 +size 3826 diff --git a/llava_base/llava_base_RealWorldQA_acc.csv b/llava_base/llava_base_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..3a149dedde5a6be6e92cd556e30080beb161f469 --- /dev/null +++ b/llava_base/llava_base_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.39738562091503266" diff --git a/llava_base/llava_base_RealWorldQA_openai_result.pkl b/llava_base/llava_base_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9fbb67e5577618b5a22ee1e5a3d128a7467aca05 --- /dev/null +++ b/llava_base/llava_base_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a857ca6130723ceff478d8434ebfe6b22ef785fd027403cc8276558652d9c22 +size 59088 diff --git a/llava_base/llava_base_RealWorldQA_openai_result.xlsx b/llava_base/llava_base_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..83953e9a12f59e1b8dc5b6b11b4c6c40a5488f4d Binary files /dev/null and b/llava_base/llava_base_RealWorldQA_openai_result.xlsx differ diff --git a/llava_base/llava_base_SEEDBench_IMG.xlsx b/llava_base/llava_base_SEEDBench_IMG.xlsx index d2f7457730d68db265ae283c9f12d56291056d66..772177963383ed9dcd1e5f9dc4b9bb2fa3526361 100644 Binary files a/llava_base/llava_base_SEEDBench_IMG.xlsx and b/llava_base/llava_base_SEEDBench_IMG.xlsx differ diff --git a/llava_base/llava_base_SEEDBench_IMG_openai_result.xlsx b/llava_base/llava_base_SEEDBench_IMG_openai_result.xlsx index 81b5935e81d352042edc7f4639d3b9e617d41dc3..70150ad911b1a05e7ddfd7b07c3d3c2553fcf0ce 100644 --- a/llava_base/llava_base_SEEDBench_IMG_openai_result.xlsx +++ b/llava_base/llava_base_SEEDBench_IMG_openai_result.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7b848ae407b25ea999e8e77dac01c39f68d147660cfdc70f0a9bf556535bb133 -size 1061587 +oid sha256:9bcdd6c6c6e008a04008cc8af05b6fa5ace113a2e291a0a06903c7ea3032761a +size 1061588 diff --git a/llava_base/llava_base_ScienceQA_VAL.xlsx b/llava_base/llava_base_ScienceQA_VAL.xlsx index 2e6599e3babb6768c7c301cb7f8446ba6dca01ee..f24df781d2e4cc5e4f90847f56bc3d18ed12f4b2 100644 Binary files a/llava_base/llava_base_ScienceQA_VAL.xlsx and b/llava_base/llava_base_ScienceQA_VAL.xlsx differ diff --git a/llava_base/llava_base_ScienceQA_VAL_PREV.pkl b/llava_base/llava_base_ScienceQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..00c5ea97cc6da44c9607398f21a823fcf5ed48b0 --- /dev/null +++ b/llava_base/llava_base_ScienceQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362aa0fb5597a075efb0989640491d8dabc7134d2eb21dc7f56783bcf976e4e2 +size 10875 diff --git a/llava_base/llava_base_ScienceQA_VAL_openai_result.xlsx b/llava_base/llava_base_ScienceQA_VAL_openai_result.xlsx index 8891047dc745bec089739e031815d0d211b3023b..3ca041dd94362ca8a1673b3fd0ec6153f0cbd1ba 100644 Binary files a/llava_base/llava_base_ScienceQA_VAL_openai_result.xlsx and b/llava_base/llava_base_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_base/llava_base_TextVQA_VAL.xlsx b/llava_base/llava_base_TextVQA_VAL.xlsx index 82f7be7ab41e406788c83b4d82411e36d854fbc0..7ce1cd3d3f7d36066944e438c81433619ca95fbf 100644 Binary files a/llava_base/llava_base_TextVQA_VAL.xlsx and b/llava_base/llava_base_TextVQA_VAL.xlsx differ diff --git a/llava_base/llava_base_TextVQA_VAL_PREV.pkl b/llava_base/llava_base_TextVQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a2d9677727ec66dbc87f01babbb24d119e069b95 --- /dev/null +++ b/llava_base/llava_base_TextVQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9596b33c0105a0daac9fd01eb1470408225ad2c140362535ae27051b5910cd15 +size 47012 diff --git a/llava_base_v2/.ipynb_checkpoints/llava_base_v2_HallusionBench_score-checkpoint.csv b/llava_base_v2/.ipynb_checkpoints/llava_base_v2_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..afbe2c6ff867726f707497097167de4318657167 --- /dev/null +++ b/llava_base_v2/.ipynb_checkpoints/llava_base_v2_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","44.05888538380652","18.20809248554913","15.604395604395604" +"VD","48.73096446700508","23.47826086956522","19.855595667870034" +"VS","36.388888888888886","7.758620689655173","8.98876404494382" +"VS_map","43.75","13.636363636363635","6.25" +"VD_illusion","54.861111111111114","20.967741935483872","15.277777777777779" +"VD_ocr","57.30337078651685","39.53488372093023","30.23255813953488" +"VD_math","52.77777777777778","19.444444444444446","27.77777777777778" +"VD_video","30.0","2.083333333333333","5.797101449275362" +"VD_figure","62.5","39.02439024390244","30.76923076923077" +"VS_chart","29.230769230769234","2.5","10.526315789473683" +"VS_ocr","40.74074074074074","15.384615384615385","11.11111111111111" +"VS_table","38.392857142857146","3.571428571428571","6.976744186046512" diff --git a/llava_base_v2/.ipynb_checkpoints/llava_base_v2_MME_score-checkpoint.csv b/llava_base_v2/.ipynb_checkpoints/llava_base_v2_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..ee4503300ae3238ddbfdf151a2662b3fa0a7917c --- /dev/null +++ b/llava_base_v2/.ipynb_checkpoints/llava_base_v2_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1268.2366946778711","265.0","95.0","110.5","69.11764705882354","57.5","185.0","115.0","153.33333333333334","190.0","56.0","47.5","128.33333333333334","130.95238095238096","150.0","45.0" diff --git a/llava_base_v2/.ipynb_checkpoints/llava_base_v2_ScienceQA_VAL_acc-checkpoint.csv b/llava_base_v2/.ipynb_checkpoints/llava_base_v2_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..dc4d46dbc159a80025eb6c1ae60bf4543957d944 --- /dev/null +++ b/llava_base_v2/.ipynb_checkpoints/llava_base_v2_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6547448736289938","0.84375","1.0","1.0","1.0","0.5","1.0","0.5","0.6904761904761905","0.59375","1.0","0.375","0.7619047619047619","0.9361702127659575","1.0","0.5238095238095238","0.8","1.0","1.0","0.6","0.0","0.5","1.0","0.6","0.7164179104477612","1.0","0.4358974358974359","0.68","0.47058823529411764","0.0851063829787234","0.4647887323943662","1.0","1.0","1.0","0.2781954887218045","0.5483870967741935","0.8017241379310345","0.5","0.5","0.2765957446808511","0.8","1.0","0.25","0.5555555555555556","0.5348837209302325","0.7","0.8","1.0","1.0","1.0","0.5","0.8571428571428571","0.417910447761194","0.9905660377358491","0.3333333333333333","0.7894736842105263","0.6666666666666666","0.3","1.0","0.0","1.0","0.6666666666666666","0.13725490196078433","0.7777777777777778","1.0","0.359375","1.0" diff --git a/llava_base_v2/llava_base_v2_AI2D_TEST.xlsx b/llava_base_v2/llava_base_v2_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..cfbc687c936b25918ec303d36cd2f56cff1ba78c Binary files /dev/null and b/llava_base_v2/llava_base_v2_AI2D_TEST.xlsx differ diff --git a/llava_base_v2/llava_base_v2_AI2D_TEST_acc.csv b/llava_base_v2/llava_base_v2_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..900ef9c0a1d4d935504e2569e696f885e0edae87 --- /dev/null +++ b/llava_base_v2/llava_base_v2_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5761010362694301","0.75","0.6451612903225806","0.5714285714285714","0.6600688468158348","0.4892086330935252","0.4296028880866426","0.5913757700205339","0.4807692307692308","0.4810126582278481","0.4626865671641791","0.5365853658536586","0.7777777777777778","0.5335276967930029","0.625","0.5" diff --git a/llava_base_v2/llava_base_v2_AI2D_TEST_openai_result.pkl b/llava_base_v2/llava_base_v2_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..294c74c6e3d6fedf01edbc5f5a27572c86713add --- /dev/null +++ b/llava_base_v2/llava_base_v2_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19f30bb2c7308b8dc6c9aa69174a7179fd4a487dd041325d78f5c3c2752b1826 +size 171966 diff --git a/llava_base_v2/llava_base_v2_AI2D_TEST_openai_result.xlsx b/llava_base_v2/llava_base_v2_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..78e083a23bbad254b69707e8cecd05ea2235b26e Binary files /dev/null and b/llava_base_v2/llava_base_v2_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_base_v2/llava_base_v2_HallusionBench.xlsx b/llava_base_v2/llava_base_v2_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7c343b724d0928b966e5aa35be12a0a067751b8e Binary files /dev/null and b/llava_base_v2/llava_base_v2_HallusionBench.xlsx differ diff --git a/llava_base_v2/llava_base_v2_HallusionBench_PREV.pkl b/llava_base_v2/llava_base_v2_HallusionBench_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..16a23dd611200a6dad85ba4a155b364a93c5a844 --- /dev/null +++ b/llava_base_v2/llava_base_v2_HallusionBench_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b93ac3fc083461d16975077f8f62141c24cc879641a88978408c9ce9160ed8 +size 118397 diff --git a/llava_base_v2/llava_base_v2_HallusionBench_auxmatch.xlsx b/llava_base_v2/llava_base_v2_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..edd3c3933b4bdd2a3a4999a0b11eb26aaa96ecb7 Binary files /dev/null and b/llava_base_v2/llava_base_v2_HallusionBench_auxmatch.xlsx differ diff --git a/llava_base_v2/llava_base_v2_HallusionBench_score.csv b/llava_base_v2/llava_base_v2_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..e6ad0422099d964177190914bfdd9b91ac173bf5 --- /dev/null +++ b/llava_base_v2/llava_base_v2_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","44.05888538380652","18.20809248554913","15.604395604395604" +"VS","36.388888888888886","7.758620689655173","8.98876404494382" +"VD","48.73096446700508","23.47826086956522","19.855595667870034" +"VS_table","38.392857142857146","3.571428571428571","6.976744186046512" +"VD_math","52.77777777777778","19.444444444444446","27.77777777777778" +"VD_illusion","54.861111111111114","20.967741935483872","15.277777777777779" +"VS_chart","29.230769230769234","2.5","10.526315789473683" +"VS_ocr","40.74074074074074","15.384615384615385","11.11111111111111" +"VD_ocr","57.30337078651685","39.53488372093023","30.23255813953488" +"VD_video","30.0","2.083333333333333","5.797101449275362" +"VD_figure","62.5","39.02439024390244","30.76923076923077" +"VS_map","43.75","13.636363636363635","6.25" diff --git a/llava_base_v2/llava_base_v2_MME.xlsx b/llava_base_v2/llava_base_v2_MME.xlsx index ef8228e8f41b91108e65ff2b79f63d12ac129b74..8363fde5f0374d0cce5002b995d0bacd89eaef6d 100644 Binary files a/llava_base_v2/llava_base_v2_MME.xlsx and b/llava_base_v2/llava_base_v2_MME.xlsx differ diff --git a/llava_base_v2/llava_base_v2_MME_PREV.pkl b/llava_base_v2/llava_base_v2_MME_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..982ae5a78ae33ee4b2e8acb111abd888320e3894 --- /dev/null +++ b/llava_base_v2/llava_base_v2_MME_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c7dc51edd95ab4497f6d1380c721c10f51a74018472654929654631bce42c7 +size 12783 diff --git a/llava_base_v2/llava_base_v2_MME_auxmatch.xlsx b/llava_base_v2/llava_base_v2_MME_auxmatch.xlsx index d6b378d85645082e20656c86d5235fa12a907f02..fbfb645b0c76eeebdb417bca482be4c3003a70a6 100644 Binary files a/llava_base_v2/llava_base_v2_MME_auxmatch.xlsx and b/llava_base_v2/llava_base_v2_MME_auxmatch.xlsx differ diff --git a/llava_base_v2/llava_base_v2_POPE.xlsx b/llava_base_v2/llava_base_v2_POPE.xlsx index 40f842ff525fd4c5b0f72964f4926bcb80dc17f5..79a955054329fa82a994465799345841de8e8f7e 100644 Binary files a/llava_base_v2/llava_base_v2_POPE.xlsx and b/llava_base_v2/llava_base_v2_POPE.xlsx differ diff --git a/llava_base_v2/llava_base_v2_POPE_PREV.pkl b/llava_base_v2/llava_base_v2_POPE_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..144adef38e4dc7976da5f3745285a56985cfbbcf --- /dev/null +++ b/llava_base_v2/llava_base_v2_POPE_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:724795fe00aebee612a9f76a41fa75e7e08ad66535e99ca47941cbf79b1da953 +size 25414 diff --git a/llava_base_v2/llava_base_v2_POPE_auxmatch.xlsx b/llava_base_v2/llava_base_v2_POPE_auxmatch.xlsx index 65b848be637bfc2e036d39cf228010e28db792ef..13d45e6972513cdba85c9884c075829b57322445 100644 Binary files a/llava_base_v2/llava_base_v2_POPE_auxmatch.xlsx and b/llava_base_v2/llava_base_v2_POPE_auxmatch.xlsx differ diff --git a/llava_base_v2/llava_base_v2_Q-Bench1_VAL.xlsx b/llava_base_v2/llava_base_v2_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bef4586d4b098a870bba8156ae2def9af3f35a4e Binary files /dev/null and b/llava_base_v2/llava_base_v2_Q-Bench1_VAL.xlsx differ diff --git a/llava_base_v2/llava_base_v2_Q-Bench1_VAL_acc.csv b/llava_base_v2/llava_base_v2_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..100cd1a04919a7b4df40992712e680fc8871b698 --- /dev/null +++ b/llava_base_v2/llava_base_v2_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.591304347826087","0.6117021276595744","0.5942857142857143","0.6495726495726496","0.7714285714285715","0.46","0.7053571428571429","0.64","0.7333333333333333","0.4375","0.5793103448275863","0.5057471264367817","0.611764705882353" diff --git a/llava_base_v2/llava_base_v2_Q-Bench1_VAL_openai_result.pkl b/llava_base_v2/llava_base_v2_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..171957904300e4e9bec7171be4c10d51b4ae0071 --- /dev/null +++ b/llava_base_v2/llava_base_v2_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4faa31f69a2c6db99e7009049d025ec6a4f2ff103cf4a2b7b26f12bd731609e +size 81407 diff --git a/llava_base_v2/llava_base_v2_Q-Bench1_VAL_openai_result.xlsx b/llava_base_v2/llava_base_v2_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ed58d9164db8fac223dfe313e98f915d9029d275 Binary files /dev/null and b/llava_base_v2/llava_base_v2_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_base_v2/llava_base_v2_RealWorldQA.xlsx b/llava_base_v2/llava_base_v2_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..140511b523ae6132f72e9a3555cc8205e706017b Binary files /dev/null and b/llava_base_v2/llava_base_v2_RealWorldQA.xlsx differ diff --git a/llava_base_v2/llava_base_v2_RealWorldQA_acc.csv b/llava_base_v2/llava_base_v2_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..aa6b76a07a3701d2ecb117bc17acb00fb92748f2 --- /dev/null +++ b/llava_base_v2/llava_base_v2_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4562091503267974" diff --git a/llava_base_v2/llava_base_v2_RealWorldQA_openai_result.pkl b/llava_base_v2/llava_base_v2_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..011ea909209425fa17c02c78561029c68a226ca3 --- /dev/null +++ b/llava_base_v2/llava_base_v2_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:047d51a3f927faf5ff232da433e57e952387b3340d84f14f1f6547b9e047e439 +size 50836 diff --git a/llava_base_v2/llava_base_v2_RealWorldQA_openai_result.xlsx b/llava_base_v2/llava_base_v2_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..edbfd70c274904cda3dac9a2f4124817456bc67d Binary files /dev/null and b/llava_base_v2/llava_base_v2_RealWorldQA_openai_result.xlsx differ diff --git a/llava_base_v2/llava_base_v2_SEEDBench_IMG.xlsx b/llava_base_v2/llava_base_v2_SEEDBench_IMG.xlsx index 9649c2c9697c07b56b28a98927fc69a7dbc83e5f..044a3fbaa4c163d0758d648cbd5a012c3db68258 100644 Binary files a/llava_base_v2/llava_base_v2_SEEDBench_IMG.xlsx and b/llava_base_v2/llava_base_v2_SEEDBench_IMG.xlsx differ diff --git a/llava_base_v2/llava_base_v2_SEEDBench_IMG_PREV.pkl b/llava_base_v2/llava_base_v2_SEEDBench_IMG_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9434f6ff2bb51dcf585227faf83fb70b7ae9dc00 --- /dev/null +++ b/llava_base_v2/llava_base_v2_SEEDBench_IMG_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c41670e3848710024df67f56cc187869234917ef08ce9fa640396721ffbcb587 +size 81727 diff --git a/llava_base_v2/llava_base_v2_SEEDBench_IMG_openai_result.xlsx b/llava_base_v2/llava_base_v2_SEEDBench_IMG_openai_result.xlsx index 23c504324c334b5b5f61f860cd6d1d9fcf59a457..2dc7cfe7a7e98f3b3c24846c1471b0fc1b48f485 100644 --- a/llava_base_v2/llava_base_v2_SEEDBench_IMG_openai_result.xlsx +++ b/llava_base_v2/llava_base_v2_SEEDBench_IMG_openai_result.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a1fc04a9084a0a9286ff0db0b50f97609316cb52d240b64c18401463689e9cc -size 1057192 +oid sha256:cc81d0c41913e4975de953d38e42aacac0ada1eda277b79e050a52eeb18d1bf3 +size 1057193 diff --git a/llava_base_v2/llava_base_v2_ScienceQA_VAL.xlsx b/llava_base_v2/llava_base_v2_ScienceQA_VAL.xlsx index d6ead0c5c8b92ef45fc26321c7dd47da46dd4185..c9f6b3f8aee010a2469f1fe6d697e21cd331a67b 100644 Binary files a/llava_base_v2/llava_base_v2_ScienceQA_VAL.xlsx and b/llava_base_v2/llava_base_v2_ScienceQA_VAL.xlsx differ diff --git a/llava_base_v2/llava_base_v2_ScienceQA_VAL_PREV.pkl b/llava_base_v2/llava_base_v2_ScienceQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4fd83b887e6cd3f7919230f0969ea0bcdbe521b2 --- /dev/null +++ b/llava_base_v2/llava_base_v2_ScienceQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:975ef0705a771fdd01049437b4c2e952dccf896be0a313c14587bb8a8ed9b821 +size 10371 diff --git a/llava_base_v2/llava_base_v2_ScienceQA_VAL_openai_result.xlsx b/llava_base_v2/llava_base_v2_ScienceQA_VAL_openai_result.xlsx index cd4c0bcaaa01bc7cd957ccb11c9422e3d99bef52..1df6c6d5ff6f40ae02ab7ef18d05d790f4a323c3 100644 Binary files a/llava_base_v2/llava_base_v2_ScienceQA_VAL_openai_result.xlsx and b/llava_base_v2/llava_base_v2_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_base_v2/llava_base_v2_TextVQA_VAL.xlsx b/llava_base_v2/llava_base_v2_TextVQA_VAL.xlsx index 7dec6284e500f502ffae6f753073d0f2273b8388..6c58a4dd3170d177797140019a40a8e2f3918601 100644 Binary files a/llava_base_v2/llava_base_v2_TextVQA_VAL.xlsx and b/llava_base_v2/llava_base_v2_TextVQA_VAL.xlsx differ diff --git a/llava_base_v2/llava_base_v2_TextVQA_VAL_PREV.pkl b/llava_base_v2/llava_base_v2_TextVQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8d55f4868a5f4b9e8b4b87559638b39e21350590 --- /dev/null +++ b/llava_base_v2/llava_base_v2_TextVQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ef136df5e39eb3cf6079f71ed2f070e3b76b879c4e1c0b8a1a21c67f6d4ef1 +size 49385 diff --git a/llava_llama/01_SEEDBench_IMG.pkl b/llava_llama/01_SEEDBench_IMG.pkl new file mode 100644 index 0000000000000000000000000000000000000000..750b7b596839ccc1a080784b473ee69f8af35f82 --- /dev/null +++ b/llava_llama/01_SEEDBench_IMG.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:679a3461d4a8588001e55cd957af83cf3ac04eea2a1c5cd19690491f06659cfd +size 229136 diff --git a/llava_llama/llava_llama_A-OKVQA.xlsx b/llava_llama/llava_llama_A-OKVQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..20d63bd82505b39a81356f09cbef8e2ac1fad2ab Binary files /dev/null and b/llava_llama/llava_llama_A-OKVQA.xlsx differ diff --git a/llava_llama/llava_llama_A-OKVQA_acc.csv b/llava_llama/llava_llama_A-OKVQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea5f02d5b9a2ec615b296440e4c225629b042cbf --- /dev/null +++ b/llava_llama/llava_llama_A-OKVQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","ALL" +"val","0.0","0.0" diff --git a/llava_llama/llava_llama_A-OKVQA_openai_result.pkl b/llava_llama/llava_llama_A-OKVQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fcdcf56d67c65d82f0562e2eef903a468063d9d0 --- /dev/null +++ b/llava_llama/llava_llama_A-OKVQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065741c96f599bed334550f640fdc8eb1bfb6d5ef4af728f57503d3cad928bd2 +size 151261 diff --git a/llava_llama/llava_llama_A-OKVQA_openai_result.xlsx b/llava_llama/llava_llama_A-OKVQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..642ecaab13e46fcae58dac593f2c52ee8cdc9890 Binary files /dev/null and b/llava_llama/llava_llama_A-OKVQA_openai_result.xlsx differ diff --git a/llava_llama/llava_llama_MME.xlsx b/llava_llama/llava_llama_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..348b868a278e86cf3afb2a0bfbf3553955b28099 Binary files /dev/null and b/llava_llama/llava_llama_MME.xlsx differ diff --git a/llava_llama/llava_llama_MME_auxmatch.xlsx b/llava_llama/llava_llama_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3787d3a384971c24f78e966eaea6f6b2ff1661f4 Binary files /dev/null and b/llava_llama/llava_llama_MME_auxmatch.xlsx differ diff --git a/llava_llama/llava_llama_MME_score.csv b/llava_llama/llava_llama_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..265c0316fb96c4a9af3ef2a60528c60bd6bfd291 --- /dev/null +++ b/llava_llama/llava_llama_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"395.2577030812325","142.85714285714286","0.0","75.5","27.352941176470587","40.0","25.0","57.85714285714286","90.0","10.0","13.5","25.0","0.0","61.90476190476191","92.0","20.0" diff --git a/llava_llama/llava_llama_POPE.xlsx b/llava_llama/llava_llama_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c5c53370ca0e7fdbb7a2e301beb500b63987d1c3 Binary files /dev/null and b/llava_llama/llava_llama_POPE.xlsx differ diff --git a/llava_llama/llava_llama_POPE_auxmatch.xlsx b/llava_llama/llava_llama_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..683cc9f738518a39bf575e7052b5cbda898503fe Binary files /dev/null and b/llava_llama/llava_llama_POPE_auxmatch.xlsx differ diff --git a/llava_llama/llava_llama_POPE_score.csv b/llava_llama/llava_llama_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..29bdeac4480b9f1060d3946452dc07ddf6e10cba --- /dev/null +++ b/llava_llama/llava_llama_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","60.60957910014514","28.055555555555557","87.36401673640167","46.400000000000006" +"random","62.115127175368144","26.833333333333332","93.92712550607287","46.400000000000006" +"popular","60.155574762316334","28.7","85.5036855036855","46.400000000000006" +"adversarial","59.61456102783727","28.633333333333333","83.35329341317366","46.400000000000006" diff --git a/llava_llama/llava_llama_Q-Bench1_VAL.xlsx b/llava_llama/llava_llama_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c5f9051d13ddd8b49cda0bfa7b8c7769c0473c0f Binary files /dev/null and b/llava_llama/llava_llama_Q-Bench1_VAL.xlsx differ diff --git a/llava_llama/llava_llama_Q-Bench1_VAL_acc.csv b/llava_llama/llava_llama_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..7a416c47f3993550826c146e48d8b44e359c8717 --- /dev/null +++ b/llava_llama/llava_llama_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0" diff --git a/llava_llama/llava_llama_Q-Bench1_VAL_openai_result.pkl b/llava_llama/llava_llama_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4721aa90f9d4a8d783b2c4d5f684b8fff702df15 --- /dev/null +++ b/llava_llama/llava_llama_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b4da67dcddfa9e4ac4d243d7a8eaf45a81163e7ca1dda11741eabdb544f9e5 +size 195215 diff --git a/llava_llama/llava_llama_Q-Bench1_VAL_openai_result.xlsx b/llava_llama/llava_llama_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f5aba38cd2b6c75565af2dc0dc1e673a357a9622 Binary files /dev/null and b/llava_llama/llava_llama_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_llama/llava_llama_ScienceQA_VAL.xlsx b/llava_llama/llava_llama_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9ccacb69a28a5f651a3a3c9c25d8c69c9869b5f1 Binary files /dev/null and b/llava_llama/llava_llama_ScienceQA_VAL.xlsx differ diff --git a/llava_llama/llava_llama_ScienceQA_VAL_acc.csv b/llava_llama/llava_llama_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..0d47d29ed5e0d4f697beb43590a68a51bba2182f --- /dev/null +++ b/llava_llama/llava_llama_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.0004768717215069146","0.0","0.0","0.0","1.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0" diff --git a/llava_llama/llava_llama_ScienceQA_VAL_openai_result.pkl b/llava_llama/llava_llama_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a92d681efc53d12a5eaa4a95147eedaddd4415a2 --- /dev/null +++ b/llava_llama/llava_llama_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2817af7803724e80ebd6484a48433ba27cc664a7c26cfde4e0acfab611d3707b +size 276793 diff --git a/llava_llama/llava_llama_ScienceQA_VAL_openai_result.xlsx b/llava_llama/llava_llama_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d469ba83f59752f2327a2668e1d13eca32d09950 Binary files /dev/null and b/llava_llama/llava_llama_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_llama/llava_llama_TextVQA_VAL.xlsx b/llava_llama/llava_llama_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..292a0eb53dfce9874103b76dd998c6ad49cd52b7 Binary files /dev/null and b/llava_llama/llava_llama_TextVQA_VAL.xlsx differ diff --git a/llava_llama/llava_llama_TextVQA_VAL_acc.csv b/llava_llama/llava_llama_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..0c618d11218dd88e8d5709231740b0abc2ad0980 --- /dev/null +++ b/llava_llama/llava_llama_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"13.294" diff --git a/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_HallusionBench_score-checkpoint.csv b/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..77e4895ce3b4b2ae59f0e0c7296641cb946dbb4b --- /dev/null +++ b/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","42.586750788643535","14.450867052023122","15.824175824175823" +"VS","40.27777777777778","6.0344827586206895","14.04494382022472" +"VD","43.9932318104907","18.695652173913043","16.967509025270758" +"VD_math","42.592592592592595","5.555555555555555","14.814814814814813" +"VS_ocr","50.0","11.538461538461538","18.51851851851852" +"VD_ocr","57.30337078651685","32.55813953488372","34.883720930232556" +"VD_illusion","55.55555555555556","22.58064516129032","16.666666666666664" +"VS_table","37.5","3.571428571428571","6.976744186046512" +"VS_map","48.4375","9.090909090909092","9.375" +"VD_figure","57.49999999999999","29.268292682926827","23.076923076923077" +"VD_video","21.764705882352942","2.083333333333333","4.3478260869565215" +"VS_chart","34.61538461538461","2.5","18.421052631578945" diff --git a/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_MME_score-checkpoint.csv b/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..6adf3b5b27079fc56019185d18cb265c7c52d0f5 --- /dev/null +++ b/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1280.2602040816328","259.2857142857143","115.0","106.0","80.0","57.5","180.0","104.28571428571428","140.0","185.0","66.75","47.5","123.33333333333333","142.1768707482993","142.0","50.0" diff --git a/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_RealWorldQA_acc-checkpoint.csv b/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..c1dcc905c8bb23b361f07ce9026e9cadab27736a --- /dev/null +++ b/llava_moe_e4t2/.ipynb_checkpoints/llava_moe_e4t2_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4823529411764706" diff --git a/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST.xlsx b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7e34681e5b05f36966e5f29ccbbfc55e50ec6d54 Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_acc.csv b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..5e68e9ab46a810ef9cab6c7c86cc5575471c27e5 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5705958549222798","0.625","0.5806451612903226","0.6428571428571429","0.6592082616179001","0.4988009592326139","0.4368231046931408","0.5872689938398358","0.5961538461538461","0.4810126582278481","0.417910447761194","0.5853658536585366","0.7222222222222222","0.4868804664723032","0.375","0.45454545454545453" diff --git a/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_openai_result.pkl b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..85682f624d8101f513c5c3ca6babd1f15dadce29 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68c614754661c5428adfde3556889f76eb29fc9c2705c51a8b9cff1b6042aa46 +size 169024 diff --git a/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_openai_result.xlsx b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..456febea73ee14f4ada7fcea0a454251564ca1b4 Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_HallusionBench.xlsx b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d8148e93a9e0af759f19770e212fe6aa4badc1ec Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_PREV.pkl b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cdf802ec011e896c69133f3c00bc5aadd7999f3b --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d0a174fceac770d9dbd60508c59662dc27492f43ec6e0979308ba51b957df3c +size 130496 diff --git a/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_auxmatch.xlsx b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d4aaf523803e66c2876933f18ab5129b19edd694 Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_auxmatch.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_score.csv b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..a2c6623f5fba38cf5248d1ba450f1702756ec37c --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","42.586750788643535","14.450867052023122","15.824175824175823" +"VD","43.9932318104907","18.695652173913043","16.967509025270758" +"VS","40.27777777777778","6.0344827586206895","14.04494382022472" +"VD_figure","57.49999999999999","29.268292682926827","23.076923076923077" +"VD_ocr","57.30337078651685","32.55813953488372","34.883720930232556" +"VS_ocr","50.0","11.538461538461538","18.51851851851852" +"VS_table","37.5","3.571428571428571","6.976744186046512" +"VD_video","21.764705882352942","2.083333333333333","4.3478260869565215" +"VS_map","48.4375","9.090909090909092","9.375" +"VD_math","42.592592592592595","5.555555555555555","14.814814814814813" +"VD_illusion","55.55555555555556","22.58064516129032","16.666666666666664" +"VS_chart","34.61538461538461","2.5","18.421052631578945" diff --git a/llava_moe_e4t2/llava_moe_e4t2_MME.xlsx b/llava_moe_e4t2/llava_moe_e4t2_MME.xlsx index 745523f718cb28144af5c451f6bc92690640e91c..ad923967d3ed3ef1b81fa2ae0666325a0daa59e1 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_MME.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_MME.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_MME_PREV.pkl b/llava_moe_e4t2/llava_moe_e4t2_MME_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..21450c0a5f0c9b9c7ca200e1911fb46faf50a021 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_MME_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b09b44989cd10d9fc81e3b78f1920457cc5dd42cb3192c0438802009ddbd18 +size 15541 diff --git a/llava_moe_e4t2/llava_moe_e4t2_MME_auxmatch.xlsx b/llava_moe_e4t2/llava_moe_e4t2_MME_auxmatch.xlsx index 2e558ccbe97de2552c40a68982e91a6b8931b9d2..4cad6e6412db7794a2e750b6a6e3e9ba64d2b117 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_MME_auxmatch.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_MME_auxmatch.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_POPE.xlsx b/llava_moe_e4t2/llava_moe_e4t2_POPE.xlsx index fa9e383b21080cd2605bb3c6c70edbc3336e8847..23ce6bf0e9fe2164e699ead5cff20e6589cc8d68 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_POPE.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_POPE.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_POPE_PREV.pkl b/llava_moe_e4t2/llava_moe_e4t2_POPE_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1adcf75fab608e531eb18001cef241bc5d8099e6 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_POPE_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495c6481df432b13a12050f62076be07b282f1041f734b4064dd20592bfe924c +size 25412 diff --git a/llava_moe_e4t2/llava_moe_e4t2_POPE_auxmatch.xlsx b/llava_moe_e4t2/llava_moe_e4t2_POPE_auxmatch.xlsx index da1e48419e2afa41008e98619f4f92b905ea0ae9..8cb93bc52885d75e9b2a783908f6bf771686d9cc 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_POPE_auxmatch.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_POPE_auxmatch.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL.xlsx b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5ec3d92160b205d59bdcdcde6c33784cdb6d0f31 Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_acc.csv b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..459469cc910968782cf57de8f3caaf06eb52a1c8 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5919732441471572","0.5957446808510638","0.64","0.6324786324786325","0.8","0.42","0.75","0.62","0.7111111111111111","0.45454545454545453","0.5793103448275863","0.4942528735632184","0.6" diff --git a/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_openai_result.pkl b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..16a81ac48aa0d55247f433067675e957c9a31fcd --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c3e53cc0c59b14d50a5da1e27e972ee5a42e427e1766d8f7e19f43b121f6692 +size 79379 diff --git a/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_openai_result.xlsx b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ed7f0fb4fd9a3cb5648cefda0cc670ca82af2d3b Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA.xlsx b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8128bb9863496d0e89c9157a70d3a89f3e14a0d6 Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_PREV.pkl b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3814c4b6060b49406791c5d5928eb317408e36dd --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8491d4ed78801ad9c7b818df109640d582ed36818ad0d84cd52d3aef85f3432 +size 3679 diff --git a/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_acc.csv b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..c1dcc905c8bb23b361f07ce9026e9cadab27736a --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4823529411764706" diff --git a/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_openai_result.pkl b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6d3ffe87638440c6e8f1e1bcdf09a858e070c3f6 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f1e112745e0c20a7b63f76948c8d60eb3961495f13c63f3c44f60ad989cf70 +size 48165 diff --git a/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_openai_result.xlsx b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..950e4a181a6eb0b88a60c83ae8493693b1e7d0d7 Binary files /dev/null and b/llava_moe_e4t2/llava_moe_e4t2_RealWorldQA_openai_result.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG.xlsx b/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG.xlsx index 75458ea2fb02aa554cb210b04882ee78a2ec86e7..d7dc4586ad377103137c5d427c9b1d4c1064f6da 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_PREV.pkl b/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6b10ebdb5c90f6bf7eee2d68c940653e8413e9dc --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f6312381d3da3da54d2a57cf951bac76abfbce6f4b097b753aa6b0dbdea8e8c +size 81720 diff --git a/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_openai_result.xlsx b/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_openai_result.xlsx index 77036ae393a9ebbcf4f73b84398d233b0fc2a4c1..0578e10a349eb107a60a50bb40fae5333dfd7944 100644 --- a/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_openai_result.xlsx +++ b/llava_moe_e4t2/llava_moe_e4t2_SEEDBench_IMG_openai_result.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9927917613e699b95f8173dad936f07c264a222b38a4e61585cd5f73236e0070 -size 1057032 +oid sha256:ce828d3d82bcb37a8562ceca33e3347fe0a590f1d43c00d8b4d2a7c1b73111fd +size 1057031 diff --git a/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL.xlsx b/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL.xlsx index f7b73f21f4d7c1c7c1fac6bd6278455e302b12e4..1c1fd9cebf5e1e192a4d1891d39c045fe476ce2d 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL_PREV.pkl b/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..712665d4e60597b539d95f7ceeb7f781d6d4fd82 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b8b4ada46a1f756eaba836a701a374f09420187f2c0f52bd3ab6ffff86ef157 +size 10348 diff --git a/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL_openai_result.xlsx b/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL_openai_result.xlsx index 8572c3c204ad3f116ad0515bfef2e2aaf63fb793..5e99093b7f8b4abc25fc24813ab97349868cc8cd 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL_openai_result.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_TextVQA_VAL.xlsx b/llava_moe_e4t2/llava_moe_e4t2_TextVQA_VAL.xlsx index a7690968b84dea20342adc78f5e6d8e04c9d1fd2..22da836c131c1522e51b6a35180ae576c58206ef 100644 Binary files a/llava_moe_e4t2/llava_moe_e4t2_TextVQA_VAL.xlsx and b/llava_moe_e4t2/llava_moe_e4t2_TextVQA_VAL.xlsx differ diff --git a/llava_moe_e4t2/llava_moe_e4t2_TextVQA_VAL_PREV.pkl b/llava_moe_e4t2/llava_moe_e4t2_TextVQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fe9846a24eefd5c53e88419f4f6c2dd611015910 --- /dev/null +++ b/llava_moe_e4t2/llava_moe_e4t2_TextVQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f368c275646a7bac34232b86e167009987545495bf00519c9ee797c4259f62af +size 49054 diff --git a/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_HallusionBench_score-checkpoint.csv b/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..841f107b12ec6fa78d4b509dddd467a5c1779be8 --- /dev/null +++ b/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","42.90220820189275","18.786127167630056","16.483516483516482" +"VD","46.7005076142132","25.65217391304348","19.494584837545126" +"VS","36.666666666666664","5.172413793103448","11.797752808988763" +"VD_figure","56.25","31.70731707317073","23.076923076923077" +"VD_ocr","60.67415730337079","48.837209302325576","39.53488372093023" +"VS_table","34.82142857142857","0.0","9.30232558139535" +"VD_video","24.705882352941178","2.083333333333333","1.4492753623188406" +"VD_illusion","54.861111111111114","27.419354838709676","16.666666666666664" +"VS_chart","30.76923076923077","2.5","14.473684210526317" +"VS_ocr","40.74074074074074","11.538461538461538","11.11111111111111" +"VS_map","48.4375","9.090909090909092","9.375" +"VD_math","51.85185185185185","19.444444444444446","27.77777777777778" diff --git a/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_MME_score-checkpoint.csv b/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..035525e9361b31f3d6bac42bd1d797119959230e --- /dev/null +++ b/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1251.123949579832","266.42857142857144","110.0","107.25","75.58823529411765","62.5","160.0","106.42857142857143","130.0","190.0","59.75","47.5","138.33333333333331","130.95238095238096","149.25","50.0" diff --git a/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_SEEDBench_IMG_acc-checkpoint.csv b/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..68c4448c620bc3cdbfc6bafca54fc9b162be8cdd --- /dev/null +++ b/llava_moe_e5t3/.ipynb_checkpoints/llava_moe_e5t3_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.6068015739179314","0.6665949666594967","0.685417804478427","0.7938144329896907","0.6063394683026585","0.2676747037188394","0.7381253958201394","0.5190258751902588","0.35714285714285715","0.770392749244713" diff --git a/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST.xlsx b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5103125975f22e891976506ec0cd333d53cdeefb Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_acc.csv b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..97a1be0e85d016ff630b44eaf089ce39b561e29d --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5744818652849741","0.75","0.6129032258064516","0.5357142857142857","0.657487091222031","0.4748201438848921","0.45126353790613716","0.5975359342915811","0.5769230769230769","0.5063291139240507","0.4626865671641791","0.5853658536585366","0.75","0.5102040816326531","0.5625","0.45454545454545453" diff --git a/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_openai_result.pkl b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..bfbed5f8ccb923c741341cf8253885597da3f625 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0fa4e20fe58f9a3747d848c6242e06ad4bdb46848c2e65c9a9bcff216623ac9 +size 171432 diff --git a/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_openai_result.xlsx b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a94290cd71da174b712b5802d15ff3e838a63bfc Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_HallusionBench.xlsx b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..55f07716b8e3a490b4839db2252860b1c2edf676 Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_PREV.pkl b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..58a7435e426fb503e52fcfd2ca4d4c0536d9efd1 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946cbe725331084b907b75307742bd0b031ea5f8e37f5cfeb67a52dc274e8fae +size 124198 diff --git a/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_auxmatch.xlsx b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..337800dad4d12bfc44459322d29160870a15140f Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_auxmatch.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_score.csv b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..fc6acc464ff476e147ef7f845f9b136a2ae92f24 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","42.90220820189275","18.786127167630056","16.483516483516482" +"VS","36.666666666666664","5.172413793103448","11.797752808988763" +"VD","46.7005076142132","25.65217391304348","19.494584837545126" +"VD_math","51.85185185185185","19.444444444444446","27.77777777777778" +"VS_table","34.82142857142857","0.0","9.30232558139535" +"VS_chart","30.76923076923077","2.5","14.473684210526317" +"VD_illusion","54.861111111111114","27.419354838709676","16.666666666666664" +"VS_map","48.4375","9.090909090909092","9.375" +"VD_video","24.705882352941178","2.083333333333333","1.4492753623188406" +"VD_ocr","60.67415730337079","48.837209302325576","39.53488372093023" +"VD_figure","56.25","31.70731707317073","23.076923076923077" +"VS_ocr","40.74074074074074","11.538461538461538","11.11111111111111" diff --git a/llava_moe_e5t3/llava_moe_e5t3_MME.xlsx b/llava_moe_e5t3/llava_moe_e5t3_MME.xlsx index 5ac360345806e930d20530c11f479453241a64a6..6597171a42d2a287b457f83a2e2a7cddca324714 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_MME.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_MME.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_MME_PREV.pkl b/llava_moe_e5t3/llava_moe_e5t3_MME_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8fb05cd0e4c2e603b030a5237d84826008003033 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_MME_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c975da862282297869e685faad16fb79e4777c8bb84863be48a09245417c03f +size 15961 diff --git a/llava_moe_e5t3/llava_moe_e5t3_MME_auxmatch.xlsx b/llava_moe_e5t3/llava_moe_e5t3_MME_auxmatch.xlsx index ce4b982e0d7f7f60f72cda7136d69249dcca9971..c67254500f8ba764ddaa7c03b8264ce8bf103ba5 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_MME_auxmatch.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_MME_auxmatch.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_POPE.xlsx b/llava_moe_e5t3/llava_moe_e5t3_POPE.xlsx index 0eacb72282218b3abb740fa6678735409d94a861..504efb96883bb1c49d97fd5e5a22392e9a3f7a1d 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_POPE.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_POPE.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_POPE_PREV.pkl b/llava_moe_e5t3/llava_moe_e5t3_POPE_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9c5979522a90ffb978552fce120ad7a32e5bf80e --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_POPE_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f6859372df1dd06b2590679904ba8c80ed9d6652c1f57e9626ee2158d1c7b9 +size 25412 diff --git a/llava_moe_e5t3/llava_moe_e5t3_POPE_auxmatch.xlsx b/llava_moe_e5t3/llava_moe_e5t3_POPE_auxmatch.xlsx index 8382424890fcbbfebcd85ce5b00f97a8a49867b8..bb3318e4b00db9757ec6a26fa4bed152c7fac67b 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_POPE_auxmatch.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_POPE_auxmatch.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_POPE_score.csv b/llava_moe_e5t3/llava_moe_e5t3_POPE_score.csv index fc13d26526aca84fad53bc6102e0bdc2e4b7a7a0..657d9295c96b77e06fd4bc051fa2ada16f51601d 100644 --- a/llava_moe_e5t3/llava_moe_e5t3_POPE_score.csv +++ b/llava_moe_e5t3/llava_moe_e5t3_POPE_score.csv @@ -1,5 +1,5 @@ "split","Overall","acc","precision","recall" "Overall","78.32000000000001","81.93333333333334","97.89999999999999","65.26666666666667" -"random","78.76106194690264","82.39999999999999","99.29006085192698","65.26666666666667" "adversarial","77.76012708498808","81.33333333333333","96.16895874263261","65.26666666666667" "popular","78.44551282051283","82.06666666666666","98.29317269076306","65.26666666666667" +"random","78.76106194690264","82.39999999999999","99.29006085192698","65.26666666666667" diff --git a/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL.xlsx b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1b45357104b1007ced3d0173d81d37e8caa9b9c9 Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_acc.csv b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..a5580449b1491d1e5ccd494ec05b07b6f96ee4fa --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5752508361204013","0.601063829787234","0.6057142857142858","0.6410256410256411","0.7285714285714285","0.4","0.7142857142857143","0.59","0.6888888888888889","0.4602272727272727","0.5793103448275863","0.47126436781609193","0.5647058823529412" diff --git a/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_openai_result.pkl b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b57e68da7e2986600134b2c00fc49e9c576a3e39 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67db33aa56f7ef24abaf79288ffb0d1a045e6bd3b6924928efe5cbfefad1d7d4 +size 82031 diff --git a/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_openai_result.xlsx b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..892c26be6af0e07434bb1a2558412a739b7faec5 Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA.xlsx b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6f8ef2050f626b863b24f2c3d76f7889539e27ba Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_acc.csv b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..5d134b81fc8c92447bda994ab9222bc924f82add --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.47581699346405226" diff --git a/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_openai_result.pkl b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..dd7ce47d62c88f9396ae96d759e0e264eec2bdf2 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d7357e7d7821161b8e86f5defc11f07c207d3640295f075747e8abc32ab9b7a +size 49998 diff --git a/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_openai_result.xlsx b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b1b0ac4b80a5da0c9d7783b3f64bf2fe4e166412 Binary files /dev/null and b/llava_moe_e5t3/llava_moe_e5t3_RealWorldQA_openai_result.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG.xlsx b/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG.xlsx index 341b083ab001097d488f13ba23edfaf12da4553c..ba32a73b5704b1bef2c8a4623b3f6232c47cc578 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_PREV.pkl b/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8035b4f5c53ca506a91fbffca7e2ca0306d74e44 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7d08e07c2d3dc86c0be87960b86a4d364804acbbc739ec11fd1f19180d2410 +size 81737 diff --git a/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_openai_result.xlsx b/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_openai_result.xlsx index 17b7af501102c92c01b310ef253a913b4c8b84d4..19f3f054813c8291c7727284fb86146299d4cf56 100644 --- a/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_openai_result.xlsx +++ b/llava_moe_e5t3/llava_moe_e5t3_SEEDBench_IMG_openai_result.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:568a01029fe9f72c3802ae9a36e01520fdc87f0089e93bc5a9e444ddc7495834 +oid sha256:aaf06cbd0f2d25c6a6f9793f7393bd10e615033aeb7563f96ff5bf6c1cf1cc6b size 1057146 diff --git a/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL.xlsx b/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL.xlsx index e880d4b57f8b2aef531d4a092b046987c3ca6026..078f9d949b0af6c58851b04ddc4d1ca142cfb51b 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL_PREV.pkl b/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e5301afda538d95872f8489578bfae6d0bfc9507 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a31c031a812d712a88cbc0b085341ba6bba7741863a24778eaa8b4569a9aa53 +size 10369 diff --git a/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL_openai_result.xlsx b/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL_openai_result.xlsx index 041d6b97dd3890d7b7c18c273511a54ac2bf9066..84cc7d2078e59b4d0adc4eeb4d4260dd060b3103 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL_openai_result.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_TextVQA_VAL.xlsx b/llava_moe_e5t3/llava_moe_e5t3_TextVQA_VAL.xlsx index ffaf8db61e270a16ba5d1c50f174466fc615c275..416dd83cd269bde13b3fb1768c15d1b508a92802 100644 Binary files a/llava_moe_e5t3/llava_moe_e5t3_TextVQA_VAL.xlsx and b/llava_moe_e5t3/llava_moe_e5t3_TextVQA_VAL.xlsx differ diff --git a/llava_moe_e5t3/llava_moe_e5t3_TextVQA_VAL_PREV.pkl b/llava_moe_e5t3/llava_moe_e5t3_TextVQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1e1833806c05c5d6ec7621778dd6ebdaea4d9e57 --- /dev/null +++ b/llava_moe_e5t3/llava_moe_e5t3_TextVQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bcb119c5c640234ed599b34e35dd8019b31d5625ba5743abc61c5c3d40ee5de +size 47926 diff --git a/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_HallusionBench_score-checkpoint.csv b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..8217d68196a4bcf5736330d7aeb410d45559c403 --- /dev/null +++ b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","42.27129337539432","17.341040462427745","15.384615384615385" +"VS","37.5","4.310344827586207","11.797752808988763" +"VD","45.17766497461929","23.91304347826087","17.689530685920577" +"VS_chart","33.84615384615385","0.0","15.789473684210526" +"VS_table","34.82142857142857","0.0","6.976744186046512" +"VD_illusion","54.166666666666664","29.03225806451613","18.055555555555554" +"VD_video","27.058823529411764","2.083333333333333","1.4492753623188406" +"VS_map","46.875","9.090909090909092","9.375" +"VD_math","38.88888888888889","2.7777777777777777","12.962962962962962" +"VS_ocr","40.74074074074074","11.538461538461538","11.11111111111111" +"VD_figure","60.0","36.58536585365854","28.205128205128204" +"VD_ocr","59.55056179775281","46.51162790697674","39.53488372093023" diff --git a/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_MME_score-checkpoint.csv b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..0b649a63751f9b0010518d2e03ab18fcbc52a986 --- /dev/null +++ b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1287.8229291716686","243.21428571428572","132.5","113.5","90.58823529411764","55.0","175.0","95.71428571428571","125.0","185.0","62.75","45.0","125.0","136.73469387755102","141.75","47.5" diff --git a/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_RealWorldQA_acc-checkpoint.csv b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..d9a7f9d90fe93530f392a908c84cfa520c22bc79 --- /dev/null +++ b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4980392156862745" diff --git a/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_SEEDBench_IMG_acc-checkpoint.csv b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f607b159176e59e976e3151c71b600e04992f6f9 --- /dev/null +++ b/llava_moe_e8t2/.ipynb_checkpoints/llava_moe_e8t2_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.6108066329398538","0.6700365670036567","0.6963407973784818","0.7010309278350515","0.6278118609406953","0.2676747037188394","0.7412919569347688","0.5251141552511416","0.32142857142857145","0.7643504531722054" diff --git a/llava_moe_e8t2/01_MMMU_DEV_VAL.pkl b/llava_moe_e8t2/01_MMMU_DEV_VAL.pkl new file mode 100644 index 0000000000000000000000000000000000000000..051347284d8ab84245949bb58d85f577ace8ab32 --- /dev/null +++ b/llava_moe_e8t2/01_MMMU_DEV_VAL.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be6543bc5ac485bf3d6a5bea79fd8335797087e7b12788c6fdf9aab2ca118a4 +size 1455 diff --git a/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST.xlsx b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f09434c13d3be41939af81796f76537b8eec3575 Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_acc.csv b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..e03b1d2dba7c6293bf0bc7fab35af9ca01dc04f4 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5761010362694301","0.625","0.6451612903225806","0.6428571428571429","0.6497418244406197","0.48201438848920863","0.4693140794223827","0.6057494866529775","0.5576923076923077","0.5189873417721519","0.43283582089552236","0.5609756097560976","0.6944444444444444","0.521865889212828","0.5625","0.45454545454545453" diff --git a/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_openai_result.pkl b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6bcb822a8ba59d185db2784ba72d50e3db9bb3b6 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b192780b1bae24bcd6f9f2c508cd5c1dccb88a4561087991eecbf1aaa3d66a +size 170089 diff --git a/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_openai_result.xlsx b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5e1b24354c6e907d02c88f93b05eb690b129ea4c Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_HallusionBench.xlsx b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e525fa907e57bfcb58c1ccbd5b04fa33de1ce0eb Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_PREV.pkl b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0ff105a1c56c30e2115e61609388d58bee44e77a --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347c8a3311040e1e1eda59287306bf313af791a6ce87e813c2587ffe0221a81e +size 118586 diff --git a/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_auxmatch.xlsx b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..daaa2a4289d9add6b819c7a1ec506e2de0403b77 Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_auxmatch.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_score.csv b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..a689b8b28f85b04fa8ea830d507e7ad10083243c --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","42.27129337539432","17.341040462427745","15.384615384615385" +"VD","45.17766497461929","23.91304347826087","17.689530685920577" +"VS","37.5","4.310344827586207","11.797752808988763" +"VS_chart","33.84615384615385","0.0","15.789473684210526" +"VS_ocr","40.74074074074074","11.538461538461538","11.11111111111111" +"VD_ocr","59.55056179775281","46.51162790697674","39.53488372093023" +"VD_figure","60.0","36.58536585365854","28.205128205128204" +"VS_map","46.875","9.090909090909092","9.375" +"VD_video","27.058823529411764","2.083333333333333","1.4492753623188406" +"VD_math","38.88888888888889","2.7777777777777777","12.962962962962962" +"VD_illusion","54.166666666666664","29.03225806451613","18.055555555555554" +"VS_table","34.82142857142857","0.0","6.976744186046512" diff --git a/llava_moe_e8t2/llava_moe_e8t2_MME.xlsx b/llava_moe_e8t2/llava_moe_e8t2_MME.xlsx index 65d594cbdf0fc333c75267c997de70c2864fd1d3..bf4d3a13d437e243cf9b01fa3edcd3ffc2dcd55b 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_MME.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_MME.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_MME_PREV.pkl b/llava_moe_e8t2/llava_moe_e8t2_MME_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9ccb2bd2575c99eaa4ab6b78637d88c88a141122 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_MME_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:136f494d1cc99d1d632e5f8aa0f4e23baf2d641a76d787f1e8d92709755a8e2b +size 13593 diff --git a/llava_moe_e8t2/llava_moe_e8t2_MME_auxmatch.xlsx b/llava_moe_e8t2/llava_moe_e8t2_MME_auxmatch.xlsx index f627775aacbfb90d9d09cb1fd8c503d1a1b6b293..4f1db029cbd597e6c759f0882ee870ad2c67c0fd 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_MME_auxmatch.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_MME_auxmatch.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_MathVista_MINI.xlsx b/llava_moe_e8t2/llava_moe_e8t2_MathVista_MINI.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..debd8d439d1df72d3c1ffc4382ab4477b9332334 Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_MathVista_MINI.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_POPE.xlsx b/llava_moe_e8t2/llava_moe_e8t2_POPE.xlsx index 70ad0f4367f06a8d37e6d5c5a7dca4373a9e0c03..7bec37e9dfc8dc4ff3ee32cb2be4d96be3d3bf5a 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_POPE.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_POPE.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_POPE_PREV.pkl b/llava_moe_e8t2/llava_moe_e8t2_POPE_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..df1cd52c2c7c8f11031b6b8a5cdf0a3521e2fa5a --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_POPE_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefd50866b7ba46f0499b0fa6f89c2d0319bd6dbfa7242b24468ac026e160753 +size 25412 diff --git a/llava_moe_e8t2/llava_moe_e8t2_POPE_auxmatch.xlsx b/llava_moe_e8t2/llava_moe_e8t2_POPE_auxmatch.xlsx index 0d095c9e25dd07ca96725bb8ce0a1b7395d44a91..1e474ac633967b5f16543a1d03c0f5aea8b82eab 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_POPE_auxmatch.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_POPE_auxmatch.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_POPE_score.csv b/llava_moe_e8t2/llava_moe_e8t2_POPE_score.csv index 514db4422e7674210e07083e1fb9066ad0218bdb..6069570d3c60a0c12fcead959268351db085b00f 100644 --- a/llava_moe_e8t2/llava_moe_e8t2_POPE_score.csv +++ b/llava_moe_e8t2/llava_moe_e8t2_POPE_score.csv @@ -1,5 +1,5 @@ "split","Overall","acc","precision","recall" "Overall","78.00963081861958","81.73333333333333","97.98387096774194","64.8" -"popular","78.04094741067844","81.76666666666667","98.0827447023209","64.8" "random","78.48203471941866","82.23333333333333","99.48822927328557","64.8" +"popular","78.04094741067844","81.76666666666667","98.0827447023209","64.8" "adversarial","77.51196172248804","81.2","96.42857142857143","64.8" diff --git a/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL.xlsx b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..358b2c72c84d4ac4911889c933f3a2a132697b55 Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_acc.csv b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..31edf951c65a9276ae74550290199741dba507f1 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5826086956521739","0.6063829787234043","0.6114285714285714","0.6068376068376068","0.7285714285714285","0.4","0.7321428571428571","0.65","0.7","0.44886363636363635","0.5793103448275863","0.5172413793103449","0.5882352941176471" diff --git a/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_openai_result.pkl b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9cfa62da698e491ff23e478e572d50cae70a72c1 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be9ea5b589855ba2efa987abbc3067d904b71713c41276d29c3d3806c2d42a3 +size 79532 diff --git a/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_openai_result.xlsx b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c36a5913cbc9014fd132fdb14022350e4fb71859 Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA.xlsx b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..65647cecc25357626ea1f37f416ce11ef90aac37 Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_PREV.pkl b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ffd91c9e19237731aef730faa420f257247c317a --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4403e7638dc9c5010308d96682dd8c725faf9ac6869737d5669214807648a5f6 +size 3735 diff --git a/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_acc.csv b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..d9a7f9d90fe93530f392a908c84cfa520c22bc79 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4980392156862745" diff --git a/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_openai_result.pkl b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e95f3a0e39c7606c9126cb2b21f4586309d4279b --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ee8ed65b19ac0605e3fd543c8894dd89d036bd94d0dbf34dc43ed0763214e68 +size 49442 diff --git a/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_openai_result.xlsx b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0a1886be035734afab8a017bb5784f797d16b95d Binary files /dev/null and b/llava_moe_e8t2/llava_moe_e8t2_RealWorldQA_openai_result.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG.xlsx b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG.xlsx index afbc0ac2fd9ed525b73675812b333db33f2943e1..1ae28ffb022fba71b0c021d5bf070ad7cfdd299e 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_PREV.pkl b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..74bd429b4402e3671b98ada817c2a9945dd11e89 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e0a6e741ddb51570f6b359a91c8425f1fafc757b4286f1dbb1fa5bd1235cea +size 81774 diff --git a/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_acc.csv b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..f607b159176e59e976e3151c71b600e04992f6f9 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.6108066329398538","0.6700365670036567","0.6963407973784818","0.7010309278350515","0.6278118609406953","0.2676747037188394","0.7412919569347688","0.5251141552511416","0.32142857142857145","0.7643504531722054" diff --git a/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.pkl b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.pkl index c2a743af9a8ae78ce50b2e8f6adfbd2dc265a981..d8846b3254e450c212c820f0803671e93b8a9077 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.pkl and b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.pkl differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.xlsx b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..762142a3bc52cd520897a39976ebdb47ad7d670a --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c0b2c05098eb41153b8d392f0539c9ec96fb9c47bdae99ce63455123714968 +size 1057137 diff --git a/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL.xlsx b/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL.xlsx index d3336d3f579854ba1aa8bf3ffb7b79ad792152fc..3fba933e8a8ade354301363c196850679f37ea06 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL_PREV.pkl b/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1dc8cc6ac0d71f66930bee66ae74d980f731035f --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18a3e62d40aef7b6f6c7965647f1ac5d1caf9e06378e7b08fcd4986430c05dd1 +size 10432 diff --git a/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL_openai_result.xlsx b/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL_openai_result.xlsx index f84f9d30c43fb6f42cc844335301a1ded7068c2c..bb5de91d36b60ae2f2806b3acf037e352979e7e1 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL_openai_result.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_TextVQA_VAL.xlsx b/llava_moe_e8t2/llava_moe_e8t2_TextVQA_VAL.xlsx index fc125fb56fe65fe53f9d08be85f4ebdf9b071000..371f74900f708d489c5cd21fdc92fc368d9c0871 100644 Binary files a/llava_moe_e8t2/llava_moe_e8t2_TextVQA_VAL.xlsx and b/llava_moe_e8t2/llava_moe_e8t2_TextVQA_VAL.xlsx differ diff --git a/llava_moe_e8t2/llava_moe_e8t2_TextVQA_VAL_PREV.pkl b/llava_moe_e8t2/llava_moe_e8t2_TextVQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b61a9e6a0161a50088e91ec76a95f6d380a078d7 --- /dev/null +++ b/llava_moe_e8t2/llava_moe_e8t2_TextVQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:605614f8047cf37ae47b1d4e5388040555ae1f656b461ccc085a37f766c25980 +size 49344 diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_AI2D_TEST_acc-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_AI2D_TEST_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..58afcbc0f15fa0975aa51cf1ca8e767734e047f1 --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_AI2D_TEST_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5531088082901554","0.375","0.5806451612903226","0.5714285714285714","0.6514629948364888","0.42685851318944845","0.4259927797833935","0.5708418891170431","0.46153846153846156","0.5189873417721519","0.43283582089552236","0.5609756097560976","0.6944444444444444","0.4839650145772595","0.625","0.5" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_HallusionBench_score-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..4e0c08d17812fa865d6d799475d0fdc30ce93b08 --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","43.4279705573081","11.271676300578035","8.571428571428571" +"VD","44.50084602368866","12.608695652173912","7.9422382671480145" +"VS","41.66666666666667","8.620689655172415","9.550561797752808" +"VD_math","31.48148148148148","8.333333333333332","5.555555555555555" +"VS_ocr","53.70370370370371","26.923076923076923","11.11111111111111" +"VD_figure","57.49999999999999","24.390243902439025","20.51282051282051" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_map","45.3125","4.545454545454546","3.125" +"VS_chart","37.69230769230769","5.0","15.789473684210526" +"VD_video","45.294117647058826","10.416666666666668","10.144927536231885" +"VD_illusion","41.66666666666667","12.903225806451612","0.0" +"VD_ocr","51.68539325842697","6.976744186046512","9.30232558139535" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_MME_score-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f618bdff28452fde4d1a00c8d84ebc17be318698 --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1105.7718087234894","258.57142857142856","70.0","104.75","85.88235294117648","65.0","65.0","83.57142857142857","125.0","190.0","138.0","60.0","53.33333333333333","115.30612244897961","158.5","50.0" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_POPE_score-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f41f697926e15511c1eebd18afc1bd0e65450938 --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","81.2338267260118","79.85555555555555","76.0317767874443","87.2" +"popular","83.0212630910822","82.16666666666667","79.22471229557844","87.2" +"random","85.04551365409623","84.66666666666667","82.99492385786802","87.2" +"adversarial","76.17938264414677","72.73333333333333","67.63185108583247","87.2" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_Q-Bench1_VAL_acc-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_Q-Bench1_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..71a35c1b4d10222b0632cdce4467f946a45d2815 --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_Q-Bench1_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5204013377926422","0.5106382978723404","0.6","0.5641025641025641","0.5285714285714286","0.41333333333333333","0.6607142857142857","0.5","0.7111111111111111","0.3352272727272727","0.5172413793103449","0.5172413793103449","0.5294117647058824" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_RealWorldQA_acc-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..d5ebd6e2627b7967c989d23c5d2ae1996fc7bb9b --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.46797385620915033" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_SEEDBench_IMG_acc-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..0482a9b72caabb77d08eefbe458a18abc1789afd --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5346402473299606","0.4876317487631749","0.5871108683779356","0.6288659793814433","0.45194274028629855","0.4421740907233347","0.6773274224192527","0.3866057838660578","0.6428571428571429","0.7099697885196374" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_ScienceQA_VAL_acc-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..c3805a27690b047a2b2d697864e2b2f084e09162 --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6528373867429661","0.8958333333333334","1.0","1.0","1.0","1.0","1.0","0.6304347826086957","0.6190476190476191","0.78125","1.0","0.25","0.9166666666666666","0.8297872340425532","1.0","0.47619047619047616","0.6","1.0","1.0","0.4","0.0","0.5","1.0","0.68","0.7611940298507462","0.9811320754716981","0.358974358974359","0.44","0.7058823529411765","0.0425531914893617","0.4225352112676056","1.0","1.0","1.0","0.39097744360902253","0.3225806451612903","0.8189655172413793","1.0","0.5","0.2553191489361702","0.7","0.5","0.4117647058823529","0.4444444444444444","0.5348837209302325","0.7","1.0","1.0","1.0","0.975609756097561","0.5","0.7380952380952381","0.582089552238806","0.8616352201257862","0.3333333333333333","0.8421052631578947","0.6666666666666666","0.2","1.0","0.0","1.0","0.3333333333333333","0.17647058823529413","1.0","1.0","0.4375","1.0" diff --git a/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_TextVQA_VAL_acc-checkpoint.csv b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..b9302897e2cd2cb8e312c8fe9673d517d5655b12 --- /dev/null +++ b/llava_phi_c01/.ipynb_checkpoints/llava_phi_c01_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"17.272000000000002" diff --git a/llava_phi_c01/01_MMMU_DEV_VAL.pkl b/llava_phi_c01/01_MMMU_DEV_VAL.pkl new file mode 100644 index 0000000000000000000000000000000000000000..20b33ed04f7b2d45483e49d3e10fd009799d843c --- /dev/null +++ b/llava_phi_c01/01_MMMU_DEV_VAL.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272f5e39f50963ad95f7cd9a9fbb63d66c32c4a02d82aa035bac25d59584e4c8 +size 2447 diff --git a/llava_phi_c01/llava_phi_c01_AI2D_TEST.xlsx b/llava_phi_c01/llava_phi_c01_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9f80ffa7a3358e3b6817849127e639c3dbddd62a Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_AI2D_TEST.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_AI2D_TEST_acc.csv b/llava_phi_c01/llava_phi_c01_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..58afcbc0f15fa0975aa51cf1ca8e767734e047f1 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5531088082901554","0.375","0.5806451612903226","0.5714285714285714","0.6514629948364888","0.42685851318944845","0.4259927797833935","0.5708418891170431","0.46153846153846156","0.5189873417721519","0.43283582089552236","0.5609756097560976","0.6944444444444444","0.4839650145772595","0.625","0.5" diff --git a/llava_phi_c01/llava_phi_c01_AI2D_TEST_openai_result.pkl b/llava_phi_c01/llava_phi_c01_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1719e84d35a69ed5f521440ad9c56829859d24bf --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:019dff496ae4a6aad5b394322dbc6b58b63cc5e88762607b0b5bbd702f20a108 +size 166998 diff --git a/llava_phi_c01/llava_phi_c01_AI2D_TEST_openai_result.xlsx b/llava_phi_c01/llava_phi_c01_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9a3aceabcf5902d7e6fdbd154f1cdd25ddb416cb Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_HallusionBench.xlsx b/llava_phi_c01/llava_phi_c01_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c18586069901be4cc794d6e8ac647097ff4f8281 Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_HallusionBench.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_HallusionBench_auxmatch.xlsx b/llava_phi_c01/llava_phi_c01_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..fe7f8353247615dd93ef2bdfdd40532dbfe3d1af Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_HallusionBench_auxmatch.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_HallusionBench_score.csv b/llava_phi_c01/llava_phi_c01_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..4e0c08d17812fa865d6d799475d0fdc30ce93b08 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","43.4279705573081","11.271676300578035","8.571428571428571" +"VD","44.50084602368866","12.608695652173912","7.9422382671480145" +"VS","41.66666666666667","8.620689655172415","9.550561797752808" +"VD_math","31.48148148148148","8.333333333333332","5.555555555555555" +"VS_ocr","53.70370370370371","26.923076923076923","11.11111111111111" +"VD_figure","57.49999999999999","24.390243902439025","20.51282051282051" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_map","45.3125","4.545454545454546","3.125" +"VS_chart","37.69230769230769","5.0","15.789473684210526" +"VD_video","45.294117647058826","10.416666666666668","10.144927536231885" +"VD_illusion","41.66666666666667","12.903225806451612","0.0" +"VD_ocr","51.68539325842697","6.976744186046512","9.30232558139535" diff --git a/llava_phi_c01/llava_phi_c01_MME.xlsx b/llava_phi_c01/llava_phi_c01_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..cb84228ab16c49e50c08322508ae97a3125f860d Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_MME.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_MME_auxmatch.xlsx b/llava_phi_c01/llava_phi_c01_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..119c5c057423d36f01afd2931fb855ad618fd0a3 Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_MME_auxmatch.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_MME_score.csv b/llava_phi_c01/llava_phi_c01_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..f618bdff28452fde4d1a00c8d84ebc17be318698 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1105.7718087234894","258.57142857142856","70.0","104.75","85.88235294117648","65.0","65.0","83.57142857142857","125.0","190.0","138.0","60.0","53.33333333333333","115.30612244897961","158.5","50.0" diff --git a/llava_phi_c01/llava_phi_c01_POPE.xlsx b/llava_phi_c01/llava_phi_c01_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..25a0ca0cd2656d2d161f5ad090bedb10d5b4fe8e Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_POPE.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_POPE_PREV.pkl b/llava_phi_c01/llava_phi_c01_POPE_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..982ad66f28a7a8c60e7075828a17f6bd62cf134c --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_POPE_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b9f3cea32cd91513f2797bb95dbe3623d78ff950ee46e6ade7f370316508f0 +size 25412 diff --git a/llava_phi_c01/llava_phi_c01_POPE_auxmatch.xlsx b/llava_phi_c01/llava_phi_c01_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..96a24d849f434c26cc3205a8304f5fb9ed24fbd5 Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_POPE_auxmatch.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_POPE_score.csv b/llava_phi_c01/llava_phi_c01_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..f41f697926e15511c1eebd18afc1bd0e65450938 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","81.2338267260118","79.85555555555555","76.0317767874443","87.2" +"popular","83.0212630910822","82.16666666666667","79.22471229557844","87.2" +"random","85.04551365409623","84.66666666666667","82.99492385786802","87.2" +"adversarial","76.17938264414677","72.73333333333333","67.63185108583247","87.2" diff --git a/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL.xlsx b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4409cc6607e1a8d4f03b2c51111303e585350028 Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_PREV.pkl b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..02262fd434286f9137609d4aaf59a2e4502a9359 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:630314fad6f503df4a047fe15573712ab9f46be216228afa81dba031bb1f9bbb +size 29267 diff --git a/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_acc.csv b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..71a35c1b4d10222b0632cdce4467f946a45d2815 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5204013377926422","0.5106382978723404","0.6","0.5641025641025641","0.5285714285714286","0.41333333333333333","0.6607142857142857","0.5","0.7111111111111111","0.3352272727272727","0.5172413793103449","0.5172413793103449","0.5294117647058824" diff --git a/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_openai_result.pkl b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..508797182086851922a0f9092a459478d057d178 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a13c37e2d8a9f84a38b264a3ea9d33eeb0fb0af77c4aacbf68afcbab315652 +size 78721 diff --git a/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_openai_result.xlsx b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2205b518f30ca7c0706061060a2a39504bd5c1cc Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_RealWorldQA.xlsx b/llava_phi_c01/llava_phi_c01_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ed5fca1f521252a014abe2ad721589701740943f Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_RealWorldQA.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_RealWorldQA_acc.csv b/llava_phi_c01/llava_phi_c01_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..d5ebd6e2627b7967c989d23c5d2ae1996fc7bb9b --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.46797385620915033" diff --git a/llava_phi_c01/llava_phi_c01_RealWorldQA_openai_result.pkl b/llava_phi_c01/llava_phi_c01_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4a3b43f87c3c0f1d11065be163fa7298370a36bc --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e71908f6a786ad47e815ab14c79ba11d4c90c8362eb57dea30ce7f2f38a0f60 +size 41443 diff --git a/llava_phi_c01/llava_phi_c01_RealWorldQA_openai_result.xlsx b/llava_phi_c01/llava_phi_c01_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bee27b0917635c75a6a4bc303207aaa5f29e34db Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_RealWorldQA_openai_result.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_SEEDBench_IMG.xlsx b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..dcb1918c77e0c0b6fbae407c34e4d8fecc17fca8 Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_acc.csv b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..0482a9b72caabb77d08eefbe458a18abc1789afd --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5346402473299606","0.4876317487631749","0.5871108683779356","0.6288659793814433","0.45194274028629855","0.4421740907233347","0.6773274224192527","0.3866057838660578","0.6428571428571429","0.7099697885196374" diff --git a/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_openai_result.pkl b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..017c9a5d530c4dd3046de7cff7bf7f85cf03a04f --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f6937c4d46c01cdcd5a1f1519f2a6d706143cdd5c3d908be0ad63b367a5142 +size 769678 diff --git a/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_openai_result.xlsx b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..09e0e1ae2990a0eb29f9bc7f872f59cbf31795b4 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a206782710ebb8d004a5a00330da38a4ae6c4180977518595beb65fff0e1c +size 1054281 diff --git a/llava_phi_c01/llava_phi_c01_ScienceQA_VAL.xlsx b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1569ff2058a696698d34eef35cd0530cf9150d9a Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_PREV.pkl b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e1c701c30a7f4aeda7a5c58b12a70b96042e209b --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5a5ba5b3fc364862c4fd2052b705cc2972e6d21c760d77ad1fdcb9b509ad6c +size 10545 diff --git a/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_acc.csv b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..c3805a27690b047a2b2d697864e2b2f084e09162 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6528373867429661","0.8958333333333334","1.0","1.0","1.0","1.0","1.0","0.6304347826086957","0.6190476190476191","0.78125","1.0","0.25","0.9166666666666666","0.8297872340425532","1.0","0.47619047619047616","0.6","1.0","1.0","0.4","0.0","0.5","1.0","0.68","0.7611940298507462","0.9811320754716981","0.358974358974359","0.44","0.7058823529411765","0.0425531914893617","0.4225352112676056","1.0","1.0","1.0","0.39097744360902253","0.3225806451612903","0.8189655172413793","1.0","0.5","0.2553191489361702","0.7","0.5","0.4117647058823529","0.4444444444444444","0.5348837209302325","0.7","1.0","1.0","1.0","0.975609756097561","0.5","0.7380952380952381","0.582089552238806","0.8616352201257862","0.3333333333333333","0.8421052631578947","0.6666666666666666","0.2","1.0","0.0","1.0","0.3333333333333333","0.17647058823529413","1.0","1.0","0.4375","1.0" diff --git a/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_openai_result.pkl b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9092d662858d8e1e4fd9209f806134ef12e681d8 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba188edd4514bc5443f4d9cd69770ab7cfcd4bc036893553489e79026342ab2 +size 113626 diff --git a/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8b54d9347e0fa41fe5e1bc7de6aff8a3c9e7bcdb Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_TextVQA_VAL.xlsx b/llava_phi_c01/llava_phi_c01_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f019ad03874472d263fa41d37edd5d4da556e6f4 Binary files /dev/null and b/llava_phi_c01/llava_phi_c01_TextVQA_VAL.xlsx differ diff --git a/llava_phi_c01/llava_phi_c01_TextVQA_VAL_PREV.pkl b/llava_phi_c01/llava_phi_c01_TextVQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f50f445aca9dfddfc94feb1ac72c55029fcc2470 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_TextVQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2925be385bdfe64a7b4d8a2ff6ed4238143cb19ca68f6517cfbbc3ee5f6691 +size 62028 diff --git a/llava_phi_c01/llava_phi_c01_TextVQA_VAL_acc.csv b/llava_phi_c01/llava_phi_c01_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..b9302897e2cd2cb8e312c8fe9673d517d5655b12 --- /dev/null +++ b/llava_phi_c01/llava_phi_c01_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"17.272000000000002" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_AI2D_TEST_acc-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_AI2D_TEST_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..b09b0f308555a91b6a1c20c408e52806982ebc28 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_AI2D_TEST_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5207253886010362","0.375","0.6129032258064516","0.5714285714285714","0.6110154905335629","0.4052757793764988","0.37906137184115524","0.5585215605749486","0.5961538461538461","0.4177215189873418","0.4626865671641791","0.4146341463414634","0.7222222222222222","0.4489795918367347","0.4375","0.3409090909090909" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_HallusionBench_score-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..45c27a5234b7a3e5cd94fb7c430a8ba6775b5a34 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","43.743427970557306","12.716763005780345","9.230769230769232" +"VD","45.346869712351946","14.347826086956522","8.664259927797833" +"VS","41.11111111111111","9.482758620689655","10.112359550561797" +"VD_math","37.03703703703704","2.7777777777777777","9.25925925925926" +"VS_ocr","59.25925925925925","26.923076923076923","18.51851851851852" +"VD_video","44.11764705882353","10.416666666666668","8.695652173913043" +"VD_illusion","41.66666666666667","16.129032258064516","0.0" +"VD_figure","56.25","29.268292682926827","17.94871794871795" +"VD_ocr","53.93258426966292","11.627906976744185","13.953488372093023" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_chart","33.84615384615385","5.0","13.157894736842104" +"VS_map","45.3125","9.090909090909092","6.25" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_MME_score-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..1b4ccfc11d30c2d10468e09b2ce0f41c3b1e3aa5 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1137.2409963985592","273.2142857142857","87.5","115.75","106.47058823529412","62.5","63.333333333333336","90.71428571428572","123.33333333333334","185.0","133.0","72.5","46.66666666666667","117.68707482993197","158.5","47.5" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_POPE_score-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f20b36955f2d489eac82357f0227aabe439b3161 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","80.02170374389583","79.54444444444444","78.19724284199364","81.93333333333334" +"adversarial","75.58425584255842","73.53333333333333","70.14840182648402","81.93333333333334" +"random","83.32203389830508","83.6","84.75862068965517","81.93333333333334" +"popular","81.57982077663458","81.5","81.22934567085261","81.93333333333334" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_Q-Bench1_VAL_acc-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_Q-Bench1_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..c5ded731405950c59506aa51e5abb1853548ecec --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_Q-Bench1_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5076923076923077","0.5319148936170213","0.6342857142857142","0.5897435897435898","0.7142857142857143","0.3466666666666667","0.6607142857142857","0.44","0.5555555555555556","0.3181818181818182","0.5103448275862069","0.39080459770114945","0.5294117647058824" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_RealWorldQA_acc-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..79cd91557c3ae416228f013714d8f0b76213c4e9 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4326797385620915" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_SEEDBench_IMG_acc-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..b87c3a91bfbb3fa882558cb088256dce006621e4 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5080101180438449","0.45257044525704454","0.5499726925177498","0.6494845360824743","0.450920245398773","0.40743767879035553","0.6595946801773275","0.3926940639269406","0.6428571428571429","0.6737160120845922" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_ScienceQA_VAL_acc-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cb9115dc0bb308e2f952a885cc8b5bf0be28e95 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6490224129709108","0.8958333333333334","1.0","1.0","1.0","1.0","1.0","0.6086956521739131","0.7142857142857143","0.546875","1.0","0.375","0.9166666666666666","0.9148936170212766","1.0","0.47619047619047616","0.4","1.0","0.9838709677419355","0.2","1.0","1.0","1.0","0.56","0.7611940298507462","1.0","0.38461538461538464","0.36","0.6470588235294118","0.23404255319148937","0.4647887323943662","1.0","1.0","1.0","0.39097744360902253","0.3548387096774194","0.8017241379310345","0.5","0.5","0.2553191489361702","0.8","0.5","0.39705882352941174","0.4444444444444444","0.5813953488372093","0.7","1.0","1.0","1.0","0.9512195121951219","0.5","0.8095238095238095","0.5373134328358209","0.8427672955974843","0.6666666666666666","0.8157894736842105","0.6666666666666666","0.23333333333333334","1.0","0.0","1.0","0.4444444444444444","0.0784313725490196","0.8888888888888888","1.0","0.46875","0.6666666666666666" diff --git a/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_TextVQA_VAL_acc-checkpoint.csv b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..7481ad1891398214c268bf574feba90f74b25955 --- /dev/null +++ b/llava_phi_c02/.ipynb_checkpoints/llava_phi_c02_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"17.506" diff --git a/llava_phi_c02/01_MMMU_DEV_VAL.pkl b/llava_phi_c02/01_MMMU_DEV_VAL.pkl new file mode 100644 index 0000000000000000000000000000000000000000..96541310e5ef7a9b9eac4113db3ab124cd221e4d --- /dev/null +++ b/llava_phi_c02/01_MMMU_DEV_VAL.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5791e2ce4207edf0ffd602433dbaf76f22fe7200cc5a703c9b5f9c8b88209c6 +size 2163 diff --git a/llava_phi_c02/llava_phi_c02_AI2D_TEST.xlsx b/llava_phi_c02/llava_phi_c02_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..be996ca8129585b9e144522fed08e43e822ad81f Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_AI2D_TEST.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_AI2D_TEST_acc.csv b/llava_phi_c02/llava_phi_c02_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..b09b0f308555a91b6a1c20c408e52806982ebc28 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5207253886010362","0.375","0.6129032258064516","0.5714285714285714","0.6110154905335629","0.4052757793764988","0.37906137184115524","0.5585215605749486","0.5961538461538461","0.4177215189873418","0.4626865671641791","0.4146341463414634","0.7222222222222222","0.4489795918367347","0.4375","0.3409090909090909" diff --git a/llava_phi_c02/llava_phi_c02_AI2D_TEST_openai_result.pkl b/llava_phi_c02/llava_phi_c02_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..750b70a878a75f98372f2872c6a0e76e52e2ae20 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f87c7ee1551f5851fc10a0d4b7cf90bbbcc63b46918d50e346dfe7ee7df41f +size 166877 diff --git a/llava_phi_c02/llava_phi_c02_AI2D_TEST_openai_result.xlsx b/llava_phi_c02/llava_phi_c02_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7e819155cd2b07c390e4a794303f48d09c288a2e Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_HallusionBench.xlsx b/llava_phi_c02/llava_phi_c02_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..253d527c6ebafc1f51388d5ee05c5c6c544d3c8c Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_HallusionBench.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_HallusionBench_auxmatch.xlsx b/llava_phi_c02/llava_phi_c02_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ba95e56fec606e7b70cb2b069b401a27efd406d3 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_HallusionBench_auxmatch.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_HallusionBench_score.csv b/llava_phi_c02/llava_phi_c02_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..45c27a5234b7a3e5cd94fb7c430a8ba6775b5a34 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","43.743427970557306","12.716763005780345","9.230769230769232" +"VD","45.346869712351946","14.347826086956522","8.664259927797833" +"VS","41.11111111111111","9.482758620689655","10.112359550561797" +"VD_math","37.03703703703704","2.7777777777777777","9.25925925925926" +"VS_ocr","59.25925925925925","26.923076923076923","18.51851851851852" +"VD_video","44.11764705882353","10.416666666666668","8.695652173913043" +"VD_illusion","41.66666666666667","16.129032258064516","0.0" +"VD_figure","56.25","29.268292682926827","17.94871794871795" +"VD_ocr","53.93258426966292","11.627906976744185","13.953488372093023" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_chart","33.84615384615385","5.0","13.157894736842104" +"VS_map","45.3125","9.090909090909092","6.25" diff --git a/llava_phi_c02/llava_phi_c02_MME.xlsx b/llava_phi_c02/llava_phi_c02_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..36dc50b013b390abf72ef5e1eac1065256833032 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_MME.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_MME_auxmatch.xlsx b/llava_phi_c02/llava_phi_c02_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a1ea4bc0ca6935f2a9ff054366396299a3abc0f2 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_MME_auxmatch.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_MME_score.csv b/llava_phi_c02/llava_phi_c02_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..1b4ccfc11d30c2d10468e09b2ce0f41c3b1e3aa5 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1137.2409963985592","273.2142857142857","87.5","115.75","106.47058823529412","62.5","63.333333333333336","90.71428571428572","123.33333333333334","185.0","133.0","72.5","46.66666666666667","117.68707482993197","158.5","47.5" diff --git a/llava_phi_c02/llava_phi_c02_POPE.xlsx b/llava_phi_c02/llava_phi_c02_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8bf42132d3cb21bac64de6218ee40ed4726260df Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_POPE.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_POPE_auxmatch.xlsx b/llava_phi_c02/llava_phi_c02_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8b469f0074d7f396d1db618c2dacec308149a005 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_POPE_auxmatch.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_POPE_score.csv b/llava_phi_c02/llava_phi_c02_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..f20b36955f2d489eac82357f0227aabe439b3161 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","80.02170374389583","79.54444444444444","78.19724284199364","81.93333333333334" +"adversarial","75.58425584255842","73.53333333333333","70.14840182648402","81.93333333333334" +"random","83.32203389830508","83.6","84.75862068965517","81.93333333333334" +"popular","81.57982077663458","81.5","81.22934567085261","81.93333333333334" diff --git a/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL.xlsx b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..903dbeb36e70e898e5b2476c433eed4f990eb7e6 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_acc.csv b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..c5ded731405950c59506aa51e5abb1853548ecec --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5076923076923077","0.5319148936170213","0.6342857142857142","0.5897435897435898","0.7142857142857143","0.3466666666666667","0.6607142857142857","0.44","0.5555555555555556","0.3181818181818182","0.5103448275862069","0.39080459770114945","0.5294117647058824" diff --git a/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_openai_result.pkl b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4fd0cba16532e7cd88e8c72b85a9b3b2968fa123 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e187ec1cdf7d874b0a585c2415c64632e5f5650eee5cf2ddad635ecab009c16c +size 78596 diff --git a/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_openai_result.xlsx b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..51bcf996c1ff2a6582c43bb9f415cd24fe3c7a8a Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_RealWorldQA.xlsx b/llava_phi_c02/llava_phi_c02_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2104b937ee8403c2703e7b029be98a18287e5cbc Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_RealWorldQA.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_RealWorldQA_acc.csv b/llava_phi_c02/llava_phi_c02_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..79cd91557c3ae416228f013714d8f0b76213c4e9 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4326797385620915" diff --git a/llava_phi_c02/llava_phi_c02_RealWorldQA_openai_result.pkl b/llava_phi_c02/llava_phi_c02_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..bff87f5af499bd66adc19424a3c4b8b3563e1661 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6365e6fcdf1dd412e797ad432fbf8063a6c089784355dc8ab4438f8f2a66043 +size 41411 diff --git a/llava_phi_c02/llava_phi_c02_RealWorldQA_openai_result.xlsx b/llava_phi_c02/llava_phi_c02_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..af3e9955996d04956196459ed7965fa67775de48 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_RealWorldQA_openai_result.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_SEEDBench_IMG.xlsx b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b589351e64dae9fd723bad74e9a7f7c73a8c38da Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_acc.csv b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..b87c3a91bfbb3fa882558cb088256dce006621e4 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5080101180438449","0.45257044525704454","0.5499726925177498","0.6494845360824743","0.450920245398773","0.40743767879035553","0.6595946801773275","0.3926940639269406","0.6428571428571429","0.6737160120845922" diff --git a/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_openai_result.pkl b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2cfc669a30d387b9474a4a0cb9e7843d3f2d81e5 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9108a21f2bf089f9d25843ec8060ae5d69af43563d3aa81a6a9a20cb2057bc3e +size 768756 diff --git a/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_openai_result.xlsx b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c3d58c91b8e8607d48b6504e684fb4355a7bea0d --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a394389b4d18c7afc770e067265e5263c558a8387a0285278a0477bacc6bd1e +size 1050932 diff --git a/llava_phi_c02/llava_phi_c02_ScienceQA_VAL.xlsx b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3dfe72007a73c4302bcf81dda8c7ebbf52b74959 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_acc.csv b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cb9115dc0bb308e2f952a885cc8b5bf0be28e95 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6490224129709108","0.8958333333333334","1.0","1.0","1.0","1.0","1.0","0.6086956521739131","0.7142857142857143","0.546875","1.0","0.375","0.9166666666666666","0.9148936170212766","1.0","0.47619047619047616","0.4","1.0","0.9838709677419355","0.2","1.0","1.0","1.0","0.56","0.7611940298507462","1.0","0.38461538461538464","0.36","0.6470588235294118","0.23404255319148937","0.4647887323943662","1.0","1.0","1.0","0.39097744360902253","0.3548387096774194","0.8017241379310345","0.5","0.5","0.2553191489361702","0.8","0.5","0.39705882352941174","0.4444444444444444","0.5813953488372093","0.7","1.0","1.0","1.0","0.9512195121951219","0.5","0.8095238095238095","0.5373134328358209","0.8427672955974843","0.6666666666666666","0.8157894736842105","0.6666666666666666","0.23333333333333334","1.0","0.0","1.0","0.4444444444444444","0.0784313725490196","0.8888888888888888","1.0","0.46875","0.6666666666666666" diff --git a/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_openai_result.pkl b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..553a112891166ea4128b81781f3584ce62885e0d --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb2009c85448ce402a48274473f921283ea3458039cdb2a7a86a3fc299dd67a0 +size 113352 diff --git a/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..38764b2506f08935aac95642f664a6d7f9ebb749 Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_TextVQA_VAL.xlsx b/llava_phi_c02/llava_phi_c02_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ad602160a758e3bbda3d7ee720afd95b88f8699b Binary files /dev/null and b/llava_phi_c02/llava_phi_c02_TextVQA_VAL.xlsx differ diff --git a/llava_phi_c02/llava_phi_c02_TextVQA_VAL_acc.csv b/llava_phi_c02/llava_phi_c02_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..7481ad1891398214c268bf574feba90f74b25955 --- /dev/null +++ b/llava_phi_c02/llava_phi_c02_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"17.506" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_AI2D_TEST_acc-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_AI2D_TEST_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..0daafd5fdddc95c51e09ac101311fd6dd94db6b3 --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_AI2D_TEST_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5074481865284974","0.5","0.5161290322580645","0.42857142857142855","0.5963855421686747","0.41007194244604317","0.36823104693140796","0.5544147843942505","0.46153846153846156","0.4050632911392405","0.417910447761194","0.34146341463414637","0.6388888888888888","0.4489795918367347","0.5","0.36363636363636365" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_HallusionBench_score-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f419a6f24ab21f3cabbe0f1e2cd6ee2873ceab08 --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","40.483701366982125","6.9364161849710975","3.5164835164835164" +"VD","42.47038917089678","6.521739130434782","2.166064981949458" +"VS","37.22222222222222","7.758620689655173","5.617977528089887" +"VD_math","27.77777777777778","2.7777777777777777","1.8518518518518516" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VD_ocr","48.31460674157304","0.0","2.3255813953488373" +"VD_video","46.470588235294116","8.333333333333332","5.797101449275362" +"VS_ocr","48.148148148148145","26.923076923076923","3.7037037037037033" +"VS_map","42.1875","4.545454545454546","3.125" +"VS_chart","29.230769230769234","2.5","9.210526315789473" +"VD_illusion","42.36111111111111","12.903225806451612","0.0" +"VD_figure","47.5","4.878048780487805","0.0" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_MME_score-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..adeecc2f72180b807bdee54897b2a900e69e650e --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"685.9732893157263","228.57142857142856","72.5","67.75","48.529411764705884","45.0","53.333333333333336","73.57142857142857","80.0","90.0","57.5","45.0","48.333333333333336","68.02721088435375","100.0","65.0" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_POPE_score-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f528c696a49727e652a9966ca208da5f84f922d2 --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","68.5337726523888","55.43333333333334","52.964714441615136","97.06666666666666" +"popular","69.43252265140677","57.266666666666666","54.046028210838905","97.06666666666666" +"random","69.16864608076011","56.733333333333334","53.7269372693727","97.06666666666666" +"adversarial","67.05042597282984","52.300000000000004","51.21350685895181","97.06666666666666" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_Q-Bench1_VAL_acc-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_Q-Bench1_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..dd2324ebbb78024b3079ab272323a953e7f82d9a --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_Q-Bench1_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.46488294314381273","0.4627659574468085","0.5542857142857143","0.5555555555555556","0.7428571428571429","0.29333333333333333","0.5267857142857143","0.48","0.6111111111111112","0.3068181818181818","0.45517241379310347","0.3793103448275862","0.4117647058823529" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_RealWorldQA_acc-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f7f00783e8b302a64389dc628a30f5296bfe97b4 --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.469281045751634" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_SEEDBench_IMG_acc-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..336a894b2ccc7ed5b0e80b3bb1a9b5cf4c3e4be0 --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.41259134345137716","0.39685953968595394","0.4281813216821409","0.4845360824742268","0.37321063394683024","0.32856559051900286","0.4920835972134262","0.3683409436834094","0.6785714285714286","0.525679758308157" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_ScienceQA_VAL_acc-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..e6d56177a2fb99dc33060a88d21f77da8cfbc450 --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.586075345731998","0.7291666666666666","0.5","1.0","1.0","1.0","1.0","0.391304347826087","0.6190476190476191","0.375","1.0","0.375","0.8928571428571429","0.6595744680851063","1.0","0.5238095238095238","0.6","1.0","0.8709677419354839","0.6","1.0","0.5","1.0","0.6","0.7164179104477612","0.9811320754716981","0.358974358974359","0.44","0.6470588235294118","0.10638297872340426","0.38028169014084506","1.0","1.0","1.0","0.3609022556390977","0.3387096774193548","0.6293103448275862","0.5","0.5","0.2765957446808511","0.4","0.5","0.3382352941176471","0.2777777777777778","0.37209302325581395","0.5","1.0","1.0","1.0","1.0","0.5","0.5238095238095238","0.5970149253731343","0.7955974842767296","0.6666666666666666","0.8157894736842105","0.6666666666666666","0.36666666666666664","1.0","0.0","1.0","0.2777777777777778","0.0784313725490196","0.8888888888888888","1.0","0.5625","1.0" diff --git a/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_TextVQA_VAL_acc-checkpoint.csv b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..77cf888adfc02a7ef53c0764514f32f2620557e9 --- /dev/null +++ b/llava_phi_c03/.ipynb_checkpoints/llava_phi_c03_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"10.205999999999998" diff --git a/llava_phi_c03/01_MMMU_DEV_VAL.pkl b/llava_phi_c03/01_MMMU_DEV_VAL.pkl new file mode 100644 index 0000000000000000000000000000000000000000..332cbe23fa859e79ddbb7a3f0864ff179f503869 --- /dev/null +++ b/llava_phi_c03/01_MMMU_DEV_VAL.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4fe66e42fa3a3a992d0c45aa131e29c8cc338310ceb0fd7a0f75da83a7723b +size 2116 diff --git a/llava_phi_c03/llava_phi_c03_AI2D_TEST.xlsx b/llava_phi_c03/llava_phi_c03_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3412deffb7ad0239e0d186fd32f2d30ad2cd9cb7 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_AI2D_TEST.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_AI2D_TEST_acc.csv b/llava_phi_c03/llava_phi_c03_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..0daafd5fdddc95c51e09ac101311fd6dd94db6b3 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5074481865284974","0.5","0.5161290322580645","0.42857142857142855","0.5963855421686747","0.41007194244604317","0.36823104693140796","0.5544147843942505","0.46153846153846156","0.4050632911392405","0.417910447761194","0.34146341463414637","0.6388888888888888","0.4489795918367347","0.5","0.36363636363636365" diff --git a/llava_phi_c03/llava_phi_c03_AI2D_TEST_openai_result.pkl b/llava_phi_c03/llava_phi_c03_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a8764bbc2314725f15f8d45c00c7121c2dc5cc0a --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb79ae4c74f59d1d943541ec393945f22f91f4d6f183905bb2deb723947ddde4 +size 166877 diff --git a/llava_phi_c03/llava_phi_c03_AI2D_TEST_openai_result.xlsx b/llava_phi_c03/llava_phi_c03_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..27288a1d8ba91f1a68fb7ef310d91fa17bd10f97 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_HallusionBench.xlsx b/llava_phi_c03/llava_phi_c03_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3b5fe0f1a4e5a543826df46d514e763121bce776 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_HallusionBench.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_HallusionBench_auxmatch.xlsx b/llava_phi_c03/llava_phi_c03_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..17b875fa105a073a43675fb3f0d55b6cf161b02a Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_HallusionBench_auxmatch.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_HallusionBench_score.csv b/llava_phi_c03/llava_phi_c03_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..f419a6f24ab21f3cabbe0f1e2cd6ee2873ceab08 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","40.483701366982125","6.9364161849710975","3.5164835164835164" +"VD","42.47038917089678","6.521739130434782","2.166064981949458" +"VS","37.22222222222222","7.758620689655173","5.617977528089887" +"VD_math","27.77777777777778","2.7777777777777777","1.8518518518518516" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VD_ocr","48.31460674157304","0.0","2.3255813953488373" +"VD_video","46.470588235294116","8.333333333333332","5.797101449275362" +"VS_ocr","48.148148148148145","26.923076923076923","3.7037037037037033" +"VS_map","42.1875","4.545454545454546","3.125" +"VS_chart","29.230769230769234","2.5","9.210526315789473" +"VD_illusion","42.36111111111111","12.903225806451612","0.0" +"VD_figure","47.5","4.878048780487805","0.0" diff --git a/llava_phi_c03/llava_phi_c03_MME.xlsx b/llava_phi_c03/llava_phi_c03_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..885525f0f028bc3645eea8186910fa6d465a56c5 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_MME.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_MME_auxmatch.xlsx b/llava_phi_c03/llava_phi_c03_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3331552b5e8ef44096843a682071a671391f1fe9 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_MME_auxmatch.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_MME_score.csv b/llava_phi_c03/llava_phi_c03_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..adeecc2f72180b807bdee54897b2a900e69e650e --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"685.9732893157263","228.57142857142856","72.5","67.75","48.529411764705884","45.0","53.333333333333336","73.57142857142857","80.0","90.0","57.5","45.0","48.333333333333336","68.02721088435375","100.0","65.0" diff --git a/llava_phi_c03/llava_phi_c03_POPE.xlsx b/llava_phi_c03/llava_phi_c03_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..36541c5a7b8d2fde02f9873478e026dffbd6bd5b Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_POPE.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_POPE_auxmatch.xlsx b/llava_phi_c03/llava_phi_c03_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..06c8e2168f1282ff97c5eaf8324cf925afab5247 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_POPE_auxmatch.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_POPE_score.csv b/llava_phi_c03/llava_phi_c03_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..f528c696a49727e652a9966ca208da5f84f922d2 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","68.5337726523888","55.43333333333334","52.964714441615136","97.06666666666666" +"popular","69.43252265140677","57.266666666666666","54.046028210838905","97.06666666666666" +"random","69.16864608076011","56.733333333333334","53.7269372693727","97.06666666666666" +"adversarial","67.05042597282984","52.300000000000004","51.21350685895181","97.06666666666666" diff --git a/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL.xlsx b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..849dd51e7f648bbfd39a44f7d559c962c1028ca1 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_acc.csv b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..dd2324ebbb78024b3079ab272323a953e7f82d9a --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.46488294314381273","0.4627659574468085","0.5542857142857143","0.5555555555555556","0.7428571428571429","0.29333333333333333","0.5267857142857143","0.48","0.6111111111111112","0.3068181818181818","0.45517241379310347","0.3793103448275862","0.4117647058823529" diff --git a/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_openai_result.pkl b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..df894a0e7ff0d1ba769964de5f4605146e0b849b --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1277fe3b94d4d1c81602a6966a60525a0672793964ccbb96ae1868facb5d4dda +size 78596 diff --git a/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_openai_result.xlsx b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4a2ed6388dffdaa4ff79a9f550d59d60e37b2aea Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_RealWorldQA.xlsx b/llava_phi_c03/llava_phi_c03_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d0ed6bd2234d5137c2b6b900ff8e815fefaf728c Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_RealWorldQA.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_RealWorldQA_acc.csv b/llava_phi_c03/llava_phi_c03_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..f7f00783e8b302a64389dc628a30f5296bfe97b4 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.469281045751634" diff --git a/llava_phi_c03/llava_phi_c03_RealWorldQA_openai_result.pkl b/llava_phi_c03/llava_phi_c03_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..490241c23ef38de257b8add077083e017fa67cd1 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0709a8a1d68ba7bfd939776ef636dbed5bc695acbbd384c3d489bb1858cdbc9d +size 41411 diff --git a/llava_phi_c03/llava_phi_c03_RealWorldQA_openai_result.xlsx b/llava_phi_c03/llava_phi_c03_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..72496f86d16ded234d2799bc6a99edb9efb77819 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_RealWorldQA_openai_result.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_SEEDBench_IMG.xlsx b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3aa6bc5d4ec4a9f713f31b2cf5bad4dce003932f Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_acc.csv b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..336a894b2ccc7ed5b0e80b3bb1a9b5cf4c3e4be0 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.41259134345137716","0.39685953968595394","0.4281813216821409","0.4845360824742268","0.37321063394683024","0.32856559051900286","0.4920835972134262","0.3683409436834094","0.6785714285714286","0.525679758308157" diff --git a/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_openai_result.pkl b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f52b259e9d4fa05ff1adc989cee66ab2ff005df1 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f21e90fbce389074fcc1be981d5b23629762f78d65158f44366030532e40a48 +size 768763 diff --git a/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_openai_result.xlsx b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..782f3c4a161101bc5a3514c73331c3722ee049ff --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64906afc9bc6a1b86973cea2aa5212922cba908b1cf863f3c6101eb11c7d1889 +size 1051208 diff --git a/llava_phi_c03/llava_phi_c03_ScienceQA_VAL.xlsx b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e5df77771af6d4358b55a95619a6b3e6fa038e4d Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_acc.csv b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..e6d56177a2fb99dc33060a88d21f77da8cfbc450 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.586075345731998","0.7291666666666666","0.5","1.0","1.0","1.0","1.0","0.391304347826087","0.6190476190476191","0.375","1.0","0.375","0.8928571428571429","0.6595744680851063","1.0","0.5238095238095238","0.6","1.0","0.8709677419354839","0.6","1.0","0.5","1.0","0.6","0.7164179104477612","0.9811320754716981","0.358974358974359","0.44","0.6470588235294118","0.10638297872340426","0.38028169014084506","1.0","1.0","1.0","0.3609022556390977","0.3387096774193548","0.6293103448275862","0.5","0.5","0.2765957446808511","0.4","0.5","0.3382352941176471","0.2777777777777778","0.37209302325581395","0.5","1.0","1.0","1.0","1.0","0.5","0.5238095238095238","0.5970149253731343","0.7955974842767296","0.6666666666666666","0.8157894736842105","0.6666666666666666","0.36666666666666664","1.0","0.0","1.0","0.2777777777777778","0.0784313725490196","0.8888888888888888","1.0","0.5625","1.0" diff --git a/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_openai_result.pkl b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..edfbdfd753f82b932f506c90e63621972558dc59 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a411c2a1ca665cfb13498d0f0426c1080143eab448231bf587540b38319cbe62 +size 113352 diff --git a/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b91fc8cfe8f6b6bc0419618bc516df8546ff5752 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_TextVQA_VAL.xlsx b/llava_phi_c03/llava_phi_c03_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..87aac618b6f7585daeb98e1fa3a614975fef1334 Binary files /dev/null and b/llava_phi_c03/llava_phi_c03_TextVQA_VAL.xlsx differ diff --git a/llava_phi_c03/llava_phi_c03_TextVQA_VAL_acc.csv b/llava_phi_c03/llava_phi_c03_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..77cf888adfc02a7ef53c0764514f32f2620557e9 --- /dev/null +++ b/llava_phi_c03/llava_phi_c03_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"10.205999999999998" diff --git a/llava_phi_c04/01_TextVQA_VAL.pkl b/llava_phi_c04/01_TextVQA_VAL.pkl new file mode 100644 index 0000000000000000000000000000000000000000..21ceadc73f45d09c9dfb360f482f12dd84b48510 --- /dev/null +++ b/llava_phi_c04/01_TextVQA_VAL.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae751fe21dcd1b27b185afcb1499bf2f73b2760672219a8433190216f3b8c5fb +size 128990 diff --git a/llava_phi_c04/llava_phi_c04_ScienceQA_VAL.xlsx b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..22bf719c9dfac50f23fc85989afc1ab069a4a124 Binary files /dev/null and b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_acc.csv b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..c4f9d1eb9ca59fc795e834cb574edcf63ab9ee96 --- /dev/null +++ b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0","0.0" diff --git a/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_openai_result.pkl b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3364a0f3689245da44ff72a9756bc85c4c7592e7 --- /dev/null +++ b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0feba6acce71533b56fd888312483b738676fc2d96d43724d9cd21120ceea74 +size 276945 diff --git a/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..390d7f7d7990c9554b92e1dc8b92f5f147faf890 Binary files /dev/null and b/llava_phi_c04/llava_phi_c04_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_AI2D_TEST_acc-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_AI2D_TEST_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..13ff95aac9643cb509b673c9f180665c27c7c11f --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_AI2D_TEST_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.4951424870466321","0.625","0.5483870967741935","0.5357142857142857","0.5757314974182444","0.3932853717026379","0.38267148014440433","0.5133470225872689","0.5192307692307693","0.4177215189873418","0.43283582089552236","0.3902439024390244","0.6388888888888888","0.43440233236151604","0.625","0.36363636363636365" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_HallusionBench_score-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..615a61e0d0f657512cbbe7e0fb2cb41bc0e8cc1d --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","43.53312302839117","12.427745664739884","8.131868131868131" +"VD","46.02368866328257","14.347826086956522","8.303249097472925" +"VS","39.44444444444444","8.620689655172415","7.865168539325842" +"VD_video","50.0","12.5","13.043478260869565" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VD_math","36.11111111111111","8.333333333333332","11.11111111111111" +"VS_chart","34.61538461538461","5.0","14.473684210526317" +"VD_illusion","42.36111111111111","20.967741935483872","0.0" +"VS_map","43.75","9.090909090909092","3.125" +"VD_figure","52.5","19.51219512195122","12.82051282051282" +"VS_ocr","48.148148148148145","23.076923076923077","3.7037037037037033" +"VD_ocr","50.56179775280899","6.976744186046512","6.976744186046512" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_MME_score-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..cfe9d98db6dc2c48db164f4f887553ea212aeb60 --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1057.4552821128452","291.7857142857143","77.5","100.75","66.47058823529412","65.0","78.33333333333334","84.28571428571428","135.0","183.33333333333331","118.75","47.5","53.33333333333333","86.73469387755102","157.25","95.0" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_POPE_score-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..2186247b00e9504d8ea769b69a45567a1ed2a5a2 --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","80.26345673404496","80.68888888888888","82.07152810032512","78.53333333333333" +"popular","82.11920529801324","82.89999999999999","86.0482103725347","78.53333333333333" +"random","83.01620859760394","83.93333333333334","88.04185351270553","78.53333333333333" +"adversarial","76.0245240400129","75.23333333333333","73.67104440275172","78.53333333333333" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_Q-Bench1_VAL_acc-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_Q-Bench1_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..df178cd2c0fd083a562db1d63bf5335a3fd07cc3 --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_Q-Bench1_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5438127090301004","0.5691489361702128","0.6171428571428571","0.6239316239316239","0.6571428571428571","0.41333333333333333","0.6517857142857143","0.54","0.7444444444444445","0.3125","0.5724137931034483","0.41379310344827586","0.5764705882352941" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_RealWorldQA_acc-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..230c1e0c73279a5c849bd45319aee4eb7813c8a8 --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.41045751633986927" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_SEEDBench_IMG_acc-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..9c6cb79acc728d20a77c8349d55223fa8f457a20 --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5207982012366498","0.4527855452785545","0.5647187329328236","0.6288659793814433","0.4539877300613497","0.4331834899877401","0.6830272324255858","0.4079147640791476","0.5833333333333334","0.7069486404833837" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_ScienceQA_VAL_acc-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..72f3fc3c45c1e037d0f0b2912d3762e1889484a0 --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6180257510729614","0.875","1.0","1.0","1.0","0.5","1.0","0.5869565217391305","0.5714285714285714","0.296875","1.0","0.5","0.7857142857142857","0.6595744680851063","1.0","0.47619047619047616","0.6","1.0","0.9838709677419355","0.6","1.0","1.0","0.75","0.68","0.8059701492537313","0.9245283018867925","0.3076923076923077","0.52","0.47058823529411764","0.23404255319148937","0.4084507042253521","1.0","1.0","0.8","0.42105263157894735","0.3064516129032258","0.8362068965517241","0.5","0.5","0.40425531914893614","0.7","0.5","0.35294117647058826","0.5","0.3953488372093023","0.6","0.8","1.0","1.0","0.975609756097561","0.5","0.35714285714285715","0.5074626865671642","0.8836477987421384","0.6666666666666666","0.7894736842105263","0.6666666666666666","0.23333333333333334","1.0","0.0","1.0","0.3888888888888889","0.0784313725490196","0.8888888888888888","1.0","0.40625","1.0" diff --git a/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_TextVQA_VAL_acc-checkpoint.csv b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..95477bc72b4ef05daf210f5640f3c6fb5d4255c2 --- /dev/null +++ b/llava_phi_c05/.ipynb_checkpoints/llava_phi_c05_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"15.894" diff --git a/llava_phi_c05/01_MMMU_DEV_VAL.pkl b/llava_phi_c05/01_MMMU_DEV_VAL.pkl new file mode 100644 index 0000000000000000000000000000000000000000..28ad7596d6da4abb1d643e5914bf85785d552ed7 --- /dev/null +++ b/llava_phi_c05/01_MMMU_DEV_VAL.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff25c7cca37376d67da8526bf53ee6147dcf42e33b52d45346fcb2b2e1f3c6fc +size 2916 diff --git a/llava_phi_c05/llava_phi_c05_AI2D_TEST.xlsx b/llava_phi_c05/llava_phi_c05_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5551ef70d5778b7546d58c49efcc45c4888a7f54 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_AI2D_TEST.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_AI2D_TEST_acc.csv b/llava_phi_c05/llava_phi_c05_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..13ff95aac9643cb509b673c9f180665c27c7c11f --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.4951424870466321","0.625","0.5483870967741935","0.5357142857142857","0.5757314974182444","0.3932853717026379","0.38267148014440433","0.5133470225872689","0.5192307692307693","0.4177215189873418","0.43283582089552236","0.3902439024390244","0.6388888888888888","0.43440233236151604","0.625","0.36363636363636365" diff --git a/llava_phi_c05/llava_phi_c05_AI2D_TEST_openai_result.pkl b/llava_phi_c05/llava_phi_c05_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e89cfcfd341d67babebec72924ba834b709da6de --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59594390a45a49eeada7aadc3e3d6f568942947ed2967dade356db5d807e7712 +size 166877 diff --git a/llava_phi_c05/llava_phi_c05_AI2D_TEST_openai_result.xlsx b/llava_phi_c05/llava_phi_c05_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5624252c12cf969d8d2807b3d76c9c0ab230b803 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_HallusionBench.xlsx b/llava_phi_c05/llava_phi_c05_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b26f5d193682e1147515d0b8dc61ab20f2e7f695 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_HallusionBench.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_HallusionBench_auxmatch.xlsx b/llava_phi_c05/llava_phi_c05_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e6cb445ce193fc7871481aabd7128c68ca462613 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_HallusionBench_auxmatch.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_HallusionBench_score.csv b/llava_phi_c05/llava_phi_c05_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..615a61e0d0f657512cbbe7e0fb2cb41bc0e8cc1d --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","43.53312302839117","12.427745664739884","8.131868131868131" +"VD","46.02368866328257","14.347826086956522","8.303249097472925" +"VS","39.44444444444444","8.620689655172415","7.865168539325842" +"VD_video","50.0","12.5","13.043478260869565" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VD_math","36.11111111111111","8.333333333333332","11.11111111111111" +"VS_chart","34.61538461538461","5.0","14.473684210526317" +"VD_illusion","42.36111111111111","20.967741935483872","0.0" +"VS_map","43.75","9.090909090909092","3.125" +"VD_figure","52.5","19.51219512195122","12.82051282051282" +"VS_ocr","48.148148148148145","23.076923076923077","3.7037037037037033" +"VD_ocr","50.56179775280899","6.976744186046512","6.976744186046512" diff --git a/llava_phi_c05/llava_phi_c05_MME.xlsx b/llava_phi_c05/llava_phi_c05_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a8f77c7e6e495cf4e15aeac74c85ee16b4db06cb Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_MME.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_MME_auxmatch.xlsx b/llava_phi_c05/llava_phi_c05_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e84bc7ed5093f6b29cf0978e41d130f437cbb8a5 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_MME_auxmatch.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_MME_score.csv b/llava_phi_c05/llava_phi_c05_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..cfe9d98db6dc2c48db164f4f887553ea212aeb60 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1057.4552821128452","291.7857142857143","77.5","100.75","66.47058823529412","65.0","78.33333333333334","84.28571428571428","135.0","183.33333333333331","118.75","47.5","53.33333333333333","86.73469387755102","157.25","95.0" diff --git a/llava_phi_c05/llava_phi_c05_POPE.xlsx b/llava_phi_c05/llava_phi_c05_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..139ca6e663484a651a06603d6c6df0b0fdf4dbda Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_POPE.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_POPE_auxmatch.xlsx b/llava_phi_c05/llava_phi_c05_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d88a821c8c308891dc8de2e161db351333e422ea Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_POPE_auxmatch.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_POPE_score.csv b/llava_phi_c05/llava_phi_c05_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..2186247b00e9504d8ea769b69a45567a1ed2a5a2 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","80.26345673404496","80.68888888888888","82.07152810032512","78.53333333333333" +"popular","82.11920529801324","82.89999999999999","86.0482103725347","78.53333333333333" +"random","83.01620859760394","83.93333333333334","88.04185351270553","78.53333333333333" +"adversarial","76.0245240400129","75.23333333333333","73.67104440275172","78.53333333333333" diff --git a/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL.xlsx b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9eb9e0e650ac604b2f5c7460067b73216f0532be Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_acc.csv b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..df178cd2c0fd083a562db1d63bf5335a3fd07cc3 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5438127090301004","0.5691489361702128","0.6171428571428571","0.6239316239316239","0.6571428571428571","0.41333333333333333","0.6517857142857143","0.54","0.7444444444444445","0.3125","0.5724137931034483","0.41379310344827586","0.5764705882352941" diff --git a/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_openai_result.pkl b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a0debc5922134320fa479e4b76fc9a0e6c688648 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4e35471ee88914412cda87e7011bf4c2880dc06e747e4b441e7dbb76e0f66e1 +size 78596 diff --git a/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_openai_result.xlsx b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..97812654779606ff43c346cc8f4997fb10a12234 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_RealWorldQA.xlsx b/llava_phi_c05/llava_phi_c05_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e5330cd1b7d97dc8f5680b83b369be48b45f4a70 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_RealWorldQA.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_RealWorldQA_acc.csv b/llava_phi_c05/llava_phi_c05_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..230c1e0c73279a5c849bd45319aee4eb7813c8a8 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.41045751633986927" diff --git a/llava_phi_c05/llava_phi_c05_RealWorldQA_openai_result.pkl b/llava_phi_c05/llava_phi_c05_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..13ee0f3e6607fc021cd504ac175a1922ef312aab --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f518e2f7f1cb461251ce384607aaf19223981d2811e8bd684ebbc1699f9a4e0 +size 41411 diff --git a/llava_phi_c05/llava_phi_c05_RealWorldQA_openai_result.xlsx b/llava_phi_c05/llava_phi_c05_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..45fb34342dfd4b94e866122756e4f95baf30c809 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_RealWorldQA_openai_result.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_SEEDBench_IMG.xlsx b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8d073d41d93d65a1cc2d1473d0ab221299c564ed Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_acc.csv b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..9c6cb79acc728d20a77c8349d55223fa8f457a20 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5207982012366498","0.4527855452785545","0.5647187329328236","0.6288659793814433","0.4539877300613497","0.4331834899877401","0.6830272324255858","0.4079147640791476","0.5833333333333334","0.7069486404833837" diff --git a/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_openai_result.pkl b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e643cfda118380f619853b3350c72cba22b743d2 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d6dbd49f6f6eadcb95ddd8b3afdcf20dc53d3ac436ff291a7e2a51d912afca +size 768763 diff --git a/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_openai_result.xlsx b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..cb120385f04f57be6972b64dca1041b20fd89b31 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b65182ff473da54e3e105b13df6af20c425bc753d15d12d043e89fad9cb663e +size 1050792 diff --git a/llava_phi_c05/llava_phi_c05_ScienceQA_VAL.xlsx b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9298bbf239fca3ebaba1ad1ef53720f87ce528b6 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_acc.csv b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..72f3fc3c45c1e037d0f0b2912d3762e1889484a0 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6180257510729614","0.875","1.0","1.0","1.0","0.5","1.0","0.5869565217391305","0.5714285714285714","0.296875","1.0","0.5","0.7857142857142857","0.6595744680851063","1.0","0.47619047619047616","0.6","1.0","0.9838709677419355","0.6","1.0","1.0","0.75","0.68","0.8059701492537313","0.9245283018867925","0.3076923076923077","0.52","0.47058823529411764","0.23404255319148937","0.4084507042253521","1.0","1.0","0.8","0.42105263157894735","0.3064516129032258","0.8362068965517241","0.5","0.5","0.40425531914893614","0.7","0.5","0.35294117647058826","0.5","0.3953488372093023","0.6","0.8","1.0","1.0","0.975609756097561","0.5","0.35714285714285715","0.5074626865671642","0.8836477987421384","0.6666666666666666","0.7894736842105263","0.6666666666666666","0.23333333333333334","1.0","0.0","1.0","0.3888888888888889","0.0784313725490196","0.8888888888888888","1.0","0.40625","1.0" diff --git a/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_openai_result.pkl b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6a7fffd6f6def58756334b8c363c191727aa9ff2 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb1e1676b9313455314ab312ffc430cfd15db46eddc2d56ad2f3adac9fffee02 +size 113352 diff --git a/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d5a3d92c7747ee3bc9c71aa7c2029cc59328fb65 Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_TextVQA_VAL.xlsx b/llava_phi_c05/llava_phi_c05_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7c5b961e0fbb64da00855fdeab28fb8ddcf4d07d Binary files /dev/null and b/llava_phi_c05/llava_phi_c05_TextVQA_VAL.xlsx differ diff --git a/llava_phi_c05/llava_phi_c05_TextVQA_VAL_acc.csv b/llava_phi_c05/llava_phi_c05_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..95477bc72b4ef05daf210f5640f3c6fb5d4255c2 --- /dev/null +++ b/llava_phi_c05/llava_phi_c05_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"15.894" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_AI2D_TEST_acc-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_AI2D_TEST_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..db6fb6e5b883718e1fc64d64cca0fbea573d33bc --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_AI2D_TEST_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.4990284974093264","0.5","0.5483870967741935","0.4642857142857143","0.5791738382099828","0.4052757793764988","0.37906137184115524","0.5277207392197125","0.5","0.4050632911392405","0.43283582089552236","0.36585365853658536","0.6666666666666666","0.44314868804664725","0.6875","0.3181818181818182" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_HallusionBench_score-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..967ac8bce2384ea64ba261f44015a05d9b38232e --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","41.85068349106204","9.248554913294797","5.714285714285714" +"VD","43.48561759729272","9.565217391304348","4.693140794223827" +"VS","39.166666666666664","8.620689655172415","7.303370786516854" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_map","45.3125","4.545454545454546","3.125" +"VS_ocr","48.148148148148145","26.923076923076923","3.7037037037037033" +"VD_ocr","49.43820224719101","2.3255813953488373","4.651162790697675" +"VD_figure","55.00000000000001","26.82926829268293","12.82051282051282" +"VS_chart","33.07692307692307","5.0","13.157894736842104" +"VD_video","44.70588235294118","2.083333333333333","5.797101449275362" +"VD_illusion","41.66666666666667","12.903225806451612","0.0" +"VD_math","30.555555555555557","2.7777777777777777","3.7037037037037033" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_MME_score-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..9ebe7fb70ed8af4e6d24d0c370a021701eac37e0 --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"966.4086634653861","226.42857142857144","70.0","87.0","53.52941176470588","55.0","68.33333333333334","71.42857142857143","125.0","166.66666666666669","104.75","50.0","55.0","73.12925170068027","163.0","50.0" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_POPE_score-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..b6d4267b423f3ca0412a9f31e639cdcc70677c5c --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","78.98565366740755","76.88888888888889","72.4157095220452","86.86666666666667" +"adversarial","74.54233409610985","70.33333333333334","65.28056112224448","86.86666666666667" +"popular","79.52395483674091","77.63333333333333","73.32583005064716","86.86666666666667" +"random","83.39200000000001","82.69999999999999","80.18461538461538","86.86666666666667" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_Q-Bench1_VAL_acc-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_Q-Bench1_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..64efe559adc26ded53da7f12ba8847d09c5a0dd1 --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_Q-Bench1_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5010033444816053","0.5212765957446809","0.5771428571428572","0.5299145299145299","0.4714285714285714","0.36666666666666664","0.6339285714285714","0.52","0.7333333333333333","0.30113636363636365","0.4827586206896552","0.42528735632183906","0.6" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_RealWorldQA_acc-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..5649801e06c75da9ebcadc195feb02baf39a1e9b --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4261437908496732" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_SEEDBench_IMG_acc-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..1b06e6eaa42f3a1a684c2e5465e8ff7b383a806d --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5112422709387296","0.4579479457947946","0.5674494811578372","0.6082474226804123","0.43047034764826175","0.40988966080915407","0.662444585180494","0.3835616438356164","0.6428571428571429","0.6858006042296072" diff --git a/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_TextVQA_VAL_acc-checkpoint.csv b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..e10b582d46bc323442cddc0f8a2dcac0e0875d1a --- /dev/null +++ b/llava_phi_c06/.ipynb_checkpoints/llava_phi_c06_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"15.441999999999997" diff --git a/llava_phi_c06/llava_phi_c06_AI2D_TEST.xlsx b/llava_phi_c06/llava_phi_c06_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..de925399be5f8127a156b375e99973228513ce64 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_AI2D_TEST.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_AI2D_TEST_acc.csv b/llava_phi_c06/llava_phi_c06_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..db6fb6e5b883718e1fc64d64cca0fbea573d33bc --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.4990284974093264","0.5","0.5483870967741935","0.4642857142857143","0.5791738382099828","0.4052757793764988","0.37906137184115524","0.5277207392197125","0.5","0.4050632911392405","0.43283582089552236","0.36585365853658536","0.6666666666666666","0.44314868804664725","0.6875","0.3181818181818182" diff --git a/llava_phi_c06/llava_phi_c06_AI2D_TEST_openai_result.pkl b/llava_phi_c06/llava_phi_c06_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..288aff916f45323cde370e7da12acf76260da7e9 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4499fc9e2904b822433f5c82b9a0d6c0d63876f1a4e480fb288cff89ff82a3a +size 166877 diff --git a/llava_phi_c06/llava_phi_c06_AI2D_TEST_openai_result.xlsx b/llava_phi_c06/llava_phi_c06_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bbdf86585dc3ab9f51696801425bb8e56d46fa1f Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_HallusionBench.xlsx b/llava_phi_c06/llava_phi_c06_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..57d6bb1e976e729b46679c96bdc010687207925b Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_HallusionBench.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_HallusionBench_auxmatch.xlsx b/llava_phi_c06/llava_phi_c06_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..1493e650e10949cf0f543a9e8c950d4bf8e9389c Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_HallusionBench_auxmatch.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_HallusionBench_score.csv b/llava_phi_c06/llava_phi_c06_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..967ac8bce2384ea64ba261f44015a05d9b38232e --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","41.85068349106204","9.248554913294797","5.714285714285714" +"VD","43.48561759729272","9.565217391304348","4.693140794223827" +"VS","39.166666666666664","8.620689655172415","7.303370786516854" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_map","45.3125","4.545454545454546","3.125" +"VS_ocr","48.148148148148145","26.923076923076923","3.7037037037037033" +"VD_ocr","49.43820224719101","2.3255813953488373","4.651162790697675" +"VD_figure","55.00000000000001","26.82926829268293","12.82051282051282" +"VS_chart","33.07692307692307","5.0","13.157894736842104" +"VD_video","44.70588235294118","2.083333333333333","5.797101449275362" +"VD_illusion","41.66666666666667","12.903225806451612","0.0" +"VD_math","30.555555555555557","2.7777777777777777","3.7037037037037033" diff --git a/llava_phi_c06/llava_phi_c06_MME.xlsx b/llava_phi_c06/llava_phi_c06_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..48ea23ed2060393b85f2c5be9cb4f8647d3cd1b4 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_MME.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_MME_auxmatch.xlsx b/llava_phi_c06/llava_phi_c06_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a23732614c9e4474ae141555f48f70fbf9853cab Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_MME_auxmatch.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_MME_score.csv b/llava_phi_c06/llava_phi_c06_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..9ebe7fb70ed8af4e6d24d0c370a021701eac37e0 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"966.4086634653861","226.42857142857144","70.0","87.0","53.52941176470588","55.0","68.33333333333334","71.42857142857143","125.0","166.66666666666669","104.75","50.0","55.0","73.12925170068027","163.0","50.0" diff --git a/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL.xlsx b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..387ab82e04ccacc944ef0a01d3f562d1bed8adc5 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_acc.csv b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..7005cb04bfedd244857853a08ceb7cc0e31c2f29 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_acc.csv @@ -0,0 +1,3 @@ +"split","Overall","Accounting","Agriculture","Architecture_and_Engineering","Art","Art_Theory","Basic_Medical_Science","Biology","Chemistry","Clinical_Medicine","Computer_Science","Design","Diagnostics_and_Laboratory_Medicine","Economics","Electronics","Energy_and_Power","Finance","Geography","History","Literature","Manage","Marketing","Materials","Math","Mechanical_Engineering","Music","Pharmacy","Physics","Psychology","Public_Health","Sociology","Art & Design","Business","Health & Medicine","Humanities & Social Science","Science","Tech & Engineering" +"dev","0.28","0.2","0.2","0.0","0.2","0.6","0.4","0.4","0.4","0.2","0.2","0.0","0.4","0.2","0.0","0.6","0.0","0.4","0.6","0.4","0.0","0.6","0.6","0.4","0.0","0.0","0.4","0.2","0.4","0.0","0.4","0.2","0.2","0.28","0.45","0.36","0.22857142857142856" +"validation","0.3111111111111111","0.3333333333333333","0.36666666666666664","0.3","0.3333333333333333","0.23333333333333334","0.3","0.3","0.2","0.36666666666666664","0.26666666666666666","0.4","0.3","0.2","0.4","0.3","0.23333333333333334","0.2","0.3333333333333333","0.7333333333333333","0.3333333333333333","0.16666666666666666","0.1","0.3333333333333333","0.26666666666666666","0.26666666666666666","0.4666666666666667","0.2","0.3","0.43333333333333335","0.36666666666666664","0.30833333333333335","0.25333333333333335","0.37333333333333335","0.43333333333333335","0.24666666666666667","0.2857142857142857" diff --git a/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_openai_result.pkl b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f25f8414afc83df65dfc8a1fb13db6046328c4e1 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c1022623e88946e924b94d83d359562f10e02ca4914523a49657bd9f21c723a +size 64759 diff --git a/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_openai_result.xlsx b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a69a08502f6ee5945898f4d50b20f510560e6ac6 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_MMMU_DEV_VAL_openai_result.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_POPE.xlsx b/llava_phi_c06/llava_phi_c06_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5ee6836089108d9d30fadb9d142fef6d1ff9b3a7 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_POPE.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_POPE_auxmatch.xlsx b/llava_phi_c06/llava_phi_c06_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f6d1b087595ba9f5c9cdb706cfd5c60080a5b4bb Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_POPE_auxmatch.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_POPE_score.csv b/llava_phi_c06/llava_phi_c06_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..b6d4267b423f3ca0412a9f31e639cdcc70677c5c --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","78.98565366740755","76.88888888888889","72.4157095220452","86.86666666666667" +"adversarial","74.54233409610985","70.33333333333334","65.28056112224448","86.86666666666667" +"popular","79.52395483674091","77.63333333333333","73.32583005064716","86.86666666666667" +"random","83.39200000000001","82.69999999999999","80.18461538461538","86.86666666666667" diff --git a/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL.xlsx b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..935ddca84f68ce56d71c9bd513ff122724d90f12 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_acc.csv b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..64efe559adc26ded53da7f12ba8847d09c5a0dd1 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5010033444816053","0.5212765957446809","0.5771428571428572","0.5299145299145299","0.4714285714285714","0.36666666666666664","0.6339285714285714","0.52","0.7333333333333333","0.30113636363636365","0.4827586206896552","0.42528735632183906","0.6" diff --git a/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_openai_result.pkl b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1e23355159a518156a0ccfac29297b7cb5e711c8 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef69f800db143320d1e2fa5365cf79f97b3665b2e80e2c5cb7a2192bd0e9bcec +size 78596 diff --git a/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_openai_result.xlsx b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bb09da1432c7feac6e16cb9beba6b23311805418 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_RealWorldQA.xlsx b/llava_phi_c06/llava_phi_c06_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..15d48f60a1fb3df689fcc629f1299582fd205a08 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_RealWorldQA.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_RealWorldQA_acc.csv b/llava_phi_c06/llava_phi_c06_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..5649801e06c75da9ebcadc195feb02baf39a1e9b --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4261437908496732" diff --git a/llava_phi_c06/llava_phi_c06_RealWorldQA_openai_result.pkl b/llava_phi_c06/llava_phi_c06_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e55fc84bfd92dcdda9c9ff76d391cb2b3931cde4 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428aa9db3f41488e8bf388b0b000cefcab30adf7aa88e283ffb6c5c42bb5fb1b +size 41411 diff --git a/llava_phi_c06/llava_phi_c06_RealWorldQA_openai_result.xlsx b/llava_phi_c06/llava_phi_c06_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..de162981bfd7486c9274efca092968e81bc64dac Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_RealWorldQA_openai_result.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_SEEDBench_IMG.xlsx b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5a813b1e9c4bfed25e3614aef5320ba4977731a1 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_acc.csv b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..1b06e6eaa42f3a1a684c2e5465e8ff7b383a806d --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5112422709387296","0.4579479457947946","0.5674494811578372","0.6082474226804123","0.43047034764826175","0.40988966080915407","0.662444585180494","0.3835616438356164","0.6428571428571429","0.6858006042296072" diff --git a/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_openai_result.pkl b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1dfaec750e7eccdc0b456f8c50aa59bdc35e9f92 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf694b51fa7a5022bf8b533ab8b5fe0e6c9cb8be1144b61d41154a7fcfa0e76 +size 768834 diff --git a/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_openai_result.xlsx b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5bf3b4fe5b42e42b7f8f9d382ff4a811ad530004 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ed0293fca5f939c104a5ae4325a1c23282140c068ace1d47d8df700f93dfe4 +size 1051504 diff --git a/llava_phi_c06/llava_phi_c06_ScienceQA_VAL.xlsx b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ebad3adaadc64a32720ca2c84cbe1e39ededb225 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_acc.csv b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..94f34c97d2f70cdf7ba3fcd9d1f96c04599ef2fc --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6337625178826896","0.875","1.0","1.0","0.0","1.0","0.6666666666666666","0.5869565217391305","0.6190476190476191","0.703125","1.0","0.25","0.8452380952380952","0.6382978723404256","1.0","0.5238095238095238","0.6","1.0","1.0","0.4","1.0","1.0","0.75","0.64","0.8059701492537313","0.9056603773584906","0.4358974358974359","0.32","0.5294117647058824","0.1276595744680851","0.4084507042253521","1.0","1.0","0.8","0.40601503759398494","0.25806451612903225","0.8189655172413793","0.0","0.5","0.3404255319148936","0.5","0.0","0.5147058823529411","0.3888888888888889","0.3488372093023256","0.5","0.8","1.0","1.0","0.926829268292683","0.5","0.47619047619047616","0.5223880597014925","0.8647798742138365","0.6666666666666666","0.7631578947368421","1.0","0.2","1.0","0.0","1.0","0.3333333333333333","0.3333333333333333","0.8888888888888888","1.0","0.515625","1.0" diff --git a/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_openai_result.pkl b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..87c36d20bd0e10438f16663e3c8e4649e9e26ea1 --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2432634a9197c2e74801780ad1b53ca32a744e60611977d420f10954ad6493e5 +size 113366 diff --git a/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f90e53410bc1de030f3ec34fb52d6441ee082870 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_TextVQA_VAL.xlsx b/llava_phi_c06/llava_phi_c06_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b5935bd3604275193dd8dd86448b688a9f043906 Binary files /dev/null and b/llava_phi_c06/llava_phi_c06_TextVQA_VAL.xlsx differ diff --git a/llava_phi_c06/llava_phi_c06_TextVQA_VAL_acc.csv b/llava_phi_c06/llava_phi_c06_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..e10b582d46bc323442cddc0f8a2dcac0e0875d1a --- /dev/null +++ b/llava_phi_c06/llava_phi_c06_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"15.441999999999997" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_AI2D_TEST_acc-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_AI2D_TEST_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..3501c5bc9f410a77782f235743e6c9d3fbfb7eba --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_AI2D_TEST_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5297927461139896","0.25","0.5483870967741935","0.6071428571428571","0.6256454388984509","0.4052757793764988","0.4223826714801444","0.5503080082135524","0.4807692307692308","0.4810126582278481","0.4626865671641791","0.4634146341463415","0.6944444444444444","0.44314868804664725","0.4375","0.5" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_HallusionBench_score-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..c863410358e16a3295ef1cc13766d7a7c8a16df2 --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","44.58464773922187","12.138728323699421","9.670329670329672" +"VD","47.71573604060914","14.347826086956522","10.830324909747292" +"VS","39.44444444444444","7.758620689655173","7.865168539325842" +"VD_figure","60.0","26.82926829268293","23.076923076923077" +"VS_chart","33.07692307692307","5.0","13.157894736842104" +"VD_ocr","52.80898876404494","6.976744186046512","9.30232558139535" +"VS_ocr","50.0","26.923076923076923","7.4074074074074066" +"VD_math","43.51851851851852","8.333333333333332","16.666666666666664" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_map","45.3125","0.0","3.125" +"VD_video","47.05882352941176","6.25","11.594202898550725" +"VD_illusion","41.66666666666667","20.967741935483872","0.0" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_MME_score-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..7981796f465cfff61c171c3367b0375cfa4f326f --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"944.8881552621048","242.5","70.0","98.75","83.8235294117647","50.0","75.0","85.0","113.33333333333331","155.0","91.25","57.5","66.66666666666666","61.564625850340136","129.5","50.0" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_POPE_score-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..07bca557ef284b734009bba792db4b670f68c486 --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","79.26169308078855","76.15555555555555","70.12653898768811","91.13333333333333" +"popular","80.15244796247434","77.43333333333334","71.53322867608581","91.13333333333333" +"random","84.04549646480173","82.69999999999999","77.98060467769538","91.13333333333333" +"adversarial","74.2128121606949","68.33333333333333","62.591575091575095","91.13333333333333" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_Q-Bench1_VAL_acc-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_Q-Bench1_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..47443005d79c7f449525bc996514396a417fa102 --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_Q-Bench1_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.559866220735786","0.574468085106383","0.6571428571428571","0.6495726495726496","0.7571428571428571","0.36666666666666664","0.6160714285714286","0.54","0.7222222222222222","0.42045454545454547","0.5724137931034483","0.47126436781609193","0.5176470588235295" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_RealWorldQA_acc-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..06bf616b5c03927d05eb425da8f2041641f78773 --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.47843137254901963" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_SEEDBench_IMG_acc-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..cbc6b8e849fa30da9a33f9b8487b326fd294d19e --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.510539629005059","0.4409550440955044","0.56799563080284","0.5670103092783505","0.47648261758691207","0.42296689824274625","0.6640278657378087","0.3774733637747336","0.44047619047619047","0.7190332326283988" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_ScienceQA_VAL_acc-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..764ababe3b8397a2f844fc0f1353c1f62bb640ef --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6289938006676205","0.875","1.0","1.0","1.0","1.0","0.6666666666666666","0.6086956521739131","0.5714285714285714","0.640625","1.0","0.375","0.9642857142857143","0.8297872340425532","1.0","0.47619047619047616","0.4","1.0","0.8709677419354839","0.4","0.0","0.5","0.75","0.56","0.7611940298507462","0.9245283018867925","0.358974358974359","0.48","0.6470588235294118","0.19148936170212766","0.38028169014084506","1.0","1.0","1.0","0.38345864661654133","0.3709677419354839","0.8275862068965517","0.5","0.5","0.2765957446808511","0.8","1.0","0.4117647058823529","0.3888888888888889","0.5581395348837209","0.7","1.0","1.0","1.0","0.975609756097561","0.5","0.6904761904761905","0.5522388059701493","0.7767295597484277","0.3333333333333333","0.7105263157894737","0.6666666666666666","0.2","1.0","0.0","1.0","0.6666666666666666","0.0784313725490196","0.7777777777777778","1.0","0.5","1.0" diff --git a/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_TextVQA_VAL_acc-checkpoint.csv b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..a48dafc79db9894b45b0b817f01d2bfa6f1ea474 --- /dev/null +++ b/llava_phi_c09/.ipynb_checkpoints/llava_phi_c09_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"17.351999999999997" diff --git a/llava_phi_c09/01_MMMU_DEV_VAL.pkl b/llava_phi_c09/01_MMMU_DEV_VAL.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4c978613106fd21ace3f8902cfa66bbdbf34e3c6 --- /dev/null +++ b/llava_phi_c09/01_MMMU_DEV_VAL.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d381eb61debced43b9247379d553107b659ab30c38ec66311973c7d54e1cec +size 2190 diff --git a/llava_phi_c09/llava_phi_c09_AI2D_TEST.xlsx b/llava_phi_c09/llava_phi_c09_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..233ae855413a68e33a046f68e00df13c10cc0e33 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_AI2D_TEST.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_AI2D_TEST_acc.csv b/llava_phi_c09/llava_phi_c09_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..3501c5bc9f410a77782f235743e6c9d3fbfb7eba --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5297927461139896","0.25","0.5483870967741935","0.6071428571428571","0.6256454388984509","0.4052757793764988","0.4223826714801444","0.5503080082135524","0.4807692307692308","0.4810126582278481","0.4626865671641791","0.4634146341463415","0.6944444444444444","0.44314868804664725","0.4375","0.5" diff --git a/llava_phi_c09/llava_phi_c09_AI2D_TEST_openai_result.pkl b/llava_phi_c09/llava_phi_c09_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2a06ffbd3416ded71baac2b050b3c587114e8981 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b5ebcb00e5780b7b82061f9f2165ae44cc1ad2617fe4944883d31f577187bd +size 166877 diff --git a/llava_phi_c09/llava_phi_c09_AI2D_TEST_openai_result.xlsx b/llava_phi_c09/llava_phi_c09_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6f834b72254631f8d3fd6c8a03071477a8cf10b8 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_HallusionBench.xlsx b/llava_phi_c09/llava_phi_c09_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..8b59a48887b3915e1e86ea268e918daa8bcf90b8 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_HallusionBench.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_HallusionBench_auxmatch.xlsx b/llava_phi_c09/llava_phi_c09_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f6fc56411a4f826bbce748301e53ed816428c6e4 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_HallusionBench_auxmatch.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_HallusionBench_score.csv b/llava_phi_c09/llava_phi_c09_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..c863410358e16a3295ef1cc13766d7a7c8a16df2 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","44.58464773922187","12.138728323699421","9.670329670329672" +"VD","47.71573604060914","14.347826086956522","10.830324909747292" +"VS","39.44444444444444","7.758620689655173","7.865168539325842" +"VD_figure","60.0","26.82926829268293","23.076923076923077" +"VS_chart","33.07692307692307","5.0","13.157894736842104" +"VD_ocr","52.80898876404494","6.976744186046512","9.30232558139535" +"VS_ocr","50.0","26.923076923076923","7.4074074074074066" +"VD_math","43.51851851851852","8.333333333333332","16.666666666666664" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_map","45.3125","0.0","3.125" +"VD_video","47.05882352941176","6.25","11.594202898550725" +"VD_illusion","41.66666666666667","20.967741935483872","0.0" diff --git a/llava_phi_c09/llava_phi_c09_MME.xlsx b/llava_phi_c09/llava_phi_c09_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e1e1eb957d8fb9657e0f6d58081a89368d4b3d44 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_MME.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_MME_auxmatch.xlsx b/llava_phi_c09/llava_phi_c09_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d70694daaf5c88dc2b08bc9d60503937079ec13d Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_MME_auxmatch.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_MME_score.csv b/llava_phi_c09/llava_phi_c09_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..7981796f465cfff61c171c3367b0375cfa4f326f --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"944.8881552621048","242.5","70.0","98.75","83.8235294117647","50.0","75.0","85.0","113.33333333333331","155.0","91.25","57.5","66.66666666666666","61.564625850340136","129.5","50.0" diff --git a/llava_phi_c09/llava_phi_c09_POPE.xlsx b/llava_phi_c09/llava_phi_c09_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a168c28aaee55c45f5ef38e65ccac03f79fe7817 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_POPE.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_POPE_auxmatch.xlsx b/llava_phi_c09/llava_phi_c09_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3deca2738b73ad0bbf9f8d6cabae418513845f99 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_POPE_auxmatch.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_POPE_score.csv b/llava_phi_c09/llava_phi_c09_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..07bca557ef284b734009bba792db4b670f68c486 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","79.26169308078855","76.15555555555555","70.12653898768811","91.13333333333333" +"popular","80.15244796247434","77.43333333333334","71.53322867608581","91.13333333333333" +"random","84.04549646480173","82.69999999999999","77.98060467769538","91.13333333333333" +"adversarial","74.2128121606949","68.33333333333333","62.591575091575095","91.13333333333333" diff --git a/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL.xlsx b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bcce02763403cd2060ed577153a07c40d34b8ac5 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_acc.csv b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..47443005d79c7f449525bc996514396a417fa102 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.559866220735786","0.574468085106383","0.6571428571428571","0.6495726495726496","0.7571428571428571","0.36666666666666664","0.6160714285714286","0.54","0.7222222222222222","0.42045454545454547","0.5724137931034483","0.47126436781609193","0.5176470588235295" diff --git a/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_openai_result.pkl b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..89e4fd08b4975c2b05c92c9babf1646125924452 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44bb2aa961b663b26ff72d910b9d3d2267f136705ee89f75b2ec3c1b7628f09a +size 78596 diff --git a/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_openai_result.xlsx b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..b8927f7b32d882989ea51f96f1751bad5619be3b Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_RealWorldQA.xlsx b/llava_phi_c09/llava_phi_c09_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..13780f36081e01d9b040bbea3c4b56ece330a53c Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_RealWorldQA.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_RealWorldQA_acc.csv b/llava_phi_c09/llava_phi_c09_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..06bf616b5c03927d05eb425da8f2041641f78773 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.47843137254901963" diff --git a/llava_phi_c09/llava_phi_c09_RealWorldQA_openai_result.pkl b/llava_phi_c09/llava_phi_c09_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..97eaac4aea7d2de6e16ec57dd89be6d8006f1406 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a1b9cce9be37679936c775f1b436ab5ef5419ff58cba010720da5eebf020692 +size 41411 diff --git a/llava_phi_c09/llava_phi_c09_RealWorldQA_openai_result.xlsx b/llava_phi_c09/llava_phi_c09_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6e110ad5f02ba1c54263b77f3304686574da94fc Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_RealWorldQA_openai_result.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_SEEDBench_IMG.xlsx b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4eac3ce9306fbbbbde774d72dd6e98e993ab0d9c Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_acc.csv b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..cbc6b8e849fa30da9a33f9b8487b326fd294d19e --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.510539629005059","0.4409550440955044","0.56799563080284","0.5670103092783505","0.47648261758691207","0.42296689824274625","0.6640278657378087","0.3774733637747336","0.44047619047619047","0.7190332326283988" diff --git a/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_openai_result.pkl b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b864cc669c3f289e471815b8a8287ba7c2026bcb --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0f0e34fe5e454df5762adfcfbf24fe8cc12f93cc89eba23297bdb929135dd1 +size 768756 diff --git a/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_openai_result.xlsx b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..95a1d12afd79f6fde3dbc2f5dd220f5600ef6fd0 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f08b28c833fe1bff6aa7c040490d832691372fdf54c2098c0f396631c973373 +size 1051391 diff --git a/llava_phi_c09/llava_phi_c09_ScienceQA_VAL.xlsx b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f4a415c657d42ab00442876a3060cb9c60cb8f46 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_acc.csv b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..764ababe3b8397a2f844fc0f1353c1f62bb640ef --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6289938006676205","0.875","1.0","1.0","1.0","1.0","0.6666666666666666","0.6086956521739131","0.5714285714285714","0.640625","1.0","0.375","0.9642857142857143","0.8297872340425532","1.0","0.47619047619047616","0.4","1.0","0.8709677419354839","0.4","0.0","0.5","0.75","0.56","0.7611940298507462","0.9245283018867925","0.358974358974359","0.48","0.6470588235294118","0.19148936170212766","0.38028169014084506","1.0","1.0","1.0","0.38345864661654133","0.3709677419354839","0.8275862068965517","0.5","0.5","0.2765957446808511","0.8","1.0","0.4117647058823529","0.3888888888888889","0.5581395348837209","0.7","1.0","1.0","1.0","0.975609756097561","0.5","0.6904761904761905","0.5522388059701493","0.7767295597484277","0.3333333333333333","0.7105263157894737","0.6666666666666666","0.2","1.0","0.0","1.0","0.6666666666666666","0.0784313725490196","0.7777777777777778","1.0","0.5","1.0" diff --git a/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_openai_result.pkl b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5a4d00fc1a253955bd6a172430b62c54da4ddc8d --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:382b33c626d21e13ef8294a39b0a003094b52e4a3048b6c27171a8f54dc915c0 +size 113352 diff --git a/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..6750e74c087a5f1c3540d16eb7da25c00a923cd3 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_TextVQA_VAL.xlsx b/llava_phi_c09/llava_phi_c09_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..17aadb393ebf3fce9a30b52771d72b84385b6c58 Binary files /dev/null and b/llava_phi_c09/llava_phi_c09_TextVQA_VAL.xlsx differ diff --git a/llava_phi_c09/llava_phi_c09_TextVQA_VAL_acc.csv b/llava_phi_c09/llava_phi_c09_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..a48dafc79db9894b45b0b817f01d2bfa6f1ea474 --- /dev/null +++ b/llava_phi_c09/llava_phi_c09_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"17.351999999999997" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_AI2D_TEST_acc-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_AI2D_TEST_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..a51740dd507fa75c7ff6af7273507f26d047d04e --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_AI2D_TEST_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5301165803108808","0.625","0.6129032258064516","0.5357142857142857","0.6135972461273667","0.41007194244604317","0.4187725631768953","0.5667351129363449","0.4807692307692308","0.46835443037974683","0.417910447761194","0.4634146341463415","0.6944444444444444","0.4606413994169096","0.5625","0.4772727272727273" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_HallusionBench_score-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_HallusionBench_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..682f4ab29d12e2d84001e41f46fef89251956a63 --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_HallusionBench_score-checkpoint.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","44.79495268138801","11.271676300578035","9.89010989010989" +"VS","39.166666666666664","7.758620689655173","7.865168539325842" +"VD","48.223350253807105","13.043478260869565","11.191335740072201" +"VS_chart","32.30769230769231","5.0","13.157894736842104" +"VD_illusion","47.91666666666667","20.967741935483872","6.944444444444445" +"VD_math","46.2962962962963","8.333333333333332","20.37037037037037" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_ocr","50.0","26.923076923076923","7.4074074074074066" +"VD_figure","55.00000000000001","19.51219512195122","17.94871794871795" +"VD_ocr","53.93258426966292","9.30232558139535","9.30232558139535" +"VD_video","43.529411764705884","4.166666666666666","5.797101449275362" +"VS_map","45.3125","0.0","3.125" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_MME_score-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_MME_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..013c545ea217a9bd8227d3021cb09b93c8e1df66 --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_MME_score-checkpoint.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1126.6691676670666","261.7857142857143","72.5","112.25","127.64705882352942","67.5","71.66666666666667","94.28571428571428","130.0","190.0","135.5","50.0","46.666666666666664","96.9387755102041","143.5","50.0" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_POPE_score-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_POPE_score-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..73cba5fe43f0b68a8d6c0ebff49c4a6a7bad3d1f --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_POPE_score-checkpoint.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","81.57667386609072","81.04444444444444","79.34873949579831","83.93333333333334" +"popular","82.55737704918033","82.26666666666667","81.2258064516129","83.93333333333334" +"adversarial","77.21557804354492","75.23333333333333","71.49346961953435","83.93333333333334" +"random","85.38487622923024","85.63333333333333","86.88750862663906","83.93333333333334" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_Q-Bench1_VAL_acc-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_Q-Bench1_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..d827411ba4d95dbcec00dd19e536aaafa5feaa8a --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_Q-Bench1_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5391304347826087","0.5957446808510638","0.5942857142857143","0.6239316239316239","0.8","0.3933333333333333","0.6428571428571429","0.48","0.6444444444444445","0.375","0.5103448275862069","0.4367816091954023","0.5411764705882353" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_RealWorldQA_acc-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_RealWorldQA_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..f665bf9f37569c9495e4ca542d4839f4dacbcd14 --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_RealWorldQA_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4339869281045752" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_SEEDBench_IMG_acc-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_SEEDBench_IMG_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..37279619f103703638cfd6bbd88ca4b5c0fefc66 --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_SEEDBench_IMG_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5406127037661608","0.496235749623575","0.576187875477881","0.5979381443298969","0.4601226993865031","0.46097261953412344","0.6728942368587714","0.4216133942161339","0.7023809523809523","0.7099697885196374" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_ScienceQA_VAL_acc-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_ScienceQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..72922de50de5034731448ba89bed578d7c2f4834 --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_ScienceQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6547448736289938","0.8854166666666666","1.0","1.0","1.0","1.0","1.0","0.6304347826086957","0.7142857142857143","0.5","1.0","0.125","0.9166666666666666","0.8085106382978723","1.0","0.5476190476190477","0.6","1.0","1.0","0.6","1.0","0.5","0.75","0.6","0.8059701492537313","0.9056603773584906","0.38461538461538464","0.48","0.5882352941176471","0.1276595744680851","0.352112676056338","1.0","1.0","1.0","0.43609022556390975","0.3064516129032258","0.8189655172413793","0.5","0.5","0.3617021276595745","0.8","0.5","0.38235294117647056","0.3888888888888889","0.5813953488372093","0.7","1.0","1.0","1.0","0.8292682926829268","0.5","0.7142857142857143","0.582089552238806","0.8553459119496856","0.0","0.868421052631579","0.6666666666666666","0.23333333333333334","1.0","0.0","1.0","0.4444444444444444","0.35294117647058826","1.0","1.0","0.578125","1.0" diff --git a/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_TextVQA_VAL_acc-checkpoint.csv b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_TextVQA_VAL_acc-checkpoint.csv new file mode 100644 index 0000000000000000000000000000000000000000..4082d743dd877414bf7fd7c485fb4f421239e571 --- /dev/null +++ b/llava_phi_c10/.ipynb_checkpoints/llava_phi_c10_TextVQA_VAL_acc-checkpoint.csv @@ -0,0 +1,2 @@ +"Overall" +"18.488000000000003" diff --git a/llava_phi_c10/llava_phi_c10_AI2D_TEST.xlsx b/llava_phi_c10/llava_phi_c10_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..3c616ff39d102fbdc19b2604c044734f5eb9b9f9 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_AI2D_TEST.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_AI2D_TEST_acc.csv b/llava_phi_c10/llava_phi_c10_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..a51740dd507fa75c7ff6af7273507f26d047d04e --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.5301165803108808","0.625","0.6129032258064516","0.5357142857142857","0.6135972461273667","0.41007194244604317","0.4187725631768953","0.5667351129363449","0.4807692307692308","0.46835443037974683","0.417910447761194","0.4634146341463415","0.6944444444444444","0.4606413994169096","0.5625","0.4772727272727273" diff --git a/llava_phi_c10/llava_phi_c10_AI2D_TEST_openai_result.pkl b/llava_phi_c10/llava_phi_c10_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d493b382b19bdb3aaa8baa379e8b6bccf316524e --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d98132ea731a1f9e6fafde05aa036f7dec2929ed6acfa3ea2804e2c1aa99ed5 +size 166877 diff --git a/llava_phi_c10/llava_phi_c10_AI2D_TEST_openai_result.xlsx b/llava_phi_c10/llava_phi_c10_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..97020ef7d64d97cb92fae10c0a9880ce43d47e05 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_AI2D_TEST_openai_result.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_HallusionBench.xlsx b/llava_phi_c10/llava_phi_c10_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..eea3efab3ac5f44617877174b164bafe21ce2d71 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_HallusionBench.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_HallusionBench_auxmatch.xlsx b/llava_phi_c10/llava_phi_c10_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..32bbc83d70745a8be5ddbc628c121f6fbb31f77a Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_HallusionBench_auxmatch.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_HallusionBench_score.csv b/llava_phi_c10/llava_phi_c10_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..682f4ab29d12e2d84001e41f46fef89251956a63 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","44.79495268138801","11.271676300578035","9.89010989010989" +"VS","39.166666666666664","7.758620689655173","7.865168539325842" +"VD","48.223350253807105","13.043478260869565","11.191335740072201" +"VS_chart","32.30769230769231","5.0","13.157894736842104" +"VD_illusion","47.91666666666667","20.967741935483872","6.944444444444445" +"VD_math","46.2962962962963","8.333333333333332","20.37037037037037" +"VS_table","38.392857142857146","0.0","2.3255813953488373" +"VS_ocr","50.0","26.923076923076923","7.4074074074074066" +"VD_figure","55.00000000000001","19.51219512195122","17.94871794871795" +"VD_ocr","53.93258426966292","9.30232558139535","9.30232558139535" +"VD_video","43.529411764705884","4.166666666666666","5.797101449275362" +"VS_map","45.3125","0.0","3.125" diff --git a/llava_phi_c10/llava_phi_c10_MME.xlsx b/llava_phi_c10/llava_phi_c10_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..424f8adfc7ab106af7ec42d0ac6e51bc852edb54 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_MME.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_MME_auxmatch.xlsx b/llava_phi_c10/llava_phi_c10_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9df989f75698cea93a62999e33bd0ce706b2464d Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_MME_auxmatch.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_MME_score.csv b/llava_phi_c10/llava_phi_c10_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..013c545ea217a9bd8227d3021cb09b93c8e1df66 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"1126.6691676670666","261.7857142857143","72.5","112.25","127.64705882352942","67.5","71.66666666666667","94.28571428571428","130.0","190.0","135.5","50.0","46.666666666666664","96.9387755102041","143.5","50.0" diff --git a/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL.xlsx b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..394e104ed54b6e5fbbb64657383f38cadcd90447 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_acc.csv b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..c5ceb2ccce06400513164ca6551d42268003841a --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_acc.csv @@ -0,0 +1,3 @@ +"split","Overall","Accounting","Agriculture","Architecture_and_Engineering","Art","Art_Theory","Basic_Medical_Science","Biology","Chemistry","Clinical_Medicine","Computer_Science","Design","Diagnostics_and_Laboratory_Medicine","Economics","Electronics","Energy_and_Power","Finance","Geography","History","Literature","Manage","Marketing","Materials","Math","Mechanical_Engineering","Music","Pharmacy","Physics","Psychology","Public_Health","Sociology","Art & Design","Business","Health & Medicine","Humanities & Social Science","Science","Tech & Engineering" +"validation","0.3288888888888889","0.26666666666666666","0.36666666666666664","0.3333333333333333","0.43333333333333335","0.36666666666666664","0.3333333333333333","0.36666666666666664","0.2","0.26666666666666666","0.3","0.4666666666666667","0.2","0.43333333333333335","0.26666666666666666","0.3333333333333333","0.23333333333333334","0.3333333333333333","0.3333333333333333","0.7666666666666667","0.26666666666666666","0.2","0.36666666666666664","0.4","0.23333333333333334","0.23333333333333334","0.3","0.16666666666666666","0.3333333333333333","0.43333333333333335","0.3333333333333333","0.375","0.28","0.30666666666666664","0.44166666666666665","0.29333333333333333","0.3142857142857143" +"dev","0.36666666666666664","0.4","0.2","0.0","0.0","0.4","0.4","0.6","0.8","0.2","0.4","0.2","0.4","0.6","0.2","0.6","0.2","0.0","0.8","0.6","0.2","0.2","0.6","0.2","0.2","0.4","0.2","0.4","0.6","0.2","0.8","0.25","0.32","0.28","0.7","0.4","0.3142857142857143" diff --git a/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_openai_result.pkl b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..da15d6bea8370b19dc46325ec12d3fc3bad57282 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7686a5f311a830d0f239150c19cadb7e83b7946d0005fc7412da1abb943db5 +size 66901 diff --git a/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_openai_result.xlsx b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..25ae04d4857587a596e4f3dac5b5b9d9fbf760ac Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_MMMU_DEV_VAL_openai_result.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_POPE.xlsx b/llava_phi_c10/llava_phi_c10_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..0c0e6dfbf5ebade04c6920742565b25754dc4c2f Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_POPE.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_POPE_auxmatch.xlsx b/llava_phi_c10/llava_phi_c10_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..043f395df1bdc48a52bde5e0c1c064efed6ba638 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_POPE_auxmatch.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_POPE_score.csv b/llava_phi_c10/llava_phi_c10_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..73cba5fe43f0b68a8d6c0ebff49c4a6a7bad3d1f --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","81.57667386609072","81.04444444444444","79.34873949579831","83.93333333333334" +"popular","82.55737704918033","82.26666666666667","81.2258064516129","83.93333333333334" +"adversarial","77.21557804354492","75.23333333333333","71.49346961953435","83.93333333333334" +"random","85.38487622923024","85.63333333333333","86.88750862663906","83.93333333333334" diff --git a/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL.xlsx b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c22e9b1c9e72fdeb01562d8ecb13b3d5c077bd0c Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_acc.csv b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..d827411ba4d95dbcec00dd19e536aaafa5feaa8a --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.5391304347826087","0.5957446808510638","0.5942857142857143","0.6239316239316239","0.8","0.3933333333333333","0.6428571428571429","0.48","0.6444444444444445","0.375","0.5103448275862069","0.4367816091954023","0.5411764705882353" diff --git a/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_openai_result.pkl b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0a7694f2419ba09d9c5eb6b4ff84a235beda97f3 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a6da5203957b5a464e79b3276f1d3d671a7ff15d3bd2ca035ae3b662b43507 +size 78596 diff --git a/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_openai_result.xlsx b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9bb9c0ed70ce1ebe6aa2225c3fe60fd6efb28d58 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_RealWorldQA.xlsx b/llava_phi_c10/llava_phi_c10_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..28f358a6d9dfcd1232e9f5490e9136f6c1a96261 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_RealWorldQA.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_RealWorldQA_acc.csv b/llava_phi_c10/llava_phi_c10_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..f665bf9f37569c9495e4ca542d4839f4dacbcd14 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.4339869281045752" diff --git a/llava_phi_c10/llava_phi_c10_RealWorldQA_openai_result.pkl b/llava_phi_c10/llava_phi_c10_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3357a78fde9bfc085647e422b522c03489f95b7c --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2bb64d3f8bd9ada2fb05a5ba51f699127fd48f5708d154c5ffe910820b4df37 +size 41411 diff --git a/llava_phi_c10/llava_phi_c10_RealWorldQA_openai_result.xlsx b/llava_phi_c10/llava_phi_c10_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bded89100c682b8454d3d44e5258be5781534363 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_RealWorldQA_openai_result.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_SEEDBench_IMG.xlsx b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..681b8ae52be115a6e8a3885552d2452af598f2ce Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_acc.csv b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..37279619f103703638cfd6bbd88ca4b5c0fefc66 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.5406127037661608","0.496235749623575","0.576187875477881","0.5979381443298969","0.4601226993865031","0.46097261953412344","0.6728942368587714","0.4216133942161339","0.7023809523809523","0.7099697885196374" diff --git a/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_openai_result.pkl b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1bcac99a06510684bd3addc45645de3bfd8d2aaa --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31159902821a93b93e40a3dbfac3a118f46242b62bc86e44e405a6cbd8c0bb3b +size 768912 diff --git a/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_openai_result.xlsx b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ca4fb3f8e16eef01f5d8c87c8b58e611b5179ef9 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb42dc3af8ad8b99f38dc033e3138f4f6c602866774e134fbe8026c5a96a510 +size 1051467 diff --git a/llava_phi_c10/llava_phi_c10_ScienceQA_VAL.xlsx b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..548d2d74e4f3523955d50e8645458428dae1a62b Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_acc.csv b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..72922de50de5034731448ba89bed578d7c2f4834 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.6547448736289938","0.8854166666666666","1.0","1.0","1.0","1.0","1.0","0.6304347826086957","0.7142857142857143","0.5","1.0","0.125","0.9166666666666666","0.8085106382978723","1.0","0.5476190476190477","0.6","1.0","1.0","0.6","1.0","0.5","0.75","0.6","0.8059701492537313","0.9056603773584906","0.38461538461538464","0.48","0.5882352941176471","0.1276595744680851","0.352112676056338","1.0","1.0","1.0","0.43609022556390975","0.3064516129032258","0.8189655172413793","0.5","0.5","0.3617021276595745","0.8","0.5","0.38235294117647056","0.3888888888888889","0.5813953488372093","0.7","1.0","1.0","1.0","0.8292682926829268","0.5","0.7142857142857143","0.582089552238806","0.8553459119496856","0.0","0.868421052631579","0.6666666666666666","0.23333333333333334","1.0","0.0","1.0","0.4444444444444444","0.35294117647058826","1.0","1.0","0.578125","1.0" diff --git a/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_openai_result.pkl b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..44c4bc636fcdd07b6b814fa03fba836fe3efb733 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f369a530aa4fd76c440f705682c16ce28f86bb44febb3f8c3932b70001be8e +size 113352 diff --git a/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_openai_result.xlsx b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..af3d98aa9c0ba21791882f6f06d75d39faa8fed5 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_ScienceQA_VAL_openai_result.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_TextVQA_VAL.xlsx b/llava_phi_c10/llava_phi_c10_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..5e0f3da61d041fbab65a5c3c55c1c8d79bf70507 Binary files /dev/null and b/llava_phi_c10/llava_phi_c10_TextVQA_VAL.xlsx differ diff --git a/llava_phi_c10/llava_phi_c10_TextVQA_VAL_acc.csv b/llava_phi_c10/llava_phi_c10_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..4082d743dd877414bf7fd7c485fb4f421239e571 --- /dev/null +++ b/llava_phi_c10/llava_phi_c10_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"18.488000000000003" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a85b662a5f62b9bd7411104b4a0ee5138a8d5132 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_PREV.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..84bcf31b5def20455bd24c910ee10ad48fdf94d8 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f29edf06ab5021199ba0d7663c6ddbf6595161b7f1e30e9555de0b983aefcd7 +size 17094 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..c82c76720b3e808a7fee7f2146c75f7a6d240ec2 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","ALL" +"val","0.4890829694323144","0.4890829694323144" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_openai_result.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..343c42d82529992f9da268db0c0d4a32806e4e02 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32e632861958c34dd3bc4108aaa56a198d11caa79a0f8cec5f87428d6e0cd34 +size 96304 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_openai_result.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..881d1b8f25a1697d9efe3a0476b71e12567ed31b Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_A-OKVQA_openai_result.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2a8d1d8e1716d01cfb8d5ebe8d26f40f83f85c0a Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..78298c0a4fcbbce1130d3b0847b18f441c80c416 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","atomStructure","eclipses","faultsEarthquakes","foodChainsWebs","lifeCycles","moonPhaseEquinox","partsOfA","partsOfTheEarth","photosynthesisRespiration","rockCycle","rockStrata","solarSystem","typesOf","volcano","waterCNPCycle" +"none","0.16321243523316062","0.0","0.1935483870967742","0.2857142857142857","0.14371772805507746","0.2038369304556355","0.20577617328519857","0.16427104722792607","0.25","0.16455696202531644","0.29850746268656714","0.0975609756097561","0.08333333333333333","0.11370262390670553","0.125","0.1590909090909091" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_openai_result.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0268194e912248dadd605f4351da2cc87c14830c --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a5e0df374a992a8e0656831b6b5b562dbbaf09b15ce3ebe75087d5ec259697 +size 310321 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_openai_result.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..efa7933caeb45470f94e1f6bf3ce0d26e49dff5c Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_AI2D_TEST_openai_result.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..19129e17187db3f4c2baa67c273b14121de55173 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench_auxmatch.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a9285c855e67ce875476976ffea06ba76f673101 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench_auxmatch.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench_score.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..0c759b43cddacbc031305a4292f1ce00974a4818 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_HallusionBench_score.csv @@ -0,0 +1,13 @@ +"split","aAcc","fAcc","qAcc" +"Overall","33.4384858044164","14.739884393063585","9.670329670329672" +"VD","38.74788494077834","18.695652173913043","13.718411552346572" +"VS","24.72222222222222","6.896551724137931","3.3707865168539324" +"VS_table","21.428571428571427","0.0","0.0" +"VS_ocr","35.18518518518518","26.923076923076923","11.11111111111111" +"VS_chart","16.153846153846153","0.0","3.9473684210526314" +"VS_map","39.0625","4.545454545454546","0.0" +"VD_ocr","52.80898876404494","25.581395348837212","23.25581395348837" +"VD_illusion","54.166666666666664","25.806451612903224","16.666666666666664" +"VD_math","32.407407407407405","8.333333333333332","14.814814814814813" +"VD_figure","57.49999999999999","29.268292682926827","17.94871794871795" +"VD_video","13.529411764705882","2.083333333333333","1.4492753623188406" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MME.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..d16658fa083a7cc6f17e0946ddeb6fc21d73141a Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_PREV.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ef719497af4f2c15eb1342a1bd463f3adc579bd9 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a892eb3b2911dd4b91c9b6c377bc5dbf6f274bca084d8e7e454eebfdfab66731 +size 20080 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_auxmatch.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c177a513c8c62c57ae2dcc8519da09a497079e85 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_auxmatch.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_score.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..a958a70997c037f68ceb20b39e48d358a70b82b4 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_MME_score.csv @@ -0,0 +1,2 @@ +"perception","reasoning","OCR","artwork","celebrity","code_reasoning","color","commonsense_reasoning","count","existence","landmark","numerical_calculation","position","posters","scene","text_translation" +"952.0464185674269","114.64285714285715","95.0","70.0","33.52941176470588","30.0","110.0","82.14285714285715","99.99999999999999","170.0","74.5","2.5","43.333333333333336","109.18367346938776","146.5","0.0" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..61afe222be56d45268aed254b549d1ef79f48ac8 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..b6609fca7c0d9424ae6f94f490c4fab5cc4786b2 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_acc.csv @@ -0,0 +1,3 @@ +"split","Overall","Accounting","Agriculture","Architecture_and_Engineering","Art","Art_Theory","Basic_Medical_Science","Biology","Chemistry","Clinical_Medicine","Computer_Science","Design","Diagnostics_and_Laboratory_Medicine","Economics","Electronics","Energy_and_Power","Finance","Geography","History","Literature","Manage","Marketing","Materials","Math","Mechanical_Engineering","Music","Pharmacy","Physics","Psychology","Public_Health","Sociology","Art & Design","Business","Health & Medicine","Humanities & Social Science","Science","Tech & Engineering" +"validation","0.15888888888888889","0.3","0.0","0.26666666666666666","0.1","0.06666666666666667","0.13333333333333333","0.13333333333333333","0.13333333333333333","0.13333333333333333","0.06666666666666667","0.23333333333333334","0.0","0.2","0.1","0.2","0.06666666666666667","0.06666666666666667","0.2","0.36666666666666664","0.16666666666666666","0.2","0.2","0.16666666666666666","0.2","0.1","0.16666666666666666","0.2","0.16666666666666666","0.3333333333333333","0.1","0.125","0.18666666666666668","0.15333333333333332","0.20833333333333334","0.14","0.14761904761904762" +"dev","0.12666666666666668","0.2","0.0","0.0","0.0","0.0","0.0","0.2","0.6","0.0","0.2","0.0","0.0","0.2","0.0","0.2","0.0","0.0","0.2","0.2","0.2","0.2","0.0","0.2","0.2","0.0","0.0","0.2","0.2","0.4","0.2","0.0","0.16","0.08","0.2","0.24","0.08571428571428572" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_openai_result.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..543371632690050262734e539dc065af8436d30f --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a76bb015a5372004996fe565e1fe4d803af744e6346e25252d8653d7071378 +size 109916 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_openai_result.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e2dc984684facf756b93a10e3a559b1b056420d6 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_MMMU_DEV_VAL_openai_result.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..a7ec259913c2383174fe953ae3cd63b4ff596819 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_PREV.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..da393677de10954ace7f3fe7d63d5f46992c09c4 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2024c6225f3334ababaa6fb82e068f979f5c3b3069018fe308c5cb0b6b55b810 +size 41630 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_auxmatch.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_auxmatch.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..599640db2ae2edf10ee10a8fada22673c52d0ef3 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_auxmatch.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_score.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_score.csv new file mode 100644 index 0000000000000000000000000000000000000000..b861aa0b5d2793b7dcd40229ada983e7bd62d4c6 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_POPE_score.csv @@ -0,0 +1,5 @@ +"split","Overall","acc","precision","recall" +"Overall","82.60516155252998","77.45555555555555","76.09509546986148","90.33333333333333" +"popular","80.96803107260233","76.43333333333334","73.36220898754738","90.33333333333333" +"adversarial","79.8938679245283","75.06666666666668","71.61733615221986","90.33333333333333" +"random","87.33483725427006","80.86666666666666","84.52900810979413","90.33333333333333" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..4ee0d413acce60772cd80f36e3f339c98ebe514b Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_PREV.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..300bf2b8c4fd29ea6921d677247d2e558db0103e --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16560cdb494f9574f5b00051ee0c60da0f56c40f164bece0c06cb8ab0eb9cd20 +size 39558 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..3b7122ad99ed9afa207368640b86c6aa28c697ab --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","type_0_concern_0","type_0_concern_1","type_0_concern_2","type_0_concern_3","type_1_concern_0","type_1_concern_1","type_1_concern_2","type_1_concern_3","type_2_concern_0","type_2_concern_1","type_2_concern_2","type_2_concern_3" +"val","0.0588628762541806","0.03723404255319149","0.06857142857142857","0.042735042735042736","0.22857142857142856","0.06","0.08928571428571429","0.03","0.17777777777777778","0.005681818181818182","0.020689655172413793","0.011494252873563218","0.058823529411764705" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_openai_result.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5eb47594da5944f7c71d889b90c3d6098ca6d4b6 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba78afde2a4d93c590dc77d61e04b3c29e9a064bba2f5985df5776cda4a83a60 +size 185135 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_openai_result.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2e3d2c4f78fba5ba8ef2091ba2aa0dfacbdb2ce8 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_Q-Bench1_VAL_openai_result.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e46aa6c43807ee1336f729c9ca5ac801934e04b5 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..c13cd1153cf6ddb7b4c1f08440245106ad33aa05 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_acc.csv @@ -0,0 +1,2 @@ +"split","Overall" +"none","0.19869281045751633" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_openai_result.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3e9700009e1a749bfcaa73cf00362f0d3607df91 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be91bb5087f2cc65d3c24f8fcf66f2ac95d0ba45c6266b78a8b128db34b523e +size 82347 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_openai_result.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..7fa5536924c7f6da5d24da170286b9b8dd004d29 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_RealWorldQA_openai_result.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e30256a5c1390ba4bcc57069c8e1d23148fd4bad --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5538442b965a4bfd5b04d1b518284874a47f8fb1ac788f585a68f17ab5c6cc1 +size 1005915 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..5e3215bd2b6c165e3fc1b6ebed01eef2d24efed6 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Instance Attributes","Instance Identity","Instance Interaction","Instance Location","Instances Counting","Scene Understanding","Spatial Relation","Text Understanding","Visual Reasoning" +"none","0.3192804946599213","0.28651322865132284","0.3244128891316221","0.24742268041237114","0.23210633946830267","0.2917858602370249","0.4430018999366688","0.1308980213089802","0.09523809523809523","0.48338368580060426" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_openai_result.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..12f23d52cc62b2dc84e860cd915e650ab10d4845 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f32e3edbe4c4c1b0effe4267323addee5a67326ab3508febfa32aa91b42586c +size 1415544 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_openai_result.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..2ed7790dab60262ddedd3aa0a1f03e0266134c77 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_SEEDBench_IMG_openai_result.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3868ab8065ec489d508ae64cf56f985563e6db45a46ff36e1814855c13c6152d +size 1144119 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..adeed25bbc309cb35e4ed014ba7e41b925df5c83 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_PREV.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..741c8edbc9c6cf4c5d96837c098b639c11cbdd3a --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5a7bd4ef699253bce077925aad286f9f9fbc02acaf40cb4add3ce287c2ffc59 +size 44837 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..b0aaea53e8e34e62f9300185e4de9a3769c51868 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"split","Overall","Adaptations","Adaptations and natural selection","Age of Exploration","Ancient Egypt and Kush","Ancient Mesopotamia","Animals","Astronomy","Atoms and molecules","Basic economic principles","Chemical reactions","Cities","Classification","Classification and scientific names","Climate change","Colonial America","Context clues","Descriptive details","Designing experiments","Domain-specific vocabulary","Early 19th century American history","Early Americas","Earth events","Ecological interactions","Ecosystems","Engineering practices","English colonies in North America","Force and motion","Fossils","Genes to traits","Geography","Government","Independent reading comprehension","Informational texts: level 1","Magnets","Maps","Materials","Medieval Asia","Natural resources and human impacts","Oceania: geography","Oceans and continents","Oceans and continents ","Particle motion and energy","Persuasive strategies","Physical Geography","Plant reproduction","Plants","Plate tectonics","Read-alone texts","Rocks and minerals","Rome and the Byzantine Empire","Scientific names","Solutions","State capitals","States","States of matter","The American Revolution","The Americas: geography","The Antebellum period","The Civil War and Reconstruction","The Silk Road","Thermal energy","Velocity, acceleration, and forces","Visual elements","Water cycle","Weather and climate","World religions" +"val","0.2641869337148307","0.020833333333333332","0.5","1.0","0.0","0.0","0.3333333333333333","0.43478260869565216","0.19047619047619047","0.328125","0.0","0.0","0.0","0.0","0.0","0.2857142857142857","0.0","0.0","0.5806451612903226","0.0","0.0","0.0","0.25","0.12","0.26865671641791045","0.8113207547169812","0.15384615384615385","0.0","0.0","0.2127659574468085","0.18309859154929578","1.0","0.0","0.0","0.3308270676691729","0.16129032258064516","0.008620689655172414","0.0","0.5","0.02127659574468085","0.3","0.0","0.4852941176470588","0.16666666666666666","0.16279069767441862","0.0","0.1","0.5","1.0","0.6097560975609756","0.5","0.023809523809523808","0.373134328358209","0.5534591194968553","0.0","0.10526315789473684","0.0","0.0","0.6666666666666666","0.0","1.0","0.2777777777777778","0.11764705882352941","0.0","0.0","0.03125","0.6666666666666666" diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_openai_result.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_openai_result.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ae3e5103d2248c6996dcd49eaa88b06c32507d31 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_openai_result.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627df3092e9e93b1948ad9c36f8cb0230a9d319ce895f4e102daddf767f66e32 +size 232436 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_openai_result.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_openai_result.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..e03876aee86633ccb46db9c3a188982581faaf06 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_ScienceQA_VAL_openai_result.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL.xlsx b/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..c9da3ffa4372e949f58e7a8794818360a7b24905 Binary files /dev/null and b/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL.xlsx differ diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL_PREV.pkl b/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL_PREV.pkl new file mode 100644 index 0000000000000000000000000000000000000000..282068f38b74d31245161f3dff31dd9233bf6cac --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL_PREV.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53bd6abd8499008e065e089dc7d61f72f13f0a555cb75de2855d55d46a7b212 +size 59816 diff --git a/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL_acc.csv b/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL_acc.csv new file mode 100644 index 0000000000000000000000000000000000000000..9538bcf1f355d066fd38a01265246786b63fca87 --- /dev/null +++ b/vicuna7b_moe_llava/vicuna7b_moe_llava_TextVQA_VAL_acc.csv @@ -0,0 +1,2 @@ +"Overall" +"10.966"