Spaces:
Sleeping
Sleeping
update
Browse files- .gitattributes +36 -0
- app.py +22 -3
- results_qwen/CodeLlama-70b-Instruct-hf.csv +3 -0
- results_qwen/CodeLlama-70b-Instruct-hf.jpg +3 -0
- results_qwen/CodeLlama-70b-Instruct-hf.pkl +3 -0
- results_qwen/CodeLlama-70b-Instruct-hf.png +3 -0
- results_qwen/Mixtral-8x7B-Instruct-v0.1.csv +3 -0
- results_qwen/Mixtral-8x7B-Instruct-v0.1.jpg +3 -0
- results_qwen/Mixtral-8x7B-Instruct-v0.1.pkl +3 -0
- results_qwen/Mixtral-8x7B-Instruct-v0.1.png +3 -0
- results_qwen/claude-3-haiku-20240307.csv +3 -0
- results_qwen/claude-3-haiku-20240307.jpg +3 -0
- results_qwen/claude-3-haiku-20240307.pkl +3 -0
- results_qwen/claude-3-haiku-20240307.png +3 -0
- results_qwen/claude-3-opus-20240229.csv +3 -0
- results_qwen/claude-3-opus-20240229.jpg +3 -0
- results_qwen/claude-3-opus-20240229.pkl +3 -0
- results_qwen/claude-3-opus-20240229.png +3 -0
- results_qwen/deepseek-llm-67b-chat.csv +3 -0
- results_qwen/deepseek-llm-67b-chat.jpg +3 -0
- results_qwen/deepseek-llm-67b-chat.pkl +3 -0
- results_qwen/deepseek-llm-67b-chat.png +3 -0
- results_qwen/gemma-7b-it.csv +3 -0
- results_qwen/gemma-7b-it.jpg +3 -0
- results_qwen/gemma-7b-it.pkl +3 -0
- results_qwen/gemma-7b-it.png +3 -0
- results_qwen/gpt-3.5-turbo-0125.csv +3 -0
- results_qwen/gpt-3.5-turbo-0125.jpg +3 -0
- results_qwen/gpt-3.5-turbo-0125.pkl +3 -0
- results_qwen/gpt-3.5-turbo-0125.png +3 -0
- results_qwen/gpt-4-0125-preview.csv +3 -0
- results_qwen/gpt-4-0125-preview.jpg +3 -0
- results_qwen/gpt-4-0125-preview.pkl +3 -0
- results_qwen/gpt-4-0125-preview.png +3 -0
- results_qwen/gpt-4-turbo-2024-04-09.csv +3 -0
- results_qwen/gpt-4-turbo-2024-04-09.jpg +3 -0
- results_qwen/gpt-4-turbo-2024-04-09.pkl +3 -0
- results_qwen/gpt-4-turbo-2024-04-09.png +3 -0
.gitattributes
CHANGED
|
@@ -195,3 +195,39 @@ results/claude-3-sonnet-20240229.jpg filter=lfs diff=lfs merge=lfs -text
|
|
| 195 |
results/claude-3-sonnet-20240229.pkl filter=lfs diff=lfs merge=lfs -text
|
| 196 |
results/deepseek-llm-67b-chat.jpg filter=lfs diff=lfs merge=lfs -text
|
| 197 |
results-vision/claude-3-opus-20240229.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
results/claude-3-sonnet-20240229.pkl filter=lfs diff=lfs merge=lfs -text
|
| 196 |
results/deepseek-llm-67b-chat.jpg filter=lfs diff=lfs merge=lfs -text
|
| 197 |
results-vision/claude-3-opus-20240229.csv filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
results_qwen/CodeLlama-70b-Instruct-hf.csv filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
results_qwen/claude-3-opus-20240229.png filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
results_qwen/gpt-4-0125-preview.png filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
results_qwen/gpt-4-turbo-2024-04-09.jpg filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
results_qwen/claude-3-haiku-20240307.csv filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
results_qwen/gemma-7b-it.pkl filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
results_qwen/gpt-4-0125-preview.csv filter=lfs diff=lfs merge=lfs -text
|
| 205 |
+
results_qwen/gpt-4-0125-preview.pkl filter=lfs diff=lfs merge=lfs -text
|
| 206 |
+
results_qwen/CodeLlama-70b-Instruct-hf.jpg filter=lfs diff=lfs merge=lfs -text
|
| 207 |
+
results_qwen/Mixtral-8x7B-Instruct-v0.1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 208 |
+
results_qwen/deepseek-llm-67b-chat.pkl filter=lfs diff=lfs merge=lfs -text
|
| 209 |
+
results_qwen/gemma-7b-it.jpg filter=lfs diff=lfs merge=lfs -text
|
| 210 |
+
results_qwen/gemma-7b-it.png filter=lfs diff=lfs merge=lfs -text
|
| 211 |
+
results_qwen/gpt-3.5-turbo-0125.pkl filter=lfs diff=lfs merge=lfs -text
|
| 212 |
+
results_qwen/Mixtral-8x7B-Instruct-v0.1.pkl filter=lfs diff=lfs merge=lfs -text
|
| 213 |
+
results_qwen/gpt-4-0125-preview.jpg filter=lfs diff=lfs merge=lfs -text
|
| 214 |
+
results_qwen/gpt-4-turbo-2024-04-09.csv filter=lfs diff=lfs merge=lfs -text
|
| 215 |
+
results_qwen/gpt-4-turbo-2024-04-09.png filter=lfs diff=lfs merge=lfs -text
|
| 216 |
+
results_qwen/Mixtral-8x7B-Instruct-v0.1.csv filter=lfs diff=lfs merge=lfs -text
|
| 217 |
+
results_qwen/Mixtral-8x7B-Instruct-v0.1.png filter=lfs diff=lfs merge=lfs -text
|
| 218 |
+
results_qwen/claude-3-haiku-20240307.pkl filter=lfs diff=lfs merge=lfs -text
|
| 219 |
+
results_qwen/claude-3-haiku-20240307.png filter=lfs diff=lfs merge=lfs -text
|
| 220 |
+
results_qwen/claude-3-opus-20240229.jpg filter=lfs diff=lfs merge=lfs -text
|
| 221 |
+
results_qwen/deepseek-llm-67b-chat.csv filter=lfs diff=lfs merge=lfs -text
|
| 222 |
+
results_qwen/CodeLlama-70b-Instruct-hf.pkl filter=lfs diff=lfs merge=lfs -text
|
| 223 |
+
results_qwen/deepseek-llm-67b-chat.jpg filter=lfs diff=lfs merge=lfs -text
|
| 224 |
+
results_qwen/gpt-3.5-turbo-0125.csv filter=lfs diff=lfs merge=lfs -text
|
| 225 |
+
results_qwen/claude-3-opus-20240229.csv filter=lfs diff=lfs merge=lfs -text
|
| 226 |
+
results_qwen/claude-3-opus-20240229.pkl filter=lfs diff=lfs merge=lfs -text
|
| 227 |
+
results_qwen/gemma-7b-it.csv filter=lfs diff=lfs merge=lfs -text
|
| 228 |
+
results_qwen/gpt-3.5-turbo-0125.jpg filter=lfs diff=lfs merge=lfs -text
|
| 229 |
+
results_qwen/gpt-3.5-turbo-0125.png filter=lfs diff=lfs merge=lfs -text
|
| 230 |
+
results_qwen/gpt-4-turbo-2024-04-09.pkl filter=lfs diff=lfs merge=lfs -text
|
| 231 |
+
results_qwen/CodeLlama-70b-Instruct-hf.png filter=lfs diff=lfs merge=lfs -text
|
| 232 |
+
results_qwen/claude-3-haiku-20240307.jpg filter=lfs diff=lfs merge=lfs -text
|
| 233 |
+
results_qwen/deepseek-llm-67b-chat.png filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -15,7 +15,8 @@ import pandas as pd
|
|
| 15 |
|
| 16 |
|
| 17 |
# Load text benchmark results
|
| 18 |
-
|
|
|
|
| 19 |
# Load vision benchmark results
|
| 20 |
vision_results = glob("results-vision/*.pkl")
|
| 21 |
# Load CoT text benchmark results
|
|
@@ -36,7 +37,7 @@ def load_data(files, model_type):
|
|
| 36 |
|
| 37 |
|
| 38 |
# Load and label all data
|
| 39 |
-
data = load_data(
|
| 40 |
vision_data = load_data(vision_results, "Vision")
|
| 41 |
cot_text_data = load_data(cot_text_results, "CoT Text Only")
|
| 42 |
# cot_vision_data = load_data(cot_vision_results, "CoT Vision")
|
|
@@ -61,7 +62,7 @@ text_only_filtered_raw_cot = None
|
|
| 61 |
|
| 62 |
|
| 63 |
# Load the csv files into a dict with keys being name of the file and values being the data
|
| 64 |
-
data = {file: pd.read_pickle(file) for file in
|
| 65 |
# Load the vision files into a dict
|
| 66 |
vision_data = {file: pd.read_pickle(file) for file in vision_results}
|
| 67 |
# Load the CoT text files into a dict
|
|
@@ -69,6 +70,8 @@ cot_text_data = {file: pd.read_pickle(file) for file in cot_text_results}
|
|
| 69 |
# Load the CoT vision files into a dict
|
| 70 |
# cot_vision_data = {file: pd.read_pickle(file) for file in cot_vision_results}
|
| 71 |
|
|
|
|
|
|
|
| 72 |
|
| 73 |
intersection_df = pd.read_pickle(
|
| 74 |
"./intersection_results/gpt-3.5-judge-by_Qwen_5times_intersection_subset_1.pkl"
|
|
@@ -124,12 +127,15 @@ def process_data(data):
|
|
| 124 |
|
| 125 |
# Process all data
|
| 126 |
text_data_for_df = process_data(data)
|
|
|
|
|
|
|
| 127 |
vision_data_for_df = process_data(vision_data)
|
| 128 |
cot_text_data_for_df = process_data(cot_text_data)
|
| 129 |
# cot_vision_data_for_df = process_data(cot_vision_data)
|
| 130 |
|
| 131 |
# Create DataFrames
|
| 132 |
accuracy_df = pd.DataFrame(text_data_for_df, columns=column_names)
|
|
|
|
| 133 |
vision_accuracy_df = pd.DataFrame(vision_data_for_df, columns=column_names)
|
| 134 |
cot_text_accuracy_df = pd.DataFrame(cot_text_data_for_df, columns=column_names)
|
| 135 |
# cot_vision_accuracy_df = pd.DataFrame(cot_vision_data_for_df, columns=column_names)
|
|
@@ -146,6 +152,7 @@ def finalize_df(df):
|
|
| 146 |
|
| 147 |
# Finalize all DataFrames
|
| 148 |
accuracy_df = finalize_df(accuracy_df)
|
|
|
|
| 149 |
vision_accuracy_df = finalize_df(vision_accuracy_df)
|
| 150 |
cot_text_accuracy_df = finalize_df(cot_text_accuracy_df)
|
| 151 |
# cot_vision_accuracy_df = finalize_df(cot_vision_accuracy_df)
|
|
@@ -156,6 +163,11 @@ def load_heatmap(evt: gr.SelectData):
|
|
| 156 |
return heatmap_image
|
| 157 |
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
def load_vision_heatmap(evt: gr.SelectData):
|
| 160 |
heatmap_image = gr.Image(f"results-vision/{evt.value}.jpg")
|
| 161 |
return heatmap_image
|
|
@@ -430,6 +442,13 @@ with gr.Blocks() as demo:
|
|
| 430 |
heatmap_image = gr.Image(label="", show_label=False)
|
| 431 |
leader_board.select(fn=load_heatmap, outputs=[heatmap_image])
|
| 432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
with gr.Tab("Vision Benchmark"):
|
| 434 |
gr.Markdown("# Vision Benchmark Leaderboard")
|
| 435 |
leader_board_vision = gr.Dataframe(
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
# Load text benchmark results
|
| 18 |
+
noncot_results = glob("results/*.pkl")
|
| 19 |
+
noncot_results_qwen = glob("results_qwen/*.pkl")
|
| 20 |
# Load vision benchmark results
|
| 21 |
vision_results = glob("results-vision/*.pkl")
|
| 22 |
# Load CoT text benchmark results
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
# Load and label all data
|
| 40 |
+
data = load_data(noncot_results, "Text Only")
|
| 41 |
vision_data = load_data(vision_results, "Vision")
|
| 42 |
cot_text_data = load_data(cot_text_results, "CoT Text Only")
|
| 43 |
# cot_vision_data = load_data(cot_vision_results, "CoT Vision")
|
|
|
|
| 62 |
|
| 63 |
|
| 64 |
# Load the csv files into a dict with keys being name of the file and values being the data
|
| 65 |
+
data = {file: pd.read_pickle(file) for file in noncot_results}
|
| 66 |
# Load the vision files into a dict
|
| 67 |
vision_data = {file: pd.read_pickle(file) for file in vision_results}
|
| 68 |
# Load the CoT text files into a dict
|
|
|
|
| 70 |
# Load the CoT vision files into a dict
|
| 71 |
# cot_vision_data = {file: pd.read_pickle(file) for file in cot_vision_results}
|
| 72 |
|
| 73 |
+
data_qwen = {file: pd.read_pickle(file) for file in noncot_results_qwen}
|
| 74 |
+
|
| 75 |
|
| 76 |
intersection_df = pd.read_pickle(
|
| 77 |
"./intersection_results/gpt-3.5-judge-by_Qwen_5times_intersection_subset_1.pkl"
|
|
|
|
| 127 |
|
| 128 |
# Process all data
|
| 129 |
text_data_for_df = process_data(data)
|
| 130 |
+
text_data_for_df_qwen = process_data(data_qwen)
|
| 131 |
+
|
| 132 |
vision_data_for_df = process_data(vision_data)
|
| 133 |
cot_text_data_for_df = process_data(cot_text_data)
|
| 134 |
# cot_vision_data_for_df = process_data(cot_vision_data)
|
| 135 |
|
| 136 |
# Create DataFrames
|
| 137 |
accuracy_df = pd.DataFrame(text_data_for_df, columns=column_names)
|
| 138 |
+
accuracy_df_qwen = pd.DataFrame(text_data_for_df_qwen, columns=column_names)
|
| 139 |
vision_accuracy_df = pd.DataFrame(vision_data_for_df, columns=column_names)
|
| 140 |
cot_text_accuracy_df = pd.DataFrame(cot_text_data_for_df, columns=column_names)
|
| 141 |
# cot_vision_accuracy_df = pd.DataFrame(cot_vision_data_for_df, columns=column_names)
|
|
|
|
| 152 |
|
| 153 |
# Finalize all DataFrames
|
| 154 |
accuracy_df = finalize_df(accuracy_df)
|
| 155 |
+
accuracy_df_qwen = finalize_df(accuracy_df_qwen)
|
| 156 |
vision_accuracy_df = finalize_df(vision_accuracy_df)
|
| 157 |
cot_text_accuracy_df = finalize_df(cot_text_accuracy_df)
|
| 158 |
# cot_vision_accuracy_df = finalize_df(cot_vision_accuracy_df)
|
|
|
|
| 163 |
return heatmap_image
|
| 164 |
|
| 165 |
|
| 166 |
+
def load_heatmap_qwen(evt: gr.SelectData):
|
| 167 |
+
heatmap_image = gr.Image(f"results_qwen/{evt.value}.jpg")
|
| 168 |
+
return heatmap_image
|
| 169 |
+
|
| 170 |
+
|
| 171 |
def load_vision_heatmap(evt: gr.SelectData):
|
| 172 |
heatmap_image = gr.Image(f"results-vision/{evt.value}.jpg")
|
| 173 |
return heatmap_image
|
|
|
|
| 442 |
heatmap_image = gr.Image(label="", show_label=False)
|
| 443 |
leader_board.select(fn=load_heatmap, outputs=[heatmap_image])
|
| 444 |
|
| 445 |
+
with gr.Tab("Text-only Benchmark (Judged by Qwen)"):
|
| 446 |
+
gr.Markdown("# Text-only Leaderboard (Judged by Qwen)")
|
| 447 |
+
leader_board = gr.Dataframe(accuracy_df_qwen, headers=headers_with_icons)
|
| 448 |
+
gr.Markdown("## Heatmap")
|
| 449 |
+
heatmap_image_qwen = gr.Image(label="", show_label=False)
|
| 450 |
+
leader_board.select(fn=load_heatmap_qwen, outputs=[heatmap_image_qwen])
|
| 451 |
+
|
| 452 |
with gr.Tab("Vision Benchmark"):
|
| 453 |
gr.Markdown("# Vision Benchmark Leaderboard")
|
| 454 |
leader_board_vision = gr.Dataframe(
|
results_qwen/CodeLlama-70b-Instruct-hf.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8eb9e387ef5e3ec72c0cf7f9fb69721135481977c3b2277cee5511312b8975cd
|
| 3 |
+
size 14962036
|
results_qwen/CodeLlama-70b-Instruct-hf.jpg
ADDED
|
Git LFS Details
|
results_qwen/CodeLlama-70b-Instruct-hf.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:203ab4a96fdca08c9155d6cec16d72eac15e35506e4ef551fa0fe8cf867c96a5
|
| 3 |
+
size 14974843
|
results_qwen/CodeLlama-70b-Instruct-hf.png
ADDED
|
Git LFS Details
|
results_qwen/Mixtral-8x7B-Instruct-v0.1.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b4ad69d274c473073093be7fb46a2d1070a3db35cfdfbb196a927efc2df204f
|
| 3 |
+
size 15659205
|
results_qwen/Mixtral-8x7B-Instruct-v0.1.jpg
ADDED
|
Git LFS Details
|
results_qwen/Mixtral-8x7B-Instruct-v0.1.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:579a96d609f2bd4650a078944535c7bf2d348f9304bf627248c1a40910815452
|
| 3 |
+
size 15660660
|
results_qwen/Mixtral-8x7B-Instruct-v0.1.png
ADDED
|
Git LFS Details
|
results_qwen/claude-3-haiku-20240307.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb5ca14f6f17cd4c3422e071a6e03dfc5504cb3709f5422a4b44c01daa00f57f
|
| 3 |
+
size 17778799
|
results_qwen/claude-3-haiku-20240307.jpg
ADDED
|
Git LFS Details
|
results_qwen/claude-3-haiku-20240307.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4a38e2e7e8f22ec5c7c55561a25cd51cfe1e261f4c171e9995993b9d41bb028
|
| 3 |
+
size 17780948
|
results_qwen/claude-3-haiku-20240307.png
ADDED
|
Git LFS Details
|
results_qwen/claude-3-opus-20240229.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:413814671b1a00fe6b8fa308ab8fc4e76a046799d2e275c5fe51a1606d0a5062
|
| 3 |
+
size 18235109
|
results_qwen/claude-3-opus-20240229.jpg
ADDED
|
Git LFS Details
|
results_qwen/claude-3-opus-20240229.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be62c58b6b5e21ae49bdf5ba96ad4628d959b9843a2eba691bddd9c5bd717f1d
|
| 3 |
+
size 18253951
|
results_qwen/claude-3-opus-20240229.png
ADDED
|
Git LFS Details
|
results_qwen/deepseek-llm-67b-chat.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:668c30672d315df2c899df7627039723fc35bba099ae410792729a087a65b9fb
|
| 3 |
+
size 12916783
|
results_qwen/deepseek-llm-67b-chat.jpg
ADDED
|
Git LFS Details
|
results_qwen/deepseek-llm-67b-chat.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e26477861326f0784a70903beadad6a4ca7dc61e18e3a76db9cc713d196dab0
|
| 3 |
+
size 12957786
|
results_qwen/deepseek-llm-67b-chat.png
ADDED
|
Git LFS Details
|
results_qwen/gemma-7b-it.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abcf0738cde1e241535a90aae98338d1b9505a844c9561877e245b604676e489
|
| 3 |
+
size 11246490
|
results_qwen/gemma-7b-it.jpg
ADDED
|
Git LFS Details
|
results_qwen/gemma-7b-it.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cd2216230dadc2adf5a73a1b64c799f27913cf4033e7333290962c8926b9557
|
| 3 |
+
size 11301306
|
results_qwen/gemma-7b-it.png
ADDED
|
Git LFS Details
|
results_qwen/gpt-3.5-turbo-0125.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3da618470a9256a02824d308cab84e80400ded429b9157c20b89ba720f708bb1
|
| 3 |
+
size 8041854
|
results_qwen/gpt-3.5-turbo-0125.jpg
ADDED
|
Git LFS Details
|
results_qwen/gpt-3.5-turbo-0125.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e224821031c0d87b38a113efff8a9100fe770d067d2d94662c81ad6207718963
|
| 3 |
+
size 8069783
|
results_qwen/gpt-3.5-turbo-0125.png
ADDED
|
Git LFS Details
|
results_qwen/gpt-4-0125-preview.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd1c1982308ad0b1f510001f407bf50a9378dc94391993f4116c02beaa55c27f
|
| 3 |
+
size 15996843
|
results_qwen/gpt-4-0125-preview.jpg
ADDED
|
Git LFS Details
|
results_qwen/gpt-4-0125-preview.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d31b186ce750657cbddcabbf6998069ecf2f6631954a8718c88313564063b30
|
| 3 |
+
size 15985596
|
results_qwen/gpt-4-0125-preview.png
ADDED
|
Git LFS Details
|
results_qwen/gpt-4-turbo-2024-04-09.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d50952850b16d84f951949aa629db60375aa2100cf86f1b9a143cdfd83fc1c30
|
| 3 |
+
size 16522373
|
results_qwen/gpt-4-turbo-2024-04-09.jpg
ADDED
|
Git LFS Details
|
results_qwen/gpt-4-turbo-2024-04-09.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:661211c67ac6a0e6168e79df8d616b4abbc7e5d0fda335736cbe22912fade1f4
|
| 3 |
+
size 16542716
|
results_qwen/gpt-4-turbo-2024-04-09.png
ADDED
|
Git LFS Details
|