Update app.py

app.py CHANGED
@@ -126,8 +126,6 @@ def load_id_answer_mapping():
     return json.loads(id_answer_mapping)
 
 
-
-
 def evaluate_uploaded_json(user_file, model_name):
     print(f"Model Name: {model_name}")
     print(f"Uploaded File: {user_file}")

@@ -155,15 +153,16 @@ def evaluate_uploaded_json(user_file, model_name):
             class_correct[question_class] += 1
             correct += 1
 
-
     subclass_data = []
-    for cls in CLASS_LIST:
+    subclass_result = {}
+    for cls in CLASS_LIST[:-5]:
         acc = class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
         subclass_data.append({
             "Subclass": cls,
             "Accuracy": f"{acc:.2%}",
             "Correct/Total": f"{class_correct[cls]}/{class_total[cls]}"
         })
+        subclass_result[cls] = acc
 
 
     category_data = []

@@ -176,17 +175,20 @@ def evaluate_uploaded_json(user_file, model_name):
             "Accuracy": f"{acc:.2%}",
             "Correct/Total": f"{cat_correct}/{cat_total}"
         })
+        subclass_result[category] = acc
 
     overall_accuracy = f"{correct / total:.2%} ({correct}/{total} correct)"
 
     subclass_df = pd.DataFrame(subclass_data)
     category_df = pd.DataFrame(category_data)
 
-
-
+
+    save_class_accuracy_to_hf_dataset(model_name, subclass_result)
 
     return overall_accuracy, category_df, subclass_df
 
+
+
 def save_class_accuracy_to_hf_dataset(model_name, class_accuracy):
 
     new_data = {"Model Name": model_name}

@@ -224,19 +226,22 @@ with demo:
         # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-            gr.Interface(
+            with gr.Row():
+                json_file = gr.File(label="Upload JSON File")
+                model_name = gr.Textbox(label="Model Name", placeholder="Enter your model name here")
+
+            with gr.Row():
+                overall_acc = gr.Textbox(label="Overall Accuracy")
+
+            with gr.Row():
+                category_df = gr.Dataframe(label="Category Accuracy")
+                subclass_df = gr.Dataframe(label="Subclass Accuracy")
+
+            json_eval_button = gr.Button("Evaluate")
+            json_eval_button.click(
                 fn=evaluate_uploaded_json,
-                inputs=[
-                    gr.File(label="Upload JSON File"),
-                    gr.Textbox(label="Model Name", placeholder="Enter your model name here")
-                ],
-                outputs=[
-                    gr.Textbox(label="Overall Accuracy"),
-                    gr.Dataframe(label="Category Accuracy"),
-                    gr.Dataframe(label="Subclass Accuracy")
-                ],
-                title="JSON Answer Evaluator",
-                description="JSON Answer Evaluator"
+                inputs=[json_file, model_name],
+                outputs=[overall_acc, category_df, subclass_df]
             )
 
         with gr.Row():
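
Reviewer note: the last hunk swaps the gr.Interface(...) that was nested inside the
submit tab for explicitly constructed Blocks components wired to a button, which is
the usual way to host a form inside an existing gr.Blocks layout. A minimal runnable
sketch of that wiring pattern follows; the stub evaluator, the dummy DataFrames, and
the demo.launch() call are illustrative stand-ins, not the Space's actual code.

import gradio as gr
import pandas as pd

def evaluate_uploaded_json(user_file, model_name):
    # Stand-in with the same (file, name) -> (text, df, df) shape as the Space's evaluator.
    overall = f"0.00% (0/0 correct) for {model_name}"
    category_df = pd.DataFrame([{"Category": "demo", "Accuracy": "0.00%"}])
    subclass_df = pd.DataFrame([{"Subclass": "demo", "Accuracy": "0.00%"}])
    return overall, category_df, subclass_df

with gr.Blocks() as demo:
    with gr.Row():
        json_file = gr.File(label="Upload JSON File")
        model_name = gr.Textbox(label="Model Name")
    overall_acc = gr.Textbox(label="Overall Accuracy")
    with gr.Row():
        category_df = gr.Dataframe(label="Category Accuracy")
        subclass_df = gr.Dataframe(label="Subclass Accuracy")
    json_eval_button = gr.Button("Evaluate")
    # Explicit event wiring replaces the old gr.Interface(...) call.
    json_eval_button.click(
        fn=evaluate_uploaded_json,
        inputs=[json_file, model_name],
        outputs=[overall_acc, category_df, subclass_df],
    )

demo.launch()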
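
The new subclass_result dict maps each subclass name, and after the third hunk each
category name as well, to its accuracy, and is handed to
save_class_accuracy_to_hf_dataset before evaluate_uploaded_json returns. The [:-5]
slice suggests the last five CLASS_LIST entries are category names handled by the
later category loop; that is an inference from this diff, not something it states.
Only the first line of the saver is visible here, so the following is one plausible
implementation sketch, not the Space's actual code; the RESULTS_REPO id is assumed.

from datasets import Dataset, load_dataset

RESULTS_REPO = "your-org/leaderboard-results"  # assumed dataset repo id

def save_class_accuracy_to_hf_dataset(model_name, class_accuracy):
    # Flatten {class_name: accuracy} into a single row keyed by the model name,
    # matching the visible first line: new_data = {"Model Name": model_name}.
    new_data = {"Model Name": model_name}
    new_data.update({cls: round(acc, 4) for cls, acc in class_accuracy.items()})
    try:
        rows = load_dataset(RESULTS_REPO, split="train").to_list()
    except Exception:
        rows = []  # first submission: start a fresh table
    rows.append(new_data)
    # push_to_hub needs write access to the repo (e.g. an HF_TOKEN secret).
    Dataset.from_list(rows).push_to_hub(RESULTS_REPO)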