JunJiaGuo committed on
Commit
5d32525
·
verified ·
1 Parent(s): 0586ee3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -18
app.py CHANGED
@@ -126,8 +126,6 @@ def load_id_answer_mapping():
126
  return json.loads(id_answer_mapping)
127
 
128
 
129
-
130
-
131
  def evaluate_uploaded_json(user_file, model_name):
132
  print(f"Model Name: {model_name}")
133
  print(f"Uploaded File: {user_file}")
@@ -155,15 +153,16 @@ def evaluate_uploaded_json(user_file, model_name):
155
  class_correct[question_class] += 1
156
  correct += 1
157
 
158
-
159
  subclass_data = []
160
- for cls in CLASS_LIST[:-5]:
 
161
  acc = class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
162
  subclass_data.append({
163
  "Subclass": cls,
164
  "Accuracy": f"{acc:.2%}",
165
  "Correct/Total": f"{class_correct[cls]}/{class_total[cls]}"
166
  })
 
167
 
168
 
169
  category_data = []
@@ -176,17 +175,20 @@ def evaluate_uploaded_json(user_file, model_name):
176
  "Accuracy": f"{acc:.2%}",
177
  "Correct/Total": f"{cat_correct}/{cat_total}"
178
  })
 
179
 
180
  overall_accuracy = f"{correct / total:.2%} ({correct}/{total} correct)"
181
 
182
  subclass_df = pd.DataFrame(subclass_data)
183
  category_df = pd.DataFrame(category_data)
184
 
185
- save_class_accuracy_to_hf_dataset(model_name, {**{d['Subclass']: d['Accuracy'] for d in subclass_data},
186
- **{d['Category']: d['Accuracy'] for d in category_data}})
187
 
188
  return overall_accuracy, category_df, subclass_df
189
 
 
 
190
  def save_class_accuracy_to_hf_dataset(model_name, class_accuracy):
191
 
192
  new_data = {"Model Name": model_name}
@@ -224,19 +226,22 @@ with demo:
224
  # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
225
 
226
  with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
227
- gr.Interface(
 
 
 
 
 
 
 
 
 
 
 
 
228
  fn=evaluate_uploaded_json,
229
- inputs=[
230
- gr.File(label="Upload JSON File"),
231
- gr.Textbox(label="Model Name", placeholder="Enter your model name here")
232
- ],
233
- outputs=[
234
- gr.Textbox(label="Overall Accuracy"),
235
- gr.Dataframe(label="Category Accuracy"),
236
- gr.Dataframe(label="Subclass Accuracy")
237
- ],
238
- title="JSON Answer Evaluator",
239
- description="JSON Answer Evaluator"
240
  )
241
 
242
  with gr.Row():
 
126
  return json.loads(id_answer_mapping)
127
 
128
 
 
 
129
  def evaluate_uploaded_json(user_file, model_name):
130
  print(f"Model Name: {model_name}")
131
  print(f"Uploaded File: {user_file}")
 
153
  class_correct[question_class] += 1
154
  correct += 1
155
 
 
156
  subclass_data = []
157
+ subclass_result = {}
158
+ for cls in CLASS_LIST[:-5]:
159
  acc = class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
160
  subclass_data.append({
161
  "Subclass": cls,
162
  "Accuracy": f"{acc:.2%}",
163
  "Correct/Total": f"{class_correct[cls]}/{class_total[cls]}"
164
  })
165
+ subclass_result[cls] = acc
166
 
167
 
168
  category_data = []
 
175
  "Accuracy": f"{acc:.2%}",
176
  "Correct/Total": f"{cat_correct}/{cat_total}"
177
  })
178
+ subclass_result[category] = acc
179
 
180
  overall_accuracy = f"{correct / total:.2%} ({correct}/{total} correct)"
181
 
182
  subclass_df = pd.DataFrame(subclass_data)
183
  category_df = pd.DataFrame(category_data)
184
 
185
+
186
+ save_class_accuracy_to_hf_dataset(model_name, subclass_result)
187
 
188
  return overall_accuracy, category_df, subclass_df
189
 
190
+
191
+
192
  def save_class_accuracy_to_hf_dataset(model_name, class_accuracy):
193
 
194
  new_data = {"Model Name": model_name}
 
226
  # gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
227
 
228
  with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
229
+ with gr.Row():
230
+ json_file = gr.File(label="Upload JSON File")
231
+ model_name = gr.Textbox(label="Model Name", placeholder="Enter your model name here")
232
+
233
+ with gr.Row():
234
+ overall_acc = gr.Textbox(label="Overall Accuracy")
235
+
236
+ with gr.Row():
237
+ category_df = gr.Dataframe(label="Category Accuracy")
238
+ subclass_df = gr.Dataframe(label="Subclass Accuracy")
239
+
240
+ json_eval_button = gr.Button("Evaluate")
241
+ json_eval_button.click(
242
  fn=evaluate_uploaded_json,
243
+ inputs=[json_file, model_name],
244
+ outputs=[overall_acc, category_df, subclass_df]
 
 
 
 
 
 
 
 
 
245
  )
246
 
247
  with gr.Row():