JunJiaGuo committed on
Commit
ca6133f
·
verified ·
1 Parent(s): 5d32525

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -6
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import json
3
  import base64
4
  import gradio as gr
@@ -126,6 +127,29 @@ def load_id_answer_mapping():
126
  return json.loads(id_answer_mapping)
127
 
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  def evaluate_uploaded_json(user_file, model_name):
130
  print(f"Model Name: {model_name}")
131
  print(f"Uploaded File: {user_file}")
@@ -143,7 +167,8 @@ def evaluate_uploaded_json(user_file, model_name):
143
 
144
  for item in user_data:
145
  question_id = item["id"]
146
- user_answer = item.get("model_answer")
 
147
  question_class = item.get("class", "Unknown")
148
 
149
  class_total[question_class] += 1
@@ -154,7 +179,8 @@ def evaluate_uploaded_json(user_file, model_name):
154
  correct += 1
155
 
156
  subclass_data = []
157
- subclass_result = {}
 
158
  for cls in CLASS_LIST[:-5]:
159
  acc = class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
160
  subclass_data.append({
@@ -162,8 +188,7 @@ def evaluate_uploaded_json(user_file, model_name):
162
  "Accuracy": f"{acc:.2%}",
163
  "Correct/Total": f"{class_correct[cls]}/{class_total[cls]}"
164
  })
165
- subclass_result[cls] = acc
166
-
167
 
168
  category_data = []
169
  for category, sub_classes in CATEGORY_MAPPING.items():
@@ -175,14 +200,13 @@ def evaluate_uploaded_json(user_file, model_name):
175
  "Accuracy": f"{acc:.2%}",
176
  "Correct/Total": f"{cat_correct}/{cat_total}"
177
  })
178
- subclass_result[category] = acc
179
 
180
  overall_accuracy = f"{correct / total:.2%} ({correct}/{total} correct)"
181
 
182
  subclass_df = pd.DataFrame(subclass_data)
183
  category_df = pd.DataFrame(category_data)
184
 
185
-
186
  save_class_accuracy_to_hf_dataset(model_name, subclass_result)
187
 
188
  return overall_accuracy, category_df, subclass_df
 
1
  import os
2
+ import re
3
  import json
4
  import base64
5
  import gradio as gr
 
127
  return json.loads(id_answer_mapping)
128
 
129
 
130
+
131
def answer_matching(text):
    """Extract a multiple-choice letter ('A'-'D') from a raw model answer.

    Args:
        text: The raw answer. A list is reduced to its first element; any
            value that is not a string after that step is treated as
            unparseable.

    Returns:
        An upper-case letter. When no pattern matches, the single ASCII
        letter in the text is returned if there is exactly one; otherwise
        a uniformly random choice from 'A'-'D' is returned, so the result
        for unparseable input is not deterministic.
    """
    # Local import: the visible import block of this file does not show
    # `random`, and the fallback paths below depend on it.
    import random

    choices = ['A', 'B', 'C', 'D']

    if isinstance(text, list):
        text = text[0] if text else random.choice(choices)
    if not isinstance(text, str):
        return random.choice(choices)

    # Tried in priority order; every pattern must expose the letter as
    # group 1 because the loop below reads match.group(1).
    patterns = [
        r'\((A|B|C|D)\)',
        # A leading option letter must not be followed by another letter,
        # otherwise "Answer: C" would be read as option "A".
        r'^(A|B|C|D)(?![A-Za-z])',
        # Previously had no capture group, which made group(1) raise
        # IndexError for inputs such as "The answer is B".
        r'\b([A-D])\b',
        r'\((a|b|c|d)\)',
        r'\b(A|B|C|D)\.',
    ]
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1).upper()

    # Last resort: accept a lone letter (e.g. "b"), otherwise guess.
    letters = re.findall(r'[a-zA-Z]', text)
    return letters[0].upper() if len(letters) == 1 else random.choice(choices)
151
+
152
+
153
  def evaluate_uploaded_json(user_file, model_name):
154
  print(f"Model Name: {model_name}")
155
  print(f"Uploaded File: {user_file}")
 
167
 
168
  for item in user_data:
169
  question_id = item["id"]
170
+ raw_user_answer = item.get("model_answer", "")
171
+ user_answer = answer_matching(raw_user_answer)
172
  question_class = item.get("class", "Unknown")
173
 
174
  class_total[question_class] += 1
 
179
  correct += 1
180
 
181
  subclass_data = []
182
+ subclass_result = {}
183
+
184
  for cls in CLASS_LIST[:-5]:
185
  acc = class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
186
  subclass_data.append({
 
188
  "Accuracy": f"{acc:.2%}",
189
  "Correct/Total": f"{class_correct[cls]}/{class_total[cls]}"
190
  })
191
+ subclass_result[cls] = acc
 
192
 
193
  category_data = []
194
  for category, sub_classes in CATEGORY_MAPPING.items():
 
200
  "Accuracy": f"{acc:.2%}",
201
  "Correct/Total": f"{cat_correct}/{cat_total}"
202
  })
203
+ subclass_result[category] = acc
204
 
205
  overall_accuracy = f"{correct / total:.2%} ({correct}/{total} correct)"
206
 
207
  subclass_df = pd.DataFrame(subclass_data)
208
  category_df = pd.DataFrame(category_data)
209
 
 
210
  save_class_accuracy_to_hf_dataset(model_name, subclass_result)
211
 
212
  return overall_accuracy, category_df, subclass_df