Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -93,7 +93,7 @@ def init_leaderboard(dataframe):
|
|
| 93 |
)
|
| 94 |
|
| 95 |
def load_id_answer_mapping():
|
| 96 |
-
id_answer_mapping = os.getenv("ID_ANSWER_MAPPING")
|
| 97 |
if not id_answer_mapping:
|
| 98 |
raise ValueError("ID_ANSWER_MAPPING secret not found!")
|
| 99 |
print(id_answer_mapping)
|
|
@@ -101,26 +101,23 @@ def load_id_answer_mapping():
|
|
| 101 |
return json.loads(id_answer_mapping)
|
| 102 |
|
| 103 |
def evaluate_uploaded_json(user_file):
|
| 104 |
-
print(user_file)
|
| 105 |
-
id_answer_mapping = load_id_answer_mapping()
|
| 106 |
|
| 107 |
with open(user_file, "r", encoding="utf-8") as f:
|
| 108 |
-
user_data = json.load(f)
|
| 109 |
|
| 110 |
-
# 统计总正确率
|
| 111 |
correct = 0
|
| 112 |
total = 0
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
class_total = defaultdict(int) # 存每个 class 总的题目数
|
| 117 |
|
| 118 |
for item in user_data:
|
| 119 |
question_id = item["id"]
|
| 120 |
user_answer = item.get("answer")
|
| 121 |
-
question_class = item.get("class", "Unknown")
|
| 122 |
|
| 123 |
-
# 增加 class 计数
|
| 124 |
class_total[question_class] += 1
|
| 125 |
total += 1
|
| 126 |
|
|
@@ -128,16 +125,13 @@ def evaluate_uploaded_json(user_file):
|
|
| 128 |
class_correct[question_class] += 1
|
| 129 |
correct += 1
|
| 130 |
|
| 131 |
-
# 计算总正确率
|
| 132 |
accuracy = correct / total if total > 0 else 0
|
| 133 |
|
| 134 |
-
# 计算每个 class 的正确率
|
| 135 |
class_accuracy = {
|
| 136 |
cls: class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
|
| 137 |
for cls in class_total
|
| 138 |
}
|
| 139 |
|
| 140 |
-
# 格式化输出
|
| 141 |
class_accuracy_str = "\n".join(
|
| 142 |
[f"- {cls}: {acc:.2%} ({class_correct[cls]}/{class_total[cls]} correct)"
|
| 143 |
for cls, acc in class_accuracy.items()]
|
|
@@ -152,8 +146,8 @@ def evaluate_uploaded_json(user_file):
|
|
| 152 |
|
| 153 |
demo = gr.Blocks(css=custom_css)
|
| 154 |
with demo:
|
| 155 |
-
gr.HTML(
|
| 156 |
-
gr.Markdown("Vid-Composition", elem_classes="markdown-text")
|
| 157 |
|
| 158 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 159 |
# with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
|
|
|
| 93 |
)
|
| 94 |
|
| 95 |
def load_id_answer_mapping():
|
| 96 |
+
id_answer_mapping = os.getenv("ID_ANSWER_MAPPING")
|
| 97 |
if not id_answer_mapping:
|
| 98 |
raise ValueError("ID_ANSWER_MAPPING secret not found!")
|
| 99 |
print(id_answer_mapping)
|
|
|
|
| 101 |
return json.loads(id_answer_mapping)
|
| 102 |
|
| 103 |
def evaluate_uploaded_json(user_file):
|
| 104 |
+
print(user_file)
|
| 105 |
+
id_answer_mapping = load_id_answer_mapping()
|
| 106 |
|
| 107 |
with open(user_file, "r", encoding="utf-8") as f:
|
| 108 |
+
user_data = json.load(f)
|
| 109 |
|
|
|
|
| 110 |
correct = 0
|
| 111 |
total = 0
|
| 112 |
|
| 113 |
+
class_correct = defaultdict(int)
|
| 114 |
+
class_total = defaultdict(int)
|
|
|
|
| 115 |
|
| 116 |
for item in user_data:
|
| 117 |
question_id = item["id"]
|
| 118 |
user_answer = item.get("answer")
|
| 119 |
+
question_class = item.get("class", "Unknown")
|
| 120 |
|
|
|
|
| 121 |
class_total[question_class] += 1
|
| 122 |
total += 1
|
| 123 |
|
|
|
|
| 125 |
class_correct[question_class] += 1
|
| 126 |
correct += 1
|
| 127 |
|
|
|
|
| 128 |
accuracy = correct / total if total > 0 else 0
|
| 129 |
|
|
|
|
| 130 |
class_accuracy = {
|
| 131 |
cls: class_correct[cls] / class_total[cls] if class_total[cls] > 0 else 0
|
| 132 |
for cls in class_total
|
| 133 |
}
|
| 134 |
|
|
|
|
| 135 |
class_accuracy_str = "\n".join(
|
| 136 |
[f"- {cls}: {acc:.2%} ({class_correct[cls]}/{class_total[cls]} correct)"
|
| 137 |
for cls, acc in class_accuracy.items()]
|
|
|
|
| 146 |
|
| 147 |
demo = gr.Blocks(css=custom_css)
|
| 148 |
with demo:
|
| 149 |
+
gr.HTML("Vid-Composition")
|
| 150 |
+
# gr.Markdown("Vid-Composition", elem_classes="markdown-text")
|
| 151 |
|
| 152 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
| 153 |
# with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|