陈俊杰
committed on
Commit
·
b781bf5
1
Parent(s):
f51ed55
table
Browse files
app.py
CHANGED
|
@@ -265,10 +265,13 @@ elif page == "LeaderBoard":
|
|
| 265 |
"Spearman (Non-Factoid QA)": [],
|
| 266 |
}
|
| 267 |
|
|
|
|
|
|
|
|
|
|
| 268 |
# teamId 唯一标识码
|
| 269 |
DG = {
|
| 270 |
-
"TeamId":
|
| 271 |
-
"Methods":
|
| 272 |
"Accuracy": [0.5806, 0.5483, 0.6001, 0.6472],
|
| 273 |
"Kendall's Tau": [0.3243, 0.1739, 0.3042, 0.4167],
|
| 274 |
"Spearman": [0.3505, 0.1857, 0.3264, 0.4512]
|
|
@@ -276,8 +279,8 @@ elif page == "LeaderBoard":
|
|
| 276 |
df1 = pd.DataFrame(DG)
|
| 277 |
|
| 278 |
TE = {
|
| 279 |
-
"TeamId":
|
| 280 |
-
"Methods":
|
| 281 |
"Accuracy": [0.5107, 0.5050, 0.5461, 0.5581],
|
| 282 |
"Kendall's Tau": [0.1281, 0.0635, 0.2716, 0.3864],
|
| 283 |
"Spearman": [0.1352, 0.0667, 0.2867, 0.4157]
|
|
@@ -285,8 +288,8 @@ elif page == "LeaderBoard":
|
|
| 285 |
df2 = pd.DataFrame(TE)
|
| 286 |
|
| 287 |
SG = {
|
| 288 |
-
"TeamId":
|
| 289 |
-
"Methods":
|
| 290 |
"Accuracy": [0.6504, 0.6014, 0.7162, 0.7441],
|
| 291 |
"Kendall's Tau": [0.3957, 0.2688, 0.5092, 0.5001],
|
| 292 |
"Spearman": [0.4188, 0.2817, 0.5403, 0.5405],
|
|
@@ -294,18 +297,42 @@ elif page == "LeaderBoard":
|
|
| 294 |
df3 = pd.DataFrame(SG)
|
| 295 |
|
| 296 |
NFQA = {
|
| 297 |
-
"TeamId":
|
| 298 |
-
"Methods":
|
| 299 |
"Accuracy": [0.5935, 0.5817, 0.7000, 0.7203],
|
| 300 |
"Kendall's Tau": [0.2332, 0.2389, 0.4440, 0.4235],
|
| 301 |
"Spearman": [0.2443, 0.2492, 0.4630, 0.4511]
|
| 302 |
}
|
| 303 |
df4 = pd.DataFrame(NFQA)
|
| 304 |
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
# # 创建标签页
|
| 311 |
# tab1, tab2, tab3, tab4 = st.tabs(["DG", "TE", "SG", "NFQA"])
|
|
@@ -325,30 +352,45 @@ elif page == "LeaderBoard":
|
|
| 325 |
# with tab4:
|
| 326 |
# st.markdown("""<p class='main-text'>Task: Non-Factoid QA; Dataset: NF_CATS</p>""", unsafe_allow_html=True)
|
| 327 |
# st.dataframe(df4, use_container_width=True)
|
| 328 |
-
|
| 329 |
-
data = [DG, NFQA, SG, TE]
|
| 330 |
-
task = ["Dialogue Generation", "Non-Factoid QA", "Summary Generation", "Text Expansion"]
|
| 331 |
-
metric = ["Accuracy", "Kendall's Tau", "Spearman"]
|
| 332 |
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
-
df = pd.DataFrame(df)
|
| 349 |
-
for col in df.select_dtypes(include=['float64', 'int64']).columns:
|
| 350 |
-
|
| 351 |
-
st.dataframe(df,use_container_width=True)
|
| 352 |
|
| 353 |
st.markdown("""
|
| 354 |
🔗 To register for AEOLLM task, you can visit the following link and choose our AEOLLM task: [https://research.nii.ac.jp/ntcir/ntcir-18/howto.html](https://research.nii.ac.jp/ntcir/ntcir-18/howto.html).
|
|
|
|
| 265 |
"Spearman (Non-Factoid QA)": [],
|
| 266 |
}
|
| 267 |
|
| 268 |
+
TeamId = ["baseline1", "baseline2", "baseline3", "baseline4"]
|
| 269 |
+
Methods = ["chatglm3-6b", "baichuan2-13b", "chatglm-pro", "gpt-4o"]
|
| 270 |
+
|
| 271 |
# teamId 唯一标识码
|
| 272 |
DG = {
|
| 273 |
+
"TeamId": TeamId,
|
| 274 |
+
"Methods": Methods,
|
| 275 |
"Accuracy": [0.5806, 0.5483, 0.6001, 0.6472],
|
| 276 |
"Kendall's Tau": [0.3243, 0.1739, 0.3042, 0.4167],
|
| 277 |
"Spearman": [0.3505, 0.1857, 0.3264, 0.4512]
|
|
|
|
| 279 |
df1 = pd.DataFrame(DG)
|
| 280 |
|
| 281 |
TE = {
|
| 282 |
+
"TeamId": TeamId,
|
| 283 |
+
"Methods": Methods,
|
| 284 |
"Accuracy": [0.5107, 0.5050, 0.5461, 0.5581],
|
| 285 |
"Kendall's Tau": [0.1281, 0.0635, 0.2716, 0.3864],
|
| 286 |
"Spearman": [0.1352, 0.0667, 0.2867, 0.4157]
|
|
|
|
| 288 |
df2 = pd.DataFrame(TE)
|
| 289 |
|
| 290 |
SG = {
|
| 291 |
+
"TeamId": TeamId,
|
| 292 |
+
"Methods": Methods,
|
| 293 |
"Accuracy": [0.6504, 0.6014, 0.7162, 0.7441],
|
| 294 |
"Kendall's Tau": [0.3957, 0.2688, 0.5092, 0.5001],
|
| 295 |
"Spearman": [0.4188, 0.2817, 0.5403, 0.5405],
|
|
|
|
| 297 |
df3 = pd.DataFrame(SG)
|
| 298 |
|
| 299 |
NFQA = {
|
| 300 |
+
"TeamId": TeamId,
|
| 301 |
+
"Methods": Methods,
|
| 302 |
"Accuracy": [0.5935, 0.5817, 0.7000, 0.7203],
|
| 303 |
"Kendall's Tau": [0.2332, 0.2389, 0.4440, 0.4235],
|
| 304 |
"Spearman": [0.2443, 0.2492, 0.4630, 0.4511]
|
| 305 |
}
|
| 306 |
df4 = pd.DataFrame(NFQA)
|
| 307 |
|
| 308 |
+
OverAll = {
|
| 309 |
+
"TeamId": TeamId,
|
| 310 |
+
"Methods": Methods,
|
| 311 |
+
"Accuracy": [],
|
| 312 |
+
"Kendall's Tau": [],
|
| 313 |
+
"Spearman": []
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
data = [DG, NFQA, SG, TE]
|
| 317 |
+
task = ["Dialogue Generation", "Non-Factoid QA", "Summary Generation", "Text Expansion"]
|
| 318 |
+
metric = ["Accuracy", "Kendall's Tau", "Spearman"]
|
| 319 |
+
|
| 320 |
+
for m in metric:
|
| 321 |
+
# 每个指标
|
| 322 |
+
metric_score = [0] * len(TeamId)
|
| 323 |
+
for j in range(len(TeamId)):
|
| 324 |
+
# 每支队伍
|
| 325 |
+
for d in data:
|
| 326 |
+
metric_score[j] += d[m][j]
|
| 327 |
+
metric_score = [k / len(task) for k in metric_score]
|
| 328 |
+
OverAll[m] = metric_score
|
| 329 |
+
|
| 330 |
+
dfo = pd.DataFrame(OverAll)
|
| 331 |
+
|
| 332 |
+
df = [df1, df2, df3, df4, dfo]
|
| 333 |
+
for d in df:
|
| 334 |
+
for col in d.select_dtypes(include=['float64', 'int64']).columns:
|
| 335 |
+
d[col] = d[col].apply(lambda x: f"{x:.4f}")
|
| 336 |
|
| 337 |
# # 创建标签页
|
| 338 |
# tab1, tab2, tab3, tab4 = st.tabs(["DG", "TE", "SG", "NFQA"])
|
|
|
|
| 352 |
# with tab4:
|
| 353 |
# st.markdown("""<p class='main-text'>Task: Non-Factoid QA; Dataset: NF_CATS</p>""", unsafe_allow_html=True)
|
| 354 |
# st.dataframe(df4, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
+
st.markdown("""<p class='main-text'>Overall</p>""", unsafe_allow_html=True)
|
| 357 |
+
st.dataframe(dfo, use_container_width=True)
|
| 358 |
+
|
| 359 |
+
st.markdown("""<p class='main-text'>Task: Dialogue Generation; Dataset: DialyDialog</p>""", unsafe_allow_html=True)
|
| 360 |
+
st.dataframe(df1, use_container_width=True)
|
| 361 |
+
|
| 362 |
+
st.markdown("""<p class='main-text'>Task: Text Expansion; Dataset: WritingPrompts</p>""", unsafe_allow_html=True)
|
| 363 |
+
st.dataframe(df2, use_container_width=True)
|
| 364 |
+
|
| 365 |
+
st.markdown("""<p class='main-text'>Task: Summary Generation; Dataset: Xsum</p>""", unsafe_allow_html=True)
|
| 366 |
+
st.dataframe(df3, use_container_width=True)
|
| 367 |
+
|
| 368 |
+
st.markdown("""<p class='main-text'>Task: Non-Factoid QA; Dataset: NF_CATS</p>""", unsafe_allow_html=True)
|
| 369 |
+
st.dataframe(df4, use_container_width=True)
|
| 370 |
+
|
| 371 |
+
# data = [DG, NFQA, SG, TE]
|
| 372 |
+
# task = ["Dialogue Generation", "Non-Factoid QA", "Summary Generation", "Text Expansion"]
|
| 373 |
+
# metric = ["Accuracy", "Kendall's Tau", "Spearman"]
|
| 374 |
+
|
| 375 |
+
# overall_total = [0] * len(df["TeamId"])
|
| 376 |
+
# for i, d in enumerate(data): # 每种数据集
|
| 377 |
+
# total = [0] * len(df["TeamId"]) # 长度初始化为方法数
|
| 378 |
+
# for j in range(len(metric)): # 每种指标
|
| 379 |
+
# index = f"{metric[j]} ({task[i]})"
|
| 380 |
+
# df[index] = d[metric[j]]
|
| 381 |
+
# for k in range(len(df["TeamId"])):
|
| 382 |
+
# total[k] += d[metric[j]][k]
|
| 383 |
+
# average_index = f"Average ({task[i]})"
|
| 384 |
+
# df[average_index] = [k / len(metric) for k in total]
|
| 385 |
+
# for k in range(len(df["TeamId"])):
|
| 386 |
+
# overall_total[k] += df[average_index][k]
|
| 387 |
+
|
| 388 |
+
# df["Average (all 4 datatsets)"] = [k / len(task) for k in overall_total]
|
| 389 |
|
| 390 |
+
# df = pd.DataFrame(df)
|
| 391 |
+
# for col in df.select_dtypes(include=['float64', 'int64']).columns:
|
| 392 |
+
# df[col] = df[col].apply(lambda x: f"{x:.4f}")
|
| 393 |
+
# st.dataframe(df,use_container_width=True)
|
| 394 |
|
| 395 |
st.markdown("""
|
| 396 |
🔗 To register for AEOLLM task, you can visit the following link and choose our AEOLLM task: [https://research.nii.ac.jp/ntcir/ntcir-18/howto.html](https://research.nii.ac.jp/ntcir/ntcir-18/howto.html).
|