Upload folder using huggingface_hub
Browse files- src/streamlit_app.py +77 -6
src/streamlit_app.py
CHANGED
|
@@ -368,10 +368,33 @@ with tab7:
|
|
| 368 |
]
|
| 369 |
|
| 370 |
df_evaluators = pd.DataFrame(data)
|
| 371 |
-
|
| 372 |
-
st.
|
| 373 |
-
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
st.write("### Summary")
|
| 376 |
st.write(f"- **Total Evaluators:** {df_evaluators['Evaluator ID'].nunique()}")
|
| 377 |
st.write(f"- **Total Contributions:** {df_evaluators['Contributions'].sum()}")
|
|
@@ -403,8 +426,56 @@ with tab7:
|
|
| 403 |
]
|
| 404 |
|
| 405 |
df_results = pd.DataFrame(results_data)
|
| 406 |
-
|
| 407 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
# Narrative summary
|
| 409 |
st.markdown("""
|
| 410 |
### Key Takeaways
|
|
|
|
| 368 |
]
|
| 369 |
|
| 370 |
df_evaluators = pd.DataFrame(data)
|
| 371 |
+
# Show contributions as charts (by evaluator and aggregated by language)
|
| 372 |
+
st.markdown("**Contributions by evaluator**")
|
| 373 |
+
df_evals_sorted = df_evaluators.sort_values('Contributions', ascending=False)
|
| 374 |
+
|
| 375 |
+
chart_evaluator = alt.Chart(df_evals_sorted).mark_bar().encode(
|
| 376 |
+
x=alt.X('Evaluator ID:N', sort=df_evals_sorted['Evaluator ID'].tolist(), title='Evaluator ID'),
|
| 377 |
+
y=alt.Y('Contributions:Q', title='Contributions'),
|
| 378 |
+
color=alt.Color('Languages:N', legend=alt.Legend(title='Language')),
|
| 379 |
+
tooltip=['Evaluator ID', 'Contributions', 'Languages']
|
| 380 |
+
).properties(height=300)
|
| 381 |
+
|
| 382 |
+
st.altair_chart(chart_evaluator, use_container_width=True)
|
| 383 |
+
|
| 384 |
+
st.markdown("**Total contributions by language**")
|
| 385 |
+
# Aggregate only the numeric 'Contributions' column per language; a bare .sum() would also "sum" 'Evaluator ID'.
df_lang = df_evaluators.groupby('Languages', as_index=False)['Contributions'].sum()
|
| 386 |
+
chart_lang = alt.Chart(df_lang).mark_bar().encode(
|
| 387 |
+
x=alt.X('Languages:N', sort='-y', title='Language'),
|
| 388 |
+
y=alt.Y('Contributions:Q', title='Total Contributions'),
|
| 389 |
+
tooltip=['Languages', 'Contributions']
|
| 390 |
+
).properties(height=300)
|
| 391 |
+
|
| 392 |
+
st.altair_chart(chart_lang, use_container_width=True)
|
| 393 |
+
|
| 394 |
+
# Optional: show the raw evaluator table inside an expander (totals are written in the Summary below)
|
| 395 |
+
with st.expander("View raw evaluator table"):
|
| 396 |
+
st.dataframe(df_evaluators, width="stretch")
|
| 397 |
+
|
| 398 |
st.write("### Summary")
|
| 399 |
st.write(f"- **Total Evaluators:** {df_evaluators['Evaluator ID'].nunique()}")
|
| 400 |
st.write(f"- **Total Contributions:** {df_evaluators['Contributions'].sum()}")
|
|
|
|
| 426 |
]
|
| 427 |
|
| 428 |
df_results = pd.DataFrame(results_data)
|
| 429 |
+
# Visualize results with charts
|
| 430 |
+
# Grouped bars for Accuracy and Meaning per language
|
| 431 |
+
df_long = df_results.melt(id_vars=['Language', 'Audios Evaluated'], value_vars=['Accuracy', 'Meaning'],
|
| 432 |
+
var_name='Metric', value_name='Score')
|
| 433 |
+
|
| 434 |
+
# Use xOffset for grouped bars when available
|
| 435 |
+
try:
|
| 436 |
+
chart_metrics = alt.Chart(df_long).mark_bar().encode(
|
| 437 |
+
x=alt.X('Language:N', sort=df_results['Language'].tolist(), title='Language'),
|
| 438 |
+
y=alt.Y('Score:Q', title='Score (1-5)'),
|
| 439 |
+
color=alt.Color('Metric:N', legend=alt.Legend(title='Metric')),
|
| 440 |
+
tooltip=['Language', 'Metric', 'Score'],
|
| 441 |
+
xOffset='Metric:N'
|
| 442 |
+
).properties(height=360)
|
| 443 |
+
except Exception:
|
| 444 |
+
# Fallback when xOffset is not supported: drop the offset, so the Accuracy and Meaning bars are stacked per language instead of grouped
|
| 445 |
+
chart_metrics = alt.Chart(df_long).mark_bar().encode(
|
| 446 |
+
x=alt.X('Language:N', sort=df_results['Language'].tolist(), title='Language'),
|
| 447 |
+
y=alt.Y('Score:Q', title='Score (1-5)'),
|
| 448 |
+
color=alt.Color('Metric:N', legend=alt.Legend(title='Metric')),
|
| 449 |
+
tooltip=['Language', 'Metric', 'Score']
|
| 450 |
+
).properties(height=360)
|
| 451 |
+
|
| 452 |
+
st.altair_chart(chart_metrics, use_container_width=True)
|
| 453 |
+
|
| 454 |
+
# Scatter: Accuracy vs Audios Evaluated (fixed marker size, color by language)
|
| 455 |
+
chart_scatter = alt.Chart(df_results).mark_circle(size=120).encode(
|
| 456 |
+
x=alt.X('Audios Evaluated:Q', title='Audios Evaluated'),
|
| 457 |
+
y=alt.Y('Accuracy:Q', title='Accuracy'),
|
| 458 |
+
color=alt.Color('Language:N', legend=None),
|
| 459 |
+
tooltip=['Language', 'Audios Evaluated', 'Accuracy', 'Meaning']
|
| 460 |
+
).properties(height=360)
|
| 461 |
+
|
| 462 |
+
st.markdown("**Accuracy vs. Dataset Size**")
|
| 463 |
+
st.altair_chart(chart_scatter, use_container_width=True)
|
| 464 |
+
|
| 465 |
+
# Audios evaluated per language (bar)
|
| 466 |
+
chart_audios = alt.Chart(df_results).mark_bar().encode(
|
| 467 |
+
x=alt.X('Language:N', sort=df_results['Language'].tolist(), title='Language'),
|
| 468 |
+
y=alt.Y('Audios Evaluated:Q', title='Audios Evaluated'),
|
| 469 |
+
tooltip=['Language', 'Audios Evaluated']
|
| 470 |
+
).properties(height=320)
|
| 471 |
+
|
| 472 |
+
st.markdown("**Number of audios evaluated by language**")
|
| 473 |
+
st.altair_chart(chart_audios, use_container_width=True)
|
| 474 |
+
|
| 475 |
+
# Optional: raw table in expander
|
| 476 |
+
with st.expander('View raw findings table'):
|
| 477 |
+
st.dataframe(df_results, width='stretch')
|
| 478 |
+
|
| 479 |
# Narrative summary
|
| 480 |
st.markdown("""
|
| 481 |
### Key Takeaways
|