Spaces:
Runtime error
Runtime error
Commit ·
bad3784
1
Parent(s): 3d11374
added highlights
Browse files
app.py
CHANGED
|
@@ -209,16 +209,30 @@ st.write(f"""We evaluate the robustness of the LLM by assessing the variation in
|
|
| 209 |
# st.write(f"ProbTypos: {other_info_dict['ProbTypos_description']}")
|
| 210 |
# st.write(f"MaxTypo: {other_info_dict['MaxTypo_description']}")
|
| 211 |
|
|
|
|
| 212 |
for t_opt in options:
|
| 213 |
st.write('Prompt used : ', t_opt)
|
| 214 |
t_pert_df_global = result_processor_obj_dict[t_opt].get_global_perturbers_df()
|
|
|
|
|
|
|
| 215 |
|
| 216 |
t_pert_fig = px.line(t_pert_df_global, x="Levels", y="Performance", color='Perturbation family')
|
| 217 |
t_pert_fig.update_xaxes(tickmode='linear', dtick=1)
|
| 218 |
-
|
| 219 |
|
| 220 |
st.plotly_chart(t_pert_fig, theme="streamlit", use_container_width=True)
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
# %%
|
| 224 |
st.header("Characteristic results")
|
|
|
|
| 209 |
# st.write(f"ProbTypos: {other_info_dict['ProbTypos_description']}")
|
| 210 |
# st.write(f"MaxTypo: {other_info_dict['MaxTypo_description']}")
|
| 211 |
|
| 212 |
+
t_pert_df_global_merged = []
|
| 213 |
for t_opt in options:
|
| 214 |
st.write('Prompt used : ', t_opt)
|
| 215 |
t_pert_df_global = result_processor_obj_dict[t_opt].get_global_perturbers_df()
|
| 216 |
+
t_pert_df_global['Prompt option'] = processed_t_opt_dict[t_opt]
|
| 217 |
+
t_pert_df_global['Prompt'] = t_pert_df_global['Perturbation family'] + ' - ' + processed_t_opt_dict[t_opt]
|
| 218 |
|
| 219 |
t_pert_fig = px.line(t_pert_df_global, x="Levels", y="Performance", color='Perturbation family')
|
| 220 |
t_pert_fig.update_xaxes(tickmode='linear', dtick=1)
|
| 221 |
+
t_pert_df_global_merged += [t_pert_df_global]
|
| 222 |
|
| 223 |
st.plotly_chart(t_pert_fig, theme="streamlit", use_container_width=True)
|
| 224 |
|
| 225 |
+
st.write('---')
|
| 226 |
+
st.write('The following plot illustrates the variation in performance across perturbation families and prompts.')
|
| 227 |
+
t_pert_df_global_merged_df = pd.concat(t_pert_df_global_merged, axis=0)
|
| 228 |
+
t_pert_df_global_merged_df_2 = t_pert_df_global_merged_df[['Prompt option', 'Perturbation family','Levels', 'Performance']]
|
| 229 |
+
# set_index returns a new DataFrame by default; keep the result so the first column actually becomes the index.
t_pert_df_global_merged_df_2 = t_pert_df_global_merged_df_2.set_index(t_pert_df_global_merged_df_2.columns[0])
|
| 230 |
+
# st.dataframe(t_pert_df_global_merged_df_2)
|
| 231 |
+
|
| 232 |
+
temp_merged_fig = px.line(t_pert_df_global_merged_df, x="Levels", y="Performance", color='Prompt')
|
| 233 |
+
temp_merged_fig.update_xaxes(tickmode='linear', dtick=1)
|
| 234 |
+
st.plotly_chart(temp_merged_fig, theme="streamlit", use_container_width=True)
|
| 235 |
+
|
| 236 |
|
| 237 |
# %%
|
| 238 |
st.header("Characteristic results")
|