Spaces:

mli-will
/

rbeval

Build error

@@ -7,6 +7,8 @@ from dacite import from_dict
 from rbeval.plot.data import EvalGroup, get_samples
 from rbeval.plot.score_cdf import (
     plot_with_data,
     get_plot_data,
     plot_cfgs,
@@ -24,7 +26,9 @@ def cached_samples(dir: Path, name_filter: Optional[str]) -> List[EvalGroup]:
 @st.cache_data
-def cached_score_cdf(dir: Path, name_filter: Optional[str]):
     samples = cached_samples(dir, name_filter)
     cfgs = plot_cfgs()
     data = [get_plot_data(cfg, samples) for cfg in cfgs]
@@ -43,17 +47,41 @@ def cache_compare(
     return grouped_dict, base_name, comp_name
 def main():
     parser = argparse.ArgumentParser(description="rbeval dashboard")
     parser.add_argument("--evals", type=str, default="./lmo-fake", required=False)
     args, _rest = parser.parse_known_args()
     eval_dir = Path(args.evals)
     # Show all the models
     st.set_page_config(layout="wide")
     score_cdf_data, cfgs = cached_score_cdf(eval_dir, None)
-    for data, cfg in zip(score_cdf_data, cfgs):
-        figs = plot_with_data(cfg, data)
-        with st.expander(cfg.name):
             for fig in figs:
                 st.altair_chart(fig.chart)  # type: ignore
@@ -64,23 +92,27 @@ def main():
             for m in group.model_evals
         ]
     )
-    base_model = st.selectbox("Base model", model_names)
-    compare_model = st.selectbox("Compare model", model_names)
-    st.text(f"Comparing {base_model} with {compare_model}")
-    if base_model and compare_model:
-        if base_model == compare_model:
-            st.text("Base and compare models are the same")
-            return
-        grouped, base_name, comp_name = cache_compare(
-            eval_dir, None, base_model, compare_model
-        )
-        grouped = {
-            k: [from_dict(model_comp.Scores, vi) for vi in v]
-            for k, v in grouped.items()
-        }
-        for fig in model_comp.get_figures(grouped, base_name, comp_name):
-            st.text(fig.name)
-            st.altair_chart(fig.chart)  # type: ignore
 if __name__ == "__main__":

 from rbeval.plot.data import EvalGroup, get_samples
 from rbeval.plot.score_cdf import (
+    CdfPlotConfig,
+    PlotData,
     plot_with_data,
     get_plot_data,
     plot_cfgs,
 @st.cache_data
+def cached_score_cdf(
+    dir: Path, name_filter: Optional[str]
+) -> tuple[List[PlotData], List[CdfPlotConfig]]:
     samples = cached_samples(dir, name_filter)
     cfgs = plot_cfgs()
     data = [get_plot_data(cfg, samples) for cfg in cfgs]
     return grouped_dict, base_name, comp_name
+def filter_for_group(data: List[PlotData], group: str) -> List[PlotData]:
+    """Restrict every ``PlotData`` entry to the frames of a single group.
+
+    Args:
+        data: Plot data entries, each holding ``renorm`` and ``norenorm``
+            lists of frames.
+        group: Group name to keep; compared against the first row of each
+            frame's ``"group"`` column.
+
+    Returns:
+        One new ``PlotData`` per input entry, in the same order, whose
+        ``renorm`` and ``norenorm`` lists contain only the matching frames.
+    """
+
+    def in_group(df) -> bool:
+        # An empty frame has no first row, so skip it instead of letting
+        # ``.iloc[0]`` raise IndexError.
+        # NOTE(review): assumes the frames are pandas DataFrames (the code
+        # already relies on ``.iloc``) -- confirm against get_plot_data.
+        return len(df) > 0 and bool(df["group"].iloc[0] == group)
+
+    return [
+        PlotData(
+            renorm=[df for df in d.renorm if in_group(df)],
+            norenorm=[df for df in d.norenorm if in_group(df)],
+        )
+        for d in data
+    ]
+def get_group_names(data: List[PlotData]) -> List[str]:
+    """Return the sorted, de-duplicated group names present in ``data``.
+
+    Only the ``renorm`` frames of each entry are inspected; the name is read
+    from the first row of each frame's ``"group"`` column.
+
+    Args:
+        data: Plot data entries whose ``renorm`` frames carry a ``"group"``
+            column.
+
+    Returns:
+        The distinct group names in ascending order.
+    """
+    # A set comprehension avoids building a throwaway list first; empty
+    # frames are skipped because they have no first row to read.
+    return sorted(
+        {
+            df["group"].iloc[0]
+            for d in data
+            for df in d.renorm
+            if len(df) > 0
+        }
+    )
 def main():
     parser = argparse.ArgumentParser(description="rbeval dashboard")
     parser.add_argument("--evals", type=str, default="./lmo-fake", required=False)
     args, _rest = parser.parse_known_args()
     eval_dir = Path(args.evals)
     # Show all the models
     st.set_page_config(layout="wide")
     score_cdf_data, cfgs = cached_score_cdf(eval_dir, None)
+    group_names = sorted([g.name for g in cached_samples(eval_dir, None)])
+    renormed = st.toggle("Renormalize Probabilities", True)
+    st.subheader("Model Performance Curves")
+    for group in group_names:
+        group_data = filter_for_group(score_cdf_data, group)
+        with st.expander(group):
+            figs = [
+                fig
+                for data, cdf in zip(group_data, cfgs)
+                for fig in plot_with_data(cdf, data, renormed)
+            ]
             for fig in figs:
                 st.altair_chart(fig.chart)  # type: ignore
             for m in group.model_evals
         ]
     )
+    with st.form("comp"):
+        st.subheader("Model Comparison Tool")
+        base_model = st.selectbox("Base model", model_names)
+        compare_model = st.selectbox("Compare model", model_names)
+        st.text(f"Comparing {base_model} with {compare_model}")
+        submitted = st.form_submit_button("Compare")
+        if base_model and compare_model and submitted:
+            print("Computing comparisons")
+            if base_model == compare_model:
+                st.text("Base and compare models are the same")
+                return
+            grouped, base_name, comp_name = cache_compare(
+                eval_dir, None, base_model, compare_model
+            )
+            grouped = {
+                k: [from_dict(model_comp.Scores, vi) for vi in v]
+                for k, v in grouped.items()
+            }
+            for fig in model_comp.get_figures(grouped, base_name, comp_name):
+                st.text(fig.name)
+                st.altair_chart(fig.chart)  # type: ignore
 if __name__ == "__main__":

src/rbeval/plot/data.py CHANGED Viewed

@@ -151,3 +151,4 @@ class Figure:
         | alt.ConcatChart
         | alt.VConcatChart
     )

         | alt.ConcatChart
         | alt.VConcatChart
     )
+    group: Optional[str] = None

src/rbeval/plot/score_cdf.py CHANGED Viewed

@@ -25,7 +25,8 @@ def score_cdf(samples: List[EvalGroup], args: List[str]) -> List[Figure]:
     return [
         a
         for cfg in plot_cfgs()
-        for a in plot_with_data(cfg, get_plot_data(cfg, samples))
     ]
@@ -59,32 +60,36 @@ def get_plot_data(
 def plot_with_data(
     cfg: "CdfPlotConfig",
     data: PlotData,
 ) -> List[Figure]:
     figures: List[Figure] = []
-    for renorm, group_dfs in zip([True, False], [data.renorm, data.norenorm]):
-        for df in group_dfs:
-            group_name: str = str(df["group"].iloc[0])  # type: ignore
-            selection = alt.selection_point(fields=["label"], bind="legend")  # type: ignore
-            chart = (
-                alt.Chart(df)  # type: ignore
-                .mark_line()
-                .encode(
-                    x=alt.X("x:Q", title=cfg.xlabel),
-                    y=alt.Y("y:Q", title=cfg.ylabel),
-                    color=alt.Color("label:N", legend=alt.Legend(symbolOpacity=1.0)),
-                    opacity=alt.condition(  # type: ignore
-                        selection,
-                        alt.Opacity("fewshot:O"),
-                        alt.value(0.1),  # type: ignore
-                    ),
-                )
-                .properties(title=cfg.title(group_name, renorm), width=800, height=400)
-                .resolve_legend(color="independent")
-                .resolve_axis(y="independent", x="independent")
-                .add_params(selection)
-                .interactive()
             )
-            figures.append(Figure(name=f"{group_name} {cfg.name}", chart=chart))
     return figures

     return [
         a
         for cfg in plot_cfgs()
+        for renorm in [True, False]
+        for a in plot_with_data(cfg, get_plot_data(cfg, samples), renorm)
     ]
 def plot_with_data(
     cfg: "CdfPlotConfig",
     data: PlotData,
+    renorm: bool = True,
 ) -> List[Figure]:
+    """Build one interactive line chart per frame in ``data``.
+
+    Args:
+        cfg: Plot configuration; supplies the axis labels (``xlabel``,
+            ``ylabel``), the chart title via ``cfg.title(group, renorm)``
+            and the ``name`` used in each figure's name.
+        data: Plot data holding ``renorm`` and ``norenorm`` lists of frames.
+        renorm: When true the ``data.renorm`` frames are plotted, otherwise
+            the ``data.norenorm`` frames.
+
+    Returns:
+        One ``Figure`` per plotted frame, named ``"<group> <cfg.name>"`` and
+        tagged with its group name.
+    """
     figures: List[Figure] = []
+    # Only one of the two variants is plotted per call; callers wanting both
+    # call this function once per ``renorm`` value.
+    group_dfs = data.renorm if renorm else data.norenorm
+    for df in group_dfs:
+        # The group name is taken from the first row of the "group" column.
+        # NOTE(review): presumably every row of a frame shares one group --
+        # confirm against get_plot_data.
+        group_name: str = str(df["group"].iloc[0])  # type: ignore
+        # Two point selections, both bound to the legend: one keyed on the
+        # "label" field and one on the "fewshot" field.
+        label_selection = alt.selection_point(fields=["label"], bind="legend")  # type: ignore
+        fs_selection = alt.selection_point(fields=["fewshot"], bind="legend")  # type: ignore
+        chart = (
+            alt.Chart(df)  # type: ignore
+            .mark_line()
+            .encode(
+                x=alt.X("x:Q", title=cfg.xlabel),
+                y=alt.Y("y:Q", title=cfg.ylabel),
+                color=alt.Color("label:N", legend=alt.Legend(symbolOpacity=1.0)),
+                # Lines matching both selections get an opacity encoded from
+                # "fewshot"; every other line gets opacity 0.0.
+                opacity=alt.condition(  # type: ignore
+                    label_selection & fs_selection,
+                    alt.Opacity("fewshot:O"),
+                    alt.value(0.0),  # type: ignore
+                ),
             )
+            .properties(title=cfg.title(group_name, renorm), width=800, height=400)
+            .resolve_legend(color="independent")
+            .resolve_axis(y="independent", x="independent")
+            .add_params(fs_selection, label_selection)
+            .interactive()
+        )
+        figures.append(
+            Figure(name=f"{group_name} {cfg.name}", chart=chart, group=group_name)
+        )
     return figures