Spaces:

jordyvl
/

ece

Configuration error

App Files Files Community

jordyvl committed on Jun 9, 2022

Commit

efa98d8

1 Parent(s): 3f722df

app and ece done

Browse files

Files changed (2) hide show

app.py +16 -39
ece.py +4 -3

app.py CHANGED Viewed

@@ -7,6 +7,13 @@ import gradio as gr
 from evaluate.utils import launch_gradio_widget
 from ece import ECE
 sliders = [
     gr.Slider(0, 100, value=10, label="n_bins"),
@@ -44,16 +51,6 @@ Switch inputs and compute_fn
 """
 def reliability_plot(results):
-    #CE, calibrated_acc, empirical_acc, weights_ece
-    #{"ECE": ECE[0], "y_bar": ECE[1], "p_bar": ECE[2], "bin_freq": ECE[3]}
-    import matplotlib.pyplot as plt
-    import seaborn as sns
-    sns.set_style('white')
-    sns.set_context("paper", font_scale=1)  # 2
-    # plt.rcParams['figure.figsize'] = [10, 7]
-    plt.rcParams['figure.dpi'] = 300
     fig = plt.figure()
     ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
     ax2 = plt.subplot2grid((3, 1), (2, 0))
@@ -65,9 +62,10 @@ def reliability_plot(results):
     ]  # np.linspace(0, 1, n_bins)
     # if upper edge then minus binsize; same for center [but half]
     ax1.plot(
-        np.linspace(bin_range[0], bin_range[1], n_bins),
-        np.linspace(bin_range[0], bin_range[1], n_bins),
         color="darkgreen",
         ls="dotted",
         label="Perfect",
@@ -79,7 +77,7 @@ def reliability_plot(results):
     bin_freqs[anindices] = results["bin_freq"]
     ax2.hist(results["y_bar"], results["y_bar"], weights=bin_freqs)
-    widths = np.diff(results["y_bar"])
     for j, bin in enumerate(results["y_bar"]):
         perfect = results["y_bar"][j]
         empirical = results["p_bar"][j]
@@ -87,7 +85,7 @@ def reliability_plot(results):
         if np.isnan(empirical):
             continue
-        ax1.bar([perfect], height=[empirical], width=-widths[j], align="edge", color="lightblue")
         if perfect == empirical:
             continue
@@ -137,10 +135,10 @@ def compute_and_plot(data, n_bins, bin_range, scheme, proxy, p):
     )
     plot = reliability_plot(results)
-    return results["ECE"], plt.gcf()
-outputs = [gr.outputs.Textbox(label="ECE"), gr.outputs.Plot(label="Reliability diagram")]
 iface = gr.Interface(
     fn=compute_and_plot,
@@ -148,26 +146,5 @@ iface = gr.Interface(
     outputs=outputs,
     description=metric.info.description,
     article=metric.info.citation,
-    # examples=sample_data
-)
-# ValueError: Examples argument must either be a directory or a nested list, where each sublist represents a set of inputs.
-iface.launch()
-# dict = {"ECE": ECE[0], "y_bar": ECE[1], "p_bar": ECE[2], "bin_freq": ECE[3]}
-# references=[0, 1, 2], predictions=)
-# https://gradio.app/getting_started/#multiple-inputs-and-outputs
-## fix with sliders for all kwargs
-"""
-DEV: #might be nice to also plot reliability diagram
-have sliders for kwargs :)
-metric = ECE()
-"""

 from evaluate.utils import launch_gradio_widget
 from ece import ECE
+import matplotlib.pyplot as plt
+import seaborn as sns
+sns.set_style('white')
+sns.set_context("paper", font_scale=1)  # 2
+# plt.rcParams['figure.figsize'] = [10, 7]
+plt.rcParams['figure.dpi'] = 300
+plt.switch_backend('agg') #; https://stackoverflow.com/questions/14694408/runtimeerror-main-thread-is-not-in-main-loop
 sliders = [
     gr.Slider(0, 100, value=10, label="n_bins"),
 """
 def reliability_plot(results):
     fig = plt.figure()
     ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
     ax2 = plt.subplot2grid((3, 1), (2, 0))
     ]  # np.linspace(0, 1, n_bins)
     # if upper edge then minus binsize; same for center [but half]
+    ranged = np.linspace(bin_range[0], bin_range[1], n_bins)
     ax1.plot(
+        ranged,
+        ranged,
         color="darkgreen",
         ls="dotted",
         label="Perfect",
     bin_freqs[anindices] = results["bin_freq"]
     ax2.hist(results["y_bar"], results["y_bar"], weights=bin_freqs)
+    #widths = np.diff(results["y_bar"])
     for j, bin in enumerate(results["y_bar"]):
         perfect = results["y_bar"][j]
         empirical = results["p_bar"][j]
         if np.isnan(empirical):
             continue
+        ax1.bar([perfect], height=[empirical], width=-ranged[j], align="edge", color="lightblue")
         if perfect == empirical:
             continue
     )
     plot = reliability_plot(results)
+    return results["ECE"], plot #plt.gcf()
+outputs = [gr.outputs.Textbox(label="ECE"), gr.Plot(label="Reliability diagram")]
 iface = gr.Interface(
     fn=compute_and_plot,
     outputs=outputs,
     description=metric.info.description,
     article=metric.info.citation,
+    # examples=sample_data; # ValueError: Examples argument must either be a directory or a nested list, where each sublist represents a set of inputs.
+).launch()

ece.py CHANGED Viewed

@@ -80,7 +80,7 @@ BAD_WORDS_URL = ""
 def create_bins(n_bins=10, scheme="equal-range", bin_range=None, P=None):
     assert scheme in [
         "equal-range",
-        "equal-masss",
     ], f"This binning scheme {scheme} is not implemented yet"
     if bin_range is None:
@@ -106,8 +106,9 @@ def create_bins(n_bins=10, scheme="equal-range", bin_range=None, P=None):
         # rightmost entry per equal size group
         for cur_group in range(n_bins - 1):
             bin_upper_edges += [max(groups[cur_group])]
-        bin_upper_edges += [np.inf]  # always +1 for right edges
         bins = np.array(bin_upper_edges)
     return bins
@@ -201,7 +202,7 @@ def top_1_CE(Y, P, **kwargs):
         n_bins=kwargs["n_bins"], bin_range=kwargs["bin_range"], scheme=kwargs["scheme"], P=p_max
     )
     CE = CE_estimate(y_correct, p_max, bins=bins, proxy=kwargs["proxy"], detail=kwargs["detail"])
-    if self.detail:
         return {"ECE": CE[0], "y_bar": CE[1], "p_bar": CE[2], "bin_freq": CE[3], "p_bar_cont": np.mean(p_max,-1), "accuracy": np.mean(y_correct)}
     return CE

 def create_bins(n_bins=10, scheme="equal-range", bin_range=None, P=None):
     assert scheme in [
         "equal-range",
+        "equal-mass",
     ], f"This binning scheme {scheme} is not implemented yet"
     if bin_range is None:
         # rightmost entry per equal size group
         for cur_group in range(n_bins - 1):
             bin_upper_edges += [max(groups[cur_group])]
+        bin_upper_edges += [1.01] #[np.inf]  # always +1 for right edges
         bins = np.array(bin_upper_edges)
+        #OverflowError: cannot convert float infinity to integer
     return bins
         n_bins=kwargs["n_bins"], bin_range=kwargs["bin_range"], scheme=kwargs["scheme"], P=p_max
     )
     CE = CE_estimate(y_correct, p_max, bins=bins, proxy=kwargs["proxy"], detail=kwargs["detail"])
+    if kwargs["detail"]:
         return {"ECE": CE[0], "y_bar": CE[1], "p_bar": CE[2], "bin_freq": CE[3], "p_bar_cont": np.mean(p_max,-1), "accuracy": np.mean(y_correct)}
     return CE