Spaces:
Runtime error
Runtime error
Mariusz Kossakowski committed on
Commit ·
10bdf14
1
Parent(s): 90966f7
Add class distribution dataframe to aspectemo dataset
Browse files
clarin_datasets/aspectemo_dataset.py
CHANGED
|
@@ -48,6 +48,7 @@ class AspectEmoDataset(DatasetToShow):
|
|
| 48 |
header = st.container()
|
| 49 |
description = st.container()
|
| 50 |
dataframe_head = st.container()
|
|
|
|
| 51 |
|
| 52 |
with header:
|
| 53 |
st.title(self.dataset_name)
|
|
@@ -62,3 +63,25 @@ class AspectEmoDataset(DatasetToShow):
|
|
| 62 |
st.header("First 10 observations of the dataset")
|
| 63 |
st.dataframe(df_to_show)
|
| 64 |
st.text_area(label="Latex code", value=df_to_show.style.to_latex())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
header = st.container()
|
| 49 |
description = st.container()
|
| 50 |
dataframe_head = st.container()
|
| 51 |
+
class_distribution = st.container()
|
| 52 |
|
| 53 |
with header:
|
| 54 |
st.title(self.dataset_name)
|
|
|
|
| 63 |
st.header("First 10 observations of the dataset")
|
| 64 |
st.dataframe(df_to_show)
|
| 65 |
st.text_area(label="Latex code", value=df_to_show.style.to_latex())
|
| 66 |
+
|
| 67 |
+
class_distribution_dict = {}
|
| 68 |
+
for subset in self.subsets:
|
| 69 |
+
all_labels = self.data_dict[subset]["labels"].tolist()
|
| 70 |
+
all_labels = [x for subarray in all_labels for x in subarray if x != 0]
|
| 71 |
+
all_labels = pd.Series(all_labels)
|
| 72 |
+
class_distribution_dict[subset] = (
|
| 73 |
+
all_labels.value_counts(normalize=True)
|
| 74 |
+
.sort_index()
|
| 75 |
+
.reset_index()
|
| 76 |
+
.rename({"index": "class", 0: subset}, axis="columns")
|
| 77 |
+
)
|
| 78 |
+
|
| 79 |
+
class_distribution_df = pd.merge(
|
| 80 |
+
class_distribution_dict["train"],
|
| 81 |
+
class_distribution_dict["test"],
|
| 82 |
+
on="class",
|
| 83 |
+
)
|
| 84 |
+
with class_distribution:
|
| 85 |
+
st.header("Class distribution in each subset (without '0')")
|
| 86 |
+
st.dataframe(class_distribution_df)
|
| 87 |
+
st.text_area(label="LaTeX code", value=class_distribution_df.style.to_latex())
|