Update app.py
Browse files
app.py
CHANGED
|
@@ -47,24 +47,50 @@ st.dataframe(df, use_container_width=True)
|
|
| 47 |
# =====================
|
| 48 |
# SUMMARY STATISTICS
|
| 49 |
# =====================
|
| 50 |
-
st.subheader("📊 Statistical Summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
-
metric_cols = [
|
| 53 |
-
"total_nodes", "num_classes", "num_properties", "num_individuals",
|
| 54 |
-
"avg_depth", "avg_breadth", "Processing Time (s)"
|
| 55 |
-
]
|
| 56 |
|
|
|
|
|
|
|
|
|
|
| 57 |
df_metrics = df[metric_cols]
|
| 58 |
-
|
| 59 |
summary = df_metrics.describe().T
|
| 60 |
summary["missing"] = df_metrics.isnull().sum()
|
| 61 |
summary = summary.round(2)
|
| 62 |
-
|
| 63 |
slight_summary = summary[['mean', 'std', '25%', '50%', '75%', 'max']]
|
| 64 |
-
|
| 65 |
st.dataframe(slight_summary, use_container_width=True)
|
| 66 |
|
| 67 |
|
|
|
|
|
|
|
| 68 |
# =====================
|
| 69 |
# COMPLEXITY SCORE
|
| 70 |
# =====================
|
|
@@ -189,4 +215,6 @@ for j in range(i + 1, len(axes)):
|
|
| 189 |
axes[j].axis("off")
|
| 190 |
|
| 191 |
plt.tight_layout(rect=[0, 0, 1, 1])
|
| 192 |
-
st.pyplot(fig)
|
|
|
|
|
|
|
|
|
| 47 |
# =====================
|
| 48 |
# SUMMARY STATISTICS
|
| 49 |
# =====================
|
| 50 |
+
st.subheader("📊 Statistical Summary")
|
| 51 |
+
|
| 52 |
+
st.write("#### Distribution of ontologies per domain.")
|
| 53 |
+
domain_stats = df["Domain"].value_counts().reset_index()
|
| 54 |
+
domain_stats.columns = ["Domain", "count"]
|
| 55 |
+
domain_stats["percentage"] = (domain_stats["count"] / domain_stats["count"].sum()) * 100
|
| 56 |
+
fig, ax = plt.subplots(figsize=(12, 6))
|
| 57 |
+
sns.set_style("whitegrid")
|
| 58 |
+
ax = sns.barplot(
|
| 59 |
+
data=domain_stats,
|
| 60 |
+
x="Domain",
|
| 61 |
+
y="count",
|
| 62 |
+
palette="viridis"
|
| 63 |
+
)
|
| 64 |
+
for i, row in domain_stats.iterrows():
|
| 65 |
+
ax.text(
|
| 66 |
+
i,
|
| 67 |
+
row["count"] + 0.02 * domain_stats["count"].max(),
|
| 68 |
+
f"{row['percentage']:.1f}%",
|
| 69 |
+
ha="center",
|
| 70 |
+
fontsize=10,
|
| 71 |
+
fontweight="bold"
|
| 72 |
+
)
|
| 73 |
+
plt.title("Number and Percentage of Ontologies per Domain", fontsize=16)
|
| 74 |
+
plt.xlabel("Domain", fontsize=14)
|
| 75 |
+
plt.ylabel("Number of Ontologies", fontsize=14)
|
| 76 |
+
plt.xticks(rotation=45, ha="right")
|
| 77 |
+
plt.tight_layout()
|
| 78 |
+
st.pyplot(fig)
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
+
st.write("#### Statistical summary of key metrics.")
|
| 82 |
+
metric_cols = ["total_nodes", "num_classes", "num_properties", "num_individuals",
|
| 83 |
+
"avg_depth", "avg_breadth", "Processing Time (s)"]
|
| 84 |
df_metrics = df[metric_cols]
|
|
|
|
| 85 |
summary = df_metrics.describe().T
|
| 86 |
summary["missing"] = df_metrics.isnull().sum()
|
| 87 |
summary = summary.round(2)
|
|
|
|
| 88 |
slight_summary = summary[['mean', 'std', '25%', '50%', '75%', 'max']]
|
|
|
|
| 89 |
st.dataframe(slight_summary, use_container_width=True)
|
| 90 |
|
| 91 |
|
| 92 |
+
st.subheader("📊 Statistical Summary for All Metrics")
|
| 93 |
+
|
| 94 |
# =====================
|
| 95 |
# COMPLEXITY SCORE
|
| 96 |
# =====================
|
|
|
|
| 215 |
axes[j].axis("off")
|
| 216 |
|
| 217 |
plt.tight_layout(rect=[0, 0, 1, 1])
|
| 218 |
+
st.pyplot(fig)
|
| 219 |
+
|
| 220 |
+
st.write("\n\n")
|