# =====================
# IMPORTS
# =====================
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from scipy.stats import gaussian_kde

# =====================
# STREAMLIT CONFIG
# =====================
# Point Streamlit at a project-local config directory so the app neither
# depends on nor pollutes the user's home-directory configuration.
os.environ["STREAMLIT_CONFIG_DIR"] = os.path.join(os.getcwd(), ".streamlit")
os.makedirs(os.environ["STREAMLIT_CONFIG_DIR"], exist_ok=True)

st.set_page_config(page_title="OntoLearner Benchmark")
st.title("OntoLearner Benchmark – Ontology Metrics Dashboard")

# About banner at the top of the page.
with st.container():
    st.markdown(
        """
**OntoLearner** is an automated ontology learning framework designed to extract,
structure, and enrich knowledge from textual data. It provides a modular pipeline
combining NLP, machine learning, and ontology engineering principles to generate
OWL ontologies with high interpretability and consistency.
"""
    )
st.markdown("---")

# =====================
# LOAD DATA
# =====================
# NOTE(review): reading .xlsx requires the `openpyxl` engine at runtime.
df = pd.read_excel("metrics.xlsx")

st.subheader("πŸ“˜ Ontology Metrics Table")
st.dataframe(df, use_container_width=True)

# =====================
# SUMMARY STATISTICS
# =====================
st.subheader("πŸ“Š Statistical Summary")
st.write("#### Distribution of ontologies per domain.")

# Absolute count and percentage share of ontologies per domain.
domain_stats = df["Domain"].value_counts().reset_index()
domain_stats.columns = ["Domain", "count"]
domain_stats["percentage"] = (domain_stats["count"] / domain_stats["count"].sum()) * 100

sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(12, 6))
# FIX: draw on the explicitly created axes — the original created `fig, ax`
# but let seaborn draw on the implicit "current" axes and rebound `ax`.
sns.barplot(data=domain_stats, x="Domain", y="count", palette="viridis", ax=ax)

# Annotate every bar with its count and percentage, slightly above the bar top.
label_offset = 0.02 * domain_stats["count"].max()
for i, row in domain_stats.iterrows():
    ax.text(
        i,
        row["count"] + label_offset,
        f"{row['count']}\n({row['percentage']:.1f}%)",
        ha="center",
        fontsize=9,
    )

ax.set_title("Number and Percentage of Ontologies per Domain", fontsize=14)
ax.set_xlabel("Domain", fontsize=12)
ax.set_ylabel("Number of Ontologies", fontsize=12)
ax.tick_params(axis="x", rotation=90)
fig.tight_layout()
st.pyplot(fig)

st.write("#### Statistical summary of key metrics.")
metric_cols = [
    "total_nodes",
    "num_classes",
    "num_properties",
    "num_individuals",
    "avg_depth",
    "avg_breadth",
    "Processing Time (s)",
]
df_metrics = df[metric_cols]
summary = df_metrics.describe().T
summary["missing"] = df_metrics.isnull().sum()
summary = summary.round(2)
# Show only the most informative columns of the describe() output.
slight_summary = summary[["mean", "std", "25%", "50%", "75%", "max"]]
st.dataframe(slight_summary, use_container_width=True)
# =====================
# COMPLEXITY SCORE
# =====================
st.subheader("βš™οΈ Complexity Score Computation")

# Metric groups contributing to the composite complexity score.
graph_metrics = ["total_nodes", "total_edges", "num_root_nodes", "num_leaf_nodes"]
coverage_metrics = ["num_classes", "num_properties", "num_individuals"]
hierarchy_metrics = ["max_depth", "min_depth", "avg_depth", "depth_variance"]
breadth_metrics = ["max_breadth", "min_breadth", "avg_breadth", "breadth_variance"]
llms4ol_metrics = [
    "num_term_types",
    "num_taxonomic_relations",
    "num_non_taxonomic_relations",
    "avg_terms",
]

# FIX: derive the full metric list from the group lists instead of
# duplicating every name in a second hand-written list (kept in the same
# order as the original).
metrics = (
    graph_metrics
    + coverage_metrics
    + hierarchy_metrics
    + breadth_metrics
    + llms4ol_metrics
)

# Per-metric weight, assigned per (disjoint) group. Replaces the original
# loop of chained membership tests with a direct dict build.
weights = {}
for group, group_weight in (
    (graph_metrics, 0.3),
    (coverage_metrics, 0.25),
    (hierarchy_metrics, 0.10),
    (breadth_metrics, 0.20),
    (llms4ol_metrics, 0.15),
):
    for m in group:
        weights[m] = group_weight


def log_normalize(x):
    """Compress heavy-tailed raw counts with log(1 + x)."""
    return np.log1p(x)


def complexity_score(onto_metric, a=0.4, b=6.0, eps=1e-12):
    """Composite complexity score in (0, 1) for a single ontology.

    Each raw metric is log-normalized, combined into a weighted mean, then
    squashed through a logistic curve centred at ``b`` with steepness ``a``.

    Args:
        onto_metric: Mapping (e.g. a DataFrame row as a dict) containing every
            key listed in ``metrics``; a missing key raises ``KeyError``.
        a: Logistic steepness.
        b: Logistic midpoint, in weighted-log-mean units.
        eps: Tiny additive term inside the exponent. NOTE(review): at its
            default value this is numerically a no-op; it is kept only for
            backward compatibility of the signature.

    Returns:
        float score in the open interval (0, 1).
    """
    norm_metric = {m: log_normalize(onto_metric[m]) for m in metrics}
    weighted = {m: norm_metric[m] * weights[m] for m in weights}
    c_score = sum(weighted.values()) / sum(weights.values())
    return 1.0 / (1.0 + np.exp(-a * (c_score - b) + eps))


# Score every ontology and rank them (rank 1 = most complex).
cs = [complexity_score(dict(row)) for _, row in df.iterrows()]
df_out = df.copy()
df_out["complexity_score"] = cs
df_out["complexity_rank"] = (
    df_out["complexity_score"].rank(method="min", ascending=False).astype(int)
)

st.write(
    "The following table represents the ontologies with complexity score "
    "and their ranking based on this score."
)
st.dataframe(df_out, use_container_width=True)

# =====================
# VISUALIZATION
# =====================
st.subheader("πŸ“ˆ Complexity Score Visualizations")
top_n_val = 15
fig = plt.figure(figsize=(14, 11))

# PANEL 1 — TOP N BY COMPLEXITY
ax1 = plt.subplot2grid((3, 2), (0, 0), colspan=2)
topn = df_out.sort_values("complexity_score", ascending=False).head(top_n_val)
ax1.barh(topn["Ontology ID"].astype(str), topn["complexity_score"], color="#4C72B0")
ax1.invert_yaxis()  # highest score on top
ax1.set_title(f"Top {top_n_val} Ontologies by Complexity Score")
ax1.set_xlabel("Complexity Score", fontsize=12)

# PANEL 2 — PROCESSING TIME VS COMPLEXITY
ax2 = plt.subplot2grid((3, 2), (1, 0))
x = df_out["complexity_score"].values
y = df_out["Processing Time (s)"].values
ax2.scatter(x, y, alpha=0.4, s=60, edgecolor="black", linewidth=0.5, color="#DD8452")
# FIX: cap the polynomial degree by the sample count — the original
# unconditional degree-4 fit raises when fewer than 5 ontologies are loaded.
deg = min(4, max(1, len(x) - 1))
poly = np.poly1d(np.polyfit(x, y, deg))
xs = np.linspace(x.min(), x.max(), 300)
ax2.plot(xs, poly(xs), color="#DD8452", linewidth=1.5)
ax2.set_title("Processing Time vs Complexity")
ax2.set_xlabel("Complexity Score", fontsize=12)
ax2.set_ylabel("Processing Time (s)", fontsize=12)

# PANEL 3 — DISTRIBUTION (histogram + KDE overlay)
ax3 = plt.subplot2grid((3, 2), (1, 1))
ax3.hist(
    df_out["complexity_score"],
    bins=20,
    color="#55A868",
    edgecolor="black",
    alpha=0.8,
    density=True,
)
values = df_out["complexity_score"].dropna().values
kde = gaussian_kde(values)
xx = np.linspace(values.min(), values.max(), 1000)
ax3.plot(xx, kde(xx), linewidth=1.5, color="green")
ax3.set_title("Distribution of Complexity Scores")
ax3.set_xlabel("Complexity Score", fontsize=12)
ax3.set_ylabel("Density", fontsize=12)

plt.tight_layout(rect=[0, 0, 1, 0.97])
st.pyplot(fig)

# =====================
# CORRELATIONS
# =====================
st.subheader("πŸ“‘ Domain-Wise Correlations")

domains = sorted(df["Domain"].unique())
n_domains = len(domains)
n_rows = 2
n_cols = math.ceil(n_domains / n_rows)
# FIX: reuse the already-computed n_cols (the original recomputed it inline
# with np.ceil) and force a 2-D axes array with squeeze=False so ravel()
# works even when there is a single subplot.
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(n_cols * 3.5, n_rows * 3.5), squeeze=False
)
axes = axes.ravel()

# One compact (unlabelled) metric-correlation heatmap per domain.
for i, dom in enumerate(domains):
    corr = df[df["Domain"] == dom][metrics].corr()
    sns.heatmap(
        corr,
        cmap="coolwarm",
        square=True,
        cbar=False,
        linewidths=0.2,
        xticklabels=False,
        yticklabels=False,
        ax=axes[i],
    )
    axes[i].set_title(dom, fontsize=13)

# Blank out any unused grid cells.
for j in range(n_domains, len(axes)):
    axes[j].axis("off")

plt.tight_layout(rect=[0, 0, 1, 1])
st.pyplot(fig)

st.write("\n\n")
st.markdown(
    """
## πŸ”— Useful Links

- πŸ“¦ GitHub Repository: [sciknoworg/OntoLearner](https://github.com/sciknoworg/OntoLearner)
- πŸ“š Documentation: [ontolearner.readthedocs.io](https://ontolearner.readthedocs.io/)
- πŸ’‘ Acknowledgements: OntoLearner is developed and maintained by the **SciKnow Research Group**.

Moreover:

- If you encounter any issues or have questions, please submit them in the [GitHub issues tracker](https://github.com/sciknoworg/OntoLearner/issues).
- If you find this repository helpful or use OntoLearner in your work or research, feel free to cite our publication:

```bibtex
@inproceedings{babaei2023llms4ol,
    title={LLMs4OL: Large language models for ontology learning},
    author={Babaei Giglou, Hamed and D’Souza, Jennifer and Auer, S{\"o}ren},
    booktitle={International Semantic Web Conference},
    pages={408--427},
    year={2023},
    organization={Springer}
}
```

or:

```bibtex
@software{babaei_giglou_2025_15399783,
  author       = {Babaei Giglou, Hamed and D'Souza, Jennifer and Aioanei, Andrei and Mihindukulasooriya, Nandana and Auer, SΓΆren},
  title        = {OntoLearner: A Modular Python Library for Ontology Learning with LLMs},
  month        = may,
  year         = 2025,
  publisher    = {Zenodo},
  version      = {v1.3.0},
  doi          = {10.5281/zenodo.15399783},
  url          = {https://doi.org/10.5281/zenodo.15399783},
}
```

------------

This OntoLearner is licensed under [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT).
"""
)