Spaces:
Sleeping
Sleeping
Upload app.py
Browse files- src/app.py +106 -106
src/app.py
CHANGED
|
@@ -613,118 +613,118 @@ with tab_kg_exp:
|
|
| 613 |
st.caption("kg_nodesλ₯Ό νμνκ³ μμ λ
Έλμ μ°κ²° κ΄κ³λ₯Ό μκ°νν©λλ€. kg_edgesλ DuckDBλ‘ νμν λΆλΆλ§ 쿼리ν©λλ€.")
|
| 614 |
|
| 615 |
try:
|
|
|
|
| 616 |
with st.spinner("KG λ°μ΄ν° λ‘λ© μ€... (μ΅μ΄ 1ν ν μΊμλ©λλ€)"):
|
| 617 |
-
|
| 618 |
kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
|
| 619 |
enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)
|
| 620 |
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 712 |
st.plotly_chart(
|
| 713 |
-
px.
|
| 714 |
-
title="Semantic Evidence
|
| 715 |
-
|
|
|
|
| 716 |
use_container_width=True)
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
px.bar(field_df, x="field", y="sem_ratio",
|
| 721 |
-
title="Semantic Evidence Rate by Field",
|
| 722 |
-
labels={"sem_ratio":"Evidence Rate","field":"Field"})
|
| 723 |
-
.update_layout(xaxis_tickangle=-40),
|
| 724 |
-
use_container_width=True)
|
| 725 |
-
|
| 726 |
-
except Exception as e:
|
| 727 |
-
st.error(str(e))
|
| 728 |
|
| 729 |
|
| 730 |
# βββ 6. GEOGRAPHIC MAP ββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 613 |
# KG Explorer tab body: node/edge type overview, node search with an on-demand
# ego network, and aggregate statistics over the enriched citation events.
#
# NOTE(review): the Korean UI strings in this block were restored from
# mis-decoded text (UTF-8 bytes rendered as ISO-8859-7). Two spots could not be
# recovered unambiguously and are best guesses -- the word "상위" in the caption
# directly below and the "⛶" glyph in the ego-network hint. Confirm both
# against the upstream file. Block nesting was likewise rebuilt from syntax and
# data dependencies; confirm it as well.
st.caption(
    "kg_nodes를 탐색하고 상위 노드의 연결 관계를 시각화합니다. "
    "kg_edges는 DuckDB로 필요한 부분만 쿼리합니다."
)

try:
    # The spinner wraps only the data loading; all UI is rendered outside it.
    with st.spinner("KG 데이터 로딩 중... (최초 1회 후 캐시됩니다)"):
        kg_nodes_exp = load_kg_nodes(data_dir_val)
        kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
        enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)

    # -- Overall node / edge type distribution
    col_a, col_b = st.columns([1, 2])
    with col_a:
        st.subheader("Node Type Counts")
        nt = kg_nodes_exp["node_type"].value_counts().reset_index()
        nt.columns = ["node_type", "count"]
        st.dataframe(nt, use_container_width=True, hide_index=True)

        st.subheader("Edge Type Counts")
        import duckdb as _ddb

        # The path is embedded in a SQL string literal: double any single
        # quote so a path containing "'" cannot break the statement.
        edges_sql_path = str(kg_edges_path).replace("'", "''")
        et = _ddb.execute(f"""
            SELECT edge_type, COUNT(*) AS count
            FROM read_parquet('{edges_sql_path}')
            GROUP BY edge_type ORDER BY count DESC
        """).df()
        st.dataframe(et, use_container_width=True, hide_index=True)

    with col_b:
        st.subheader("Node Type Distribution")
        nt_fig = px.bar(nt, x="node_type", y="count", color="node_type",
                        color_discrete_map=NODE_TYPE_COLORS)
        nt_fig.update_layout(showlegend=False, xaxis_title="", yaxis_title="Count")
        st.plotly_chart(nt_fig, use_container_width=True)

    # -- Node Search & Ego Network
    st.markdown("---")
    st.subheader("Node Search & Ego Network")
    exp_col1, exp_col2 = st.columns([1, 3])
    with exp_col1:
        type_options = ["(all)"] + sorted(kg_nodes_exp["node_type"].unique().tolist())
        sel_type = st.selectbox("Filter by node type", type_options)
        filtered_nodes = (kg_nodes_exp if sel_type == "(all)"
                          else kg_nodes_exp[kg_nodes_exp["node_type"] == sel_type])
        search_q = st.text_input("Search node label / DOI")
        if search_q:
            # regex=False: the query is free text typed by the user. Treated as
            # a regex, input such as "(" raises re.error and "." (present in
            # every DOI) matches any character.
            filtered_nodes = filtered_nodes[
                filtered_nodes["label"].str.contains(
                    search_q, case=False, na=False, regex=False)
                | filtered_nodes["doi"].str.contains(
                    search_q, case=False, na=False, regex=False)
            ]

        # Only the first 100 matches are offered in the select box.
        sample = filtered_nodes.head(100)
        node_options = sample["node_id"].tolist()
        if not node_options:
            st.warning("검색 결과가 없습니다.")
        else:
            # Build the id -> label lookup once (first occurrence wins)
            # instead of rescanning the frame for every option.
            first_label = {}
            for nid, lbl in zip(sample["node_id"], sample["label"]):
                first_label.setdefault(nid, lbl)

            def _option_label(nid):
                """Return the node label cut to 60 chars, or the id if the label is not text."""
                lbl = first_label[nid]
                # A missing label is NaN (a float); slicing it would raise.
                return lbl[:60] if isinstance(lbl, str) else str(nid)

            sel_node_id = st.selectbox(
                "Select node", node_options,
                format_func=_option_label,
            )
            sel_node_info = sample[sample["node_id"] == sel_node_id].iloc[0]
            st.markdown(f"**Type**: {sel_node_info.get('node_type', '')}")
            st.markdown(f"**DOI**: {sel_node_info.get('doi', '') or '-'}")
            st.markdown(f"**Publication**: {sel_node_info.get('publication_name', '') or '-'}")
            st.markdown(f"**Group**: {sel_node_info.get('group', '') or '-'}")
            st.markdown(f"**Cited by**: {fmt_num(sel_node_info.get('citedby_count', ''))}")

            max_e = st.slider("Max edges shown", 20, 150, 60, key="kg_exp_max")
            if st.button("Show ego network", key="kg_exp_show"):
                with st.spinner("DuckDB로 엣지 쿼리 중..."):
                    exp_edges = query_explorer_edges(sel_node_id, kg_edges_path, max_e)
                if exp_edges.empty:
                    st.warning("연결된 엣지가 없습니다.")
                    # Drop the previous result so the right-hand column does not
                    # keep showing another node's network next to this warning.
                    st.session_state.pop("exp_nodes", None)
                    st.session_state.pop("exp_edges", None)
                else:
                    all_ids = set(exp_edges["source"].tolist()) | set(exp_edges["target"].tolist())
                    # Kept in session state so the graph survives the reruns
                    # triggered by the other widgets.
                    st.session_state["exp_nodes"] = kg_nodes_exp[kg_nodes_exp["node_id"].isin(all_ids)]
                    st.session_state["exp_edges"] = exp_edges

    with exp_col2:
        if "exp_nodes" in st.session_state:
            en = st.session_state["exp_nodes"]
            ee = st.session_state["exp_edges"]
            st.caption(f"Nodes: {len(en)} | Edges: {len(ee)}")
            st.caption("🖱 Scroll: zoom | Drag: pan | Click node: info | ⛶ button: fullscreen")
            components.html(pyvis_from_kg(en, ee, height="740px"), height=760, scrolling=True)
        else:
            st.info("왼쪽에서 노드를 선택하고 'Show ego network'를 클릭하세요.")

    # -- Enriched insights (DuckDB aggregates only)
    st.markdown("---")
    st.subheader("Enriched Citation Insights")
    st.caption("citation_events_enriched: DuckDB로 집계 통계만 쿼리 (전체 로드 없음)")
    with st.spinner("Enriched 통계 쿼리 중 (DuckDB)..."):
        sem_df, field_df = query_enriched_stats(enriched_path)

    if not sem_df.empty:
        # True == 1 and False == 0 hash to the same dict keys, so this
        # two-entry mapping also covers integer 1/0 flags.
        sem_df["label"] = sem_df["has_semantic_evidence"].map(
            {True: "With evidence", False: "Without evidence"})
        col_s1, col_s2 = st.columns(2)
        with col_s1:
            st.plotly_chart(
                px.pie(sem_df, names="label", values="count",
                       title="Semantic Evidence Coverage")
                .update_layout(legend_title=""),
                use_container_width=True)
        with col_s2:
            if not field_df.empty:
                st.plotly_chart(
                    px.bar(field_df, x="field", y="sem_ratio",
                           title="Semantic Evidence Rate by Field",
                           labels={"sem_ratio": "Evidence Rate", "field": "Field"})
                    .update_layout(xaxis_tickangle=-40),
                    use_container_width=True)

except Exception as e:
    # Tab-level boundary: surface the failure in the UI instead of crashing the app.
    st.error(str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 728 |
|
| 729 |
|
| 730 |
# βββ 6. GEOGRAPHIC MAP ββββββββββββββββββββββββββββββββββββββββββ
|