Spaces:
Sleeping
Sleeping
Upload app.py
Browse files- src/app.py +49 -53
src/app.py
CHANGED
|
@@ -510,6 +510,15 @@ with tab_overview:
|
|
| 510 |
.update_layout(xaxis_title="Year", yaxis_title="Citations"),
|
| 511 |
use_container_width=True)
|
| 512 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
st.subheader("Field distribution")
|
| 514 |
fd = (seed_filtered.groupby("field", dropna=False).size()
|
| 515 |
.reset_index(name="count").sort_values("count", ascending=False).head(20))
|
|
@@ -556,71 +565,58 @@ with tab_ontology:
|
|
| 556 |
with tab_kg:
|
| 557 |
st.subheader("Knowledge Graph — Selected Seed Paper")
|
| 558 |
st.caption("kg_nodes + kg_edges์์ ์ ํ๋ seed paper์ 1-hop ์๋ธ๊ทธ๋ํ (DuckDB ๋ถ๋ถ ์ฟผ๋ฆฌ)")
|
| 559 |
-
st.info("์๋ ๋ฒํผ์ ๋๋ฌ KG ๋ฐ์ดํฐ๋ฅผ ๋ก๋ํ์ธ์. kg_nodes๋ง ์ ์ฒด ๋ก๋, kg_edges๋ ํ์ํ ๋ถ๋ถ๋ง ์ฟผ๋ฆฌํฉ๋๋ค.")
|
| 560 |
|
| 561 |
max_edges_kg = st.slider("Max edges", 20, 150, 80, key="kg_max_edges")
|
| 562 |
|
| 563 |
-
|
| 564 |
-
with st.spinner("
|
| 565 |
-
|
|
|
|
| 566 |
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
|
|
|
|
|
|
|
|
|
| 571 |
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
st.warning("์ ํ๋ seed paper์ DOI๊ฐ ์์ด KG ์กฐํ๊ฐ ๋ถ๊ฐํฉ๋๋ค.")
|
| 575 |
else:
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
color="node_type", color_discrete_map=NODE_TYPE_COLORS,
|
| 598 |
-
title="Node Type Distribution")
|
| 599 |
-
.update_layout(showlegend=False, xaxis_title="", yaxis_title="Count"),
|
| 600 |
-
use_container_width=True)
|
| 601 |
-
|
| 602 |
-
st.caption("🖱 Scroll: zoom | Drag: pan | Click node: info | ⛶ button: fullscreen")
|
| 603 |
-
components.html(pyvis_from_kg(nodes_sub, edges_sub), height=820, scrolling=True)
|
| 604 |
-
except Exception as e:
|
| 605 |
-
st.error(str(e))
|
| 606 |
|
| 607 |
|
| 608 |
# โโโ 5. KG EXPLORER โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 609 |
with tab_kg_exp:
|
| 610 |
st.subheader("KG Explorer")
|
| 611 |
st.caption("kg_nodes๋ฅผ ํ์ํ๊ณ ์์ ๋
ธ๋์ ์ฐ๊ฒฐ ๊ด๊ณ๋ฅผ ์๊ฐํํฉ๋๋ค. kg_edges๋ DuckDB๋ก ํ์ํ ๋ถ๋ถ๋ง ์ฟผ๋ฆฌํฉ๋๋ค.")
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
if st.session_state.get("kg_loaded"):
|
| 619 |
-
try:
|
| 620 |
-
with st.spinner("kg_nodes 로딩 중..."):
|
| 621 |
-
kg_nodes = load_kg_nodes(data_dir_val)
|
| 622 |
-
kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
|
| 623 |
-
enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)
|
| 624 |
|
| 625 |
# โโ ์ ์ฒด ๋
ธ๋ ํ์
๋ถํฌ (kg_nodes๋ง์ผ๋ก ๊ณ์ฐ)
|
| 626 |
col_a, col_b = st.columns([1,2])
|
|
|
|
| 510 |
.update_layout(xaxis_title="Year", yaxis_title="Citations"),
|
| 511 |
use_container_width=True)
|
| 512 |
|
| 513 |
+
st.subheader("Overall intent distribution")
|
| 514 |
+
all_intents = events.groupby("primary_intent").size().to_dict()
|
| 515 |
+
ai_df = pd.DataFrame({"intent": ALLOWED_INTENTS,
|
| 516 |
+
"count": [int(all_intents.get(i, 0)) for i in ALLOWED_INTENTS]})
|
| 517 |
+
fig2 = px.bar(ai_df, x="intent", y="count", color="intent",
|
| 518 |
+
color_discrete_map=INTENT_COLORS)
|
| 519 |
+
fig2.update_layout(showlegend=False, xaxis_title="", yaxis_title="Count")
|
| 520 |
+
st.plotly_chart(fig2, use_container_width=True)
|
| 521 |
+
|
| 522 |
st.subheader("Field distribution")
|
| 523 |
fd = (seed_filtered.groupby("field", dropna=False).size()
|
| 524 |
.reset_index(name="count").sort_values("count", ascending=False).head(20))
|
|
|
|
| 565 |
with tab_kg:
|
| 566 |
st.subheader("Knowledge Graph — Selected Seed Paper")
|
| 567 |
st.caption("kg_nodes + kg_edges์์ ์ ํ๋ seed paper์ 1-hop ์๋ธ๊ทธ๋ํ (DuckDB ๋ถ๋ถ ์ฟผ๋ฆฌ)")
|
|
|
|
| 568 |
|
| 569 |
max_edges_kg = st.slider("Max edges", 20, 150, 80, key="kg_max_edges")
|
| 570 |
|
| 571 |
+
try:
|
| 572 |
+
with st.spinner("KG 데이터 로딩 중... (최초 1회 후 캐시됩니다)"):
|
| 573 |
+
kg_nodes = load_kg_nodes(data_dir_val)
|
| 574 |
+
kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
|
| 575 |
|
| 576 |
+
seed_doi = selected_seed["doi"]
|
| 577 |
+
if not seed_doi:
|
| 578 |
+
st.warning("์ ํ๋ seed paper์ DOI๊ฐ ์์ด KG ์กฐํ๊ฐ ๋ถ๊ฐํฉ๋๋ค.")
|
| 579 |
+
else:
|
| 580 |
+
node_id = f"seed:{seed_doi}"
|
| 581 |
+
with st.spinner("kg_edges 쿼리 중 (DuckDB)..."):
|
| 582 |
+
edges_sub = query_kg_edges_for_node(node_id, kg_edges_path, max_edges_kg)
|
| 583 |
|
| 584 |
+
if edges_sub.empty:
|
| 585 |
+
st.warning(f"KG์์ ํด๋น ๋
ธ๋์ ์ฃ์ง๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค. (node_id: {node_id})")
|
|
|
|
| 586 |
else:
|
| 587 |
+
all_node_ids = set(edges_sub["source"].tolist()) | set(edges_sub["target"].tolist())
|
| 588 |
+
nodes_sub = kg_nodes[kg_nodes["node_id"].isin(all_node_ids)]
|
| 589 |
+
|
| 590 |
+
c1, c2, c3 = st.columns(3)
|
| 591 |
+
c1.metric("Nodes", fmt_num(len(nodes_sub)))
|
| 592 |
+
c2.metric("Edges", fmt_num(len(edges_sub)))
|
| 593 |
+
c3.metric("Node types", fmt_num(nodes_sub["node_type"].nunique()))
|
| 594 |
+
|
| 595 |
+
type_counts = nodes_sub["node_type"].value_counts().reset_index()
|
| 596 |
+
type_counts.columns = ["node_type", "count"]
|
| 597 |
+
st.plotly_chart(
|
| 598 |
+
px.bar(type_counts, x="node_type", y="count",
|
| 599 |
+
color="node_type", color_discrete_map=NODE_TYPE_COLORS,
|
| 600 |
+
title="Node Type Distribution")
|
| 601 |
+
.update_layout(showlegend=False, xaxis_title="", yaxis_title="Count"),
|
| 602 |
+
use_container_width=True)
|
| 603 |
+
|
| 604 |
+
st.caption("🖱 Scroll: zoom | Drag: pan | Click node: info | ⛶ button: fullscreen")
|
| 605 |
+
components.html(pyvis_from_kg(nodes_sub, edges_sub), height=820, scrolling=True)
|
| 606 |
+
except Exception as e:
|
| 607 |
+
st.error(str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
|
| 610 |
# โโโ 5. KG EXPLORER โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
| 611 |
with tab_kg_exp:
|
| 612 |
st.subheader("KG Explorer")
|
| 613 |
st.caption("kg_nodes๋ฅผ ํ์ํ๊ณ ์์ ๋
ธ๋์ ์ฐ๊ฒฐ ๊ด๊ณ๋ฅผ ์๊ฐํํฉ๋๋ค. kg_edges๋ DuckDB๋ก ํ์ํ ๋ถ๋ถ๋ง ์ฟผ๋ฆฌํฉ๋๋ค.")
|
| 614 |
+
|
| 615 |
+
try:
|
| 616 |
+
with st.spinner("KG 데이터 로딩 중... (최초 1회 후 캐시됩니다)"):
|
| 617 |
+
kg_nodes = load_kg_nodes(data_dir_val)
|
| 618 |
+
kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
|
| 619 |
+
enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
|
| 621 |
# โโ ์ ์ฒด ๋
ธ๋ ํ์
๋ถํฌ (kg_nodes๋ง์ผ๋ก ๊ณ์ฐ)
|
| 622 |
col_a, col_b = st.columns([1,2])
|