Daniel0315 committed on
Commit
67bb6ec
·
verified ·
1 Parent(s): 8e127bd

Upload app.py

Browse files
Files changed (1) hide show
  1. src/app.py +49 -53
src/app.py CHANGED
@@ -510,6 +510,15 @@ with tab_overview:
510
  .update_layout(xaxis_title="Year", yaxis_title="Citations"),
511
  use_container_width=True)
512
 
 
 
 
 
 
 
 
 
 
513
  st.subheader("Field distribution")
514
  fd = (seed_filtered.groupby("field", dropna=False).size()
515
  .reset_index(name="count").sort_values("count", ascending=False).head(20))
@@ -556,71 +565,58 @@ with tab_ontology:
556
  with tab_kg:
557
  st.subheader("Knowledge Graph — Selected Seed Paper")
558
  st.caption("kg_nodes + kg_edges์—์„œ ์„ ํƒ๋œ seed paper์˜ 1-hop ์„œ๋ธŒ๊ทธ๋ž˜ํ”„ (DuckDB ๋ถ€๋ถ„ ์ฟผ๋ฆฌ)")
559
- st.info("์•„๋ž˜ ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ KG ๋ฐ์ดํ„ฐ๋ฅผ ๋กœ๋“œํ•˜์„ธ์š”. kg_nodes๋งŒ ์ „์ฒด ๋กœ๋“œ, kg_edges๋Š” ํ•„์š”ํ•œ ๋ถ€๋ถ„๋งŒ ์ฟผ๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")
560
 
561
  max_edges_kg = st.slider("Max edges", 20, 150, 80, key="kg_max_edges")
562
 
563
- if st.button("KG ๋ฐ์ดํ„ฐ ๋กœ๋“œ", key="kg_load"):
564
- with st.spinner("kg_nodes ๋กœ๋”ฉ + kg_edges ๊ฒฝ๋กœ ์ค€๋น„ ์ค‘..."):
565
- st.session_state["kg_loaded"] = True
 
566
 
567
- if st.session_state.get("kg_loaded"):
568
- try:
569
- with st.spinner("kg_nodes ๋กœ๋”ฉ ์ค‘..."):
570
- kg_nodes = load_kg_nodes(data_dir_val)
 
 
 
571
 
572
- seed_doi = selected_seed["doi"]
573
- if not seed_doi:
574
- st.warning("์„ ํƒ๋œ seed paper์˜ DOI๊ฐ€ ์—†์–ด KG ์กฐํšŒ๊ฐ€ ๋ถˆ๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.")
575
  else:
576
- node_id = f"seed:{seed_doi}"
577
-
578
- with st.spinner("kg_edges ์ฟผ๋ฆฌ ์ค‘ (DuckDB)..."):
579
- kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
580
- edges_sub = query_kg_edges_for_node(node_id, kg_edges_path, max_edges_kg)
581
-
582
- if edges_sub.empty:
583
- st.warning(f"KG์—์„œ ํ•ด๋‹น ๋…ธ๋“œ์˜ ์—ฃ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. (node_id: {node_id})")
584
- else:
585
- all_node_ids = set(edges_sub["source"].tolist()) | set(edges_sub["target"].tolist())
586
- nodes_sub = kg_nodes[kg_nodes["node_id"].isin(all_node_ids)]
587
-
588
- c1, c2, c3 = st.columns(3)
589
- c1.metric("Nodes", fmt_num(len(nodes_sub)))
590
- c2.metric("Edges", fmt_num(len(edges_sub)))
591
- c3.metric("Node types", fmt_num(nodes_sub["node_type"].nunique()))
592
-
593
- type_counts = nodes_sub["node_type"].value_counts().reset_index()
594
- type_counts.columns = ["node_type", "count"]
595
- st.plotly_chart(
596
- px.bar(type_counts, x="node_type", y="count",
597
- color="node_type", color_discrete_map=NODE_TYPE_COLORS,
598
- title="Node Type Distribution")
599
- .update_layout(showlegend=False, xaxis_title="", yaxis_title="Count"),
600
- use_container_width=True)
601
-
602
- st.caption("๐Ÿ–ฑ Scroll: zoom | Drag: pan | Click node: info | โ›ถ button: fullscreen")
603
- components.html(pyvis_from_kg(nodes_sub, edges_sub), height=820, scrolling=True)
604
- except Exception as e:
605
- st.error(str(e))
606
 
607
 
608
  # โ•โ•โ• 5. KG EXPLORER โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
609
  with tab_kg_exp:
610
  st.subheader("KG Explorer")
611
  st.caption("kg_nodes๋ฅผ ํƒ์ƒ‰ํ•˜๊ณ  ์ž„์˜ ๋…ธ๋“œ์˜ ์—ฐ๊ฒฐ ๊ด€๊ณ„๋ฅผ ์‹œ๊ฐํ™”ํ•ฉ๋‹ˆ๋‹ค. kg_edges๋Š” DuckDB๋กœ ํ•„์š”ํ•œ ๋ถ€๋ถ„๋งŒ ์ฟผ๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")
612
- st.info("์•„๋ž˜ ๋ฒ„ํŠผ์„ ๋ˆŒ๋Ÿฌ KG ๋ฐ์ดํ„ฐ๋ฅผ ๋กœ๋“œํ•˜์„ธ์š”. kg_nodes๋งŒ ์ „์ฒด ๋กœ๋“œ๋˜๊ณ , kg_edges/enriched๋Š” DuckDB๋กœ ์ฟผ๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")
613
-
614
- if st.button("KG ๋ฐ์ดํ„ฐ ๋กœ๋“œ", key="kg_exp_load"):
615
- with st.spinner("kg_nodes ๋กœ๋”ฉ ์ค‘..."):
616
- st.session_state["kg_loaded"] = True
617
-
618
- if st.session_state.get("kg_loaded"):
619
- try:
620
- with st.spinner("kg_nodes ๋กœ๋”ฉ ์ค‘..."):
621
- kg_nodes = load_kg_nodes(data_dir_val)
622
- kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
623
- enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)
624
 
625
  # โ”€โ”€ ์ „์ฒด ๋…ธ๋“œ ํƒ€์ž… ๋ถ„ํฌ (kg_nodes๋งŒ์œผ๋กœ ๊ณ„์‚ฐ)
626
  col_a, col_b = st.columns([1,2])
 
510
  .update_layout(xaxis_title="Year", yaxis_title="Citations"),
511
  use_container_width=True)
512
 
513
+ st.subheader("Overall intent distribution")
514
+ all_intents = events.groupby("primary_intent").size().to_dict()
515
+ ai_df = pd.DataFrame({"intent": ALLOWED_INTENTS,
516
+ "count": [int(all_intents.get(i, 0)) for i in ALLOWED_INTENTS]})
517
+ fig2 = px.bar(ai_df, x="intent", y="count", color="intent",
518
+ color_discrete_map=INTENT_COLORS)
519
+ fig2.update_layout(showlegend=False, xaxis_title="", yaxis_title="Count")
520
+ st.plotly_chart(fig2, use_container_width=True)
521
+
522
  st.subheader("Field distribution")
523
  fd = (seed_filtered.groupby("field", dropna=False).size()
524
  .reset_index(name="count").sort_values("count", ascending=False).head(20))
 
565
  with tab_kg:
566
  st.subheader("Knowledge Graph — Selected Seed Paper")
567
  st.caption("kg_nodes + kg_edges์—์„œ ์„ ํƒ๋œ seed paper์˜ 1-hop ์„œ๋ธŒ๊ทธ๋ž˜ํ”„ (DuckDB ๋ถ€๋ถ„ ์ฟผ๋ฆฌ)")
 
568
 
569
  max_edges_kg = st.slider("Max edges", 20, 150, 80, key="kg_max_edges")
570
 
571
+ try:
572
+ with st.spinner("KG ๋ฐ์ดํ„ฐ ๋กœ๋”ฉ ์ค‘... (์ตœ์ดˆ 1ํšŒ ํ›„ ์บ์‹œ๋ฉ๋‹ˆ๋‹ค)"):
573
+ kg_nodes = load_kg_nodes(data_dir_val)
574
+ kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
575
 
576
+ seed_doi = selected_seed["doi"]
577
+ if not seed_doi:
578
+ st.warning("์„ ํƒ๋œ seed paper์˜ DOI๊ฐ€ ์—†์–ด KG ์กฐํšŒ๊ฐ€ ๋ถˆ๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.")
579
+ else:
580
+ node_id = f"seed:{seed_doi}"
581
+ with st.spinner("kg_edges ์ฟผ๋ฆฌ ์ค‘ (DuckDB)..."):
582
+ edges_sub = query_kg_edges_for_node(node_id, kg_edges_path, max_edges_kg)
583
 
584
+ if edges_sub.empty:
585
+ st.warning(f"KG์—์„œ ํ•ด๋‹น ๋…ธ๋“œ์˜ ์—ฃ์ง€๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. (node_id: {node_id})")
 
586
  else:
587
+ all_node_ids = set(edges_sub["source"].tolist()) | set(edges_sub["target"].tolist())
588
+ nodes_sub = kg_nodes[kg_nodes["node_id"].isin(all_node_ids)]
589
+
590
+ c1, c2, c3 = st.columns(3)
591
+ c1.metric("Nodes", fmt_num(len(nodes_sub)))
592
+ c2.metric("Edges", fmt_num(len(edges_sub)))
593
+ c3.metric("Node types", fmt_num(nodes_sub["node_type"].nunique()))
594
+
595
+ type_counts = nodes_sub["node_type"].value_counts().reset_index()
596
+ type_counts.columns = ["node_type", "count"]
597
+ st.plotly_chart(
598
+ px.bar(type_counts, x="node_type", y="count",
599
+ color="node_type", color_discrete_map=NODE_TYPE_COLORS,
600
+ title="Node Type Distribution")
601
+ .update_layout(showlegend=False, xaxis_title="", yaxis_title="Count"),
602
+ use_container_width=True)
603
+
604
+ st.caption("๐Ÿ–ฑ Scroll: zoom | Drag: pan | Click node: info | โ›ถ button: fullscreen")
605
+ components.html(pyvis_from_kg(nodes_sub, edges_sub), height=820, scrolling=True)
606
+ except Exception as e:
607
+ st.error(str(e))
 
 
 
 
 
 
 
 
 
608
 
609
 
610
  # โ•โ•โ• 5. KG EXPLORER โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•
611
  with tab_kg_exp:
612
  st.subheader("KG Explorer")
613
  st.caption("kg_nodes๋ฅผ ํƒ์ƒ‰ํ•˜๊ณ  ์ž„์˜ ๋…ธ๋“œ์˜ ์—ฐ๊ฒฐ ๊ด€๊ณ„๋ฅผ ์‹œ๊ฐํ™”ํ•ฉ๋‹ˆ๋‹ค. kg_edges๋Š” DuckDB๋กœ ํ•„์š”ํ•œ ๋ถ€๋ถ„๋งŒ ์ฟผ๋ฆฌํ•ฉ๋‹ˆ๋‹ค.")
614
+
615
+ try:
616
+ with st.spinner("KG ๋ฐ์ดํ„ฐ ๋กœ๋”ฉ ์ค‘... (์ตœ์ดˆ 1ํšŒ ํ›„ ์บ์‹œ๋ฉ๋‹ˆ๋‹ค)"):
617
+ kg_nodes = load_kg_nodes(data_dir_val)
618
+ kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
619
+ enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)
 
 
 
 
 
 
620
 
621
  # โ”€โ”€ ์ „์ฒด ๋…ธ๋“œ ํƒ€์ž… ๋ถ„ํฌ (kg_nodes๋งŒ์œผ๋กœ ๊ณ„์‚ฐ)
622
  col_a, col_b = st.columns([1,2])