Daniel0315 committed on
Commit
3899ec8
·
verified ·
1 Parent(s): 67bb6ec

Upload app.py

Browse files
Files changed (1) hide show
  1. src/app.py +106 -106
src/app.py CHANGED
@@ -613,118 +613,118 @@ with tab_kg_exp:
613
  st.caption("kg_nodesλ₯Ό νƒμƒ‰ν•˜κ³  μž„μ˜ λ…Έλ“œμ˜ μ—°κ²° 관계λ₯Ό μ‹œκ°ν™”ν•©λ‹ˆλ‹€. kg_edgesλŠ” DuckDB둜 ν•„μš”ν•œ λΆ€λΆ„λ§Œ μΏΌλ¦¬ν•©λ‹ˆλ‹€.")
614
 
615
  try:
 
616
  with st.spinner("KG 데이터 λ‘œλ”© 쀑... (졜초 1회 ν›„ μΊμ‹œλ©λ‹ˆλ‹€)"):
617
- kg_nodes = load_kg_nodes(data_dir_val)
618
  kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
619
  enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)
620
 
621
- # ── 전체 노드 타입 분포 (kg_nodes만으로 계산)
622
- col_a, col_b = st.columns([1,2])
623
- with col_a:
624
- st.subheader("Node Type Counts")
625
- nt = kg_nodes["node_type"].value_counts().reset_index()
626
- nt.columns = ["node_type","count"]
627
- st.dataframe(nt, use_container_width=True, hide_index=True)
628
-
629
- # Edge type 집계: DuckDB로 빠르게 계산
630
- st.subheader("Edge Type Counts")
631
- import duckdb
632
- et = duckdb.execute(f"""
633
- SELECT edge_type, COUNT(*) AS count
634
- FROM read_parquet('{kg_edges_path}')
635
- GROUP BY edge_type ORDER BY count DESC
636
- """).df()
637
- st.dataframe(et, use_container_width=True, hide_index=True)
638
-
639
- with col_b:
640
- st.subheader("Node Type Distribution")
641
- nt_fig = px.bar(nt, x="node_type", y="count", color="node_type",
642
- color_discrete_map=NODE_TYPE_COLORS)
643
- nt_fig.update_layout(showlegend=False, xaxis_title="", yaxis_title="Count")
644
- st.plotly_chart(nt_fig, use_container_width=True)
645
-
646
- st.markdown("---")
647
- st.subheader("Node Search & Ego Network")
648
- exp_col1, exp_col2 = st.columns([1,3])
649
- with exp_col1:
650
- type_options = ["(all)"] + sorted(kg_nodes["node_type"].unique().tolist())
651
- sel_type = st.selectbox("Filter by node type", type_options)
652
- filtered_nodes = (kg_nodes if sel_type == "(all)"
653
- else kg_nodes[kg_nodes["node_type"]==sel_type])
654
- search_q = st.text_input("Search node label / DOI")
655
- if search_q:
656
- filtered_nodes = filtered_nodes[
657
- filtered_nodes["label"].str.contains(search_q, case=False, na=False) |
658
- filtered_nodes["doi"].str.contains(search_q, case=False, na=False)
659
- ]
660
-
661
- sample = filtered_nodes.head(100)
662
- node_options = sample["node_id"].tolist()
663
- if not node_options:
664
- st.warning("검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
665
- else:
666
- sel_node_id = st.selectbox(
667
- "Select node", node_options,
668
- format_func=lambda nid: sample.loc[sample["node_id"]==nid,"label"].iloc[0][:60],
669
- )
670
- sel_node_info = sample[sample["node_id"]==sel_node_id].iloc[0]
671
- st.markdown(f"**Type**: {sel_node_info.get('node_type','')}")
672
- st.markdown(f"**DOI**: {sel_node_info.get('doi','') or '-'}")
673
- st.markdown(f"**Publication**: {sel_node_info.get('publication_name','') or '-'}")
674
- st.markdown(f"**Group**: {sel_node_info.get('group','') or '-'}")
675
- st.markdown(f"**Cited by**: {fmt_num(sel_node_info.get('citedby_count',''))}")
676
-
677
- max_e = st.slider("Max edges shown", 20, 150, 60, key="kg_exp_max")
678
- if st.button("Show ego network", key="kg_exp_show"):
679
- with st.spinner("DuckDB둜 μ—£μ§€ 쿼리 쀑..."):
680
- exp_edges = query_explorer_edges(sel_node_id, kg_edges_path, max_e)
681
- if exp_edges.empty:
682
- st.warning("μ—°κ²°λœ μ—£μ§€κ°€ μ—†μŠ΅λ‹ˆλ‹€.")
683
- else:
684
- all_ids = set(exp_edges["source"].tolist()) | set(exp_edges["target"].tolist())
685
- exp_nodes = kg_nodes[kg_nodes["node_id"].isin(all_ids)]
686
- st.session_state["exp_nodes"] = exp_nodes
687
- st.session_state["exp_edges"] = exp_edges
688
-
689
- with exp_col2:
690
- if "exp_nodes" in st.session_state:
691
- en = st.session_state["exp_nodes"]
692
- ee = st.session_state["exp_edges"]
693
- st.caption(f"Nodes: {len(en)} | Edges: {len(ee)}")
694
- st.caption("πŸ–± Scroll: zoom | Drag: pan | Click node: info | β›Ά button: fullscreen")
695
- components.html(pyvis_from_kg(en, ee, height="740px"), height=760, scrolling=True)
696
- else:
697
- st.info("μ™Όμͺ½μ—μ„œ λ…Έλ“œλ₯Ό μ„ νƒν•˜κ³  'Show ego network'λ₯Ό ν΄λ¦­ν•˜μ„Έμš”.")
698
-
699
- # ── Enriched 인사이트 (DuckDB 집계만)
700
- st.markdown("---")
701
- st.subheader("Enriched Citation Insights")
702
- st.caption("citation_events_enriched: DuckDB둜 집계 ν†΅κ³„λ§Œ 쿼리 (전체 λ‘œλ“œ μ—†μŒ)")
703
- with st.spinner("Enriched 톡계 쿼리 쀑 (DuckDB)..."):
704
- sem_df, field_df = query_enriched_stats(enriched_path)
705
-
706
- if not sem_df.empty:
707
- sem_df["label"] = sem_df["has_semantic_evidence"].map(
708
- {True:"With evidence", False:"Without evidence",
709
- 1:"With evidence", 0:"Without evidence"})
710
- col_s1, col_s2 = st.columns(2)
711
- with col_s1:
 
 
 
 
 
 
712
  st.plotly_chart(
713
- px.pie(sem_df, names="label", values="count",
714
- title="Semantic Evidence Coverage")
715
- .update_layout(legend_title=""),
 
716
  use_container_width=True)
717
- with col_s2:
718
- if not field_df.empty:
719
- st.plotly_chart(
720
- px.bar(field_df, x="field", y="sem_ratio",
721
- title="Semantic Evidence Rate by Field",
722
- labels={"sem_ratio":"Evidence Rate","field":"Field"})
723
- .update_layout(xaxis_tickangle=-40),
724
- use_container_width=True)
725
-
726
- except Exception as e:
727
- st.error(str(e))
728
 
729
 
730
  # ═══ 6. GEOGRAPHIC MAP ══════════════════════════════════════════
 
613
  st.caption("kg_nodesλ₯Ό νƒμƒ‰ν•˜κ³  μž„μ˜ λ…Έλ“œμ˜ μ—°κ²° 관계λ₯Ό μ‹œκ°ν™”ν•©λ‹ˆλ‹€. kg_edgesλŠ” DuckDB둜 ν•„μš”ν•œ λΆ€λΆ„λ§Œ μΏΌλ¦¬ν•©λ‹ˆλ‹€.")
614
 
615
  try:
616
+ # spinner는 로딩만, UI는 spinner 밖에
617
  with st.spinner("KG 데이터 λ‘œλ”© 쀑... (졜초 1회 ν›„ μΊμ‹œλ©λ‹ˆλ‹€)"):
618
+ kg_nodes_exp = load_kg_nodes(data_dir_val)
619
  kg_edges_path = get_parquet_path("kg_edges.parquet", data_dir_val)
620
  enriched_path = get_parquet_path("citation_events_enriched.parquet", data_dir_val)
621
 
622
+ # ── 전체 노드/엣지 타입 분포
623
+ col_a, col_b = st.columns([1, 2])
624
+ with col_a:
625
+ st.subheader("Node Type Counts")
626
+ nt = kg_nodes_exp["node_type"].value_counts().reset_index()
627
+ nt.columns = ["node_type", "count"]
628
+ st.dataframe(nt, use_container_width=True, hide_index=True)
629
+
630
+ st.subheader("Edge Type Counts")
631
+ import duckdb as _ddb
632
+ et = _ddb.execute(f"""
633
+ SELECT edge_type, COUNT(*) AS count
634
+ FROM read_parquet('{kg_edges_path}')
635
+ GROUP BY edge_type ORDER BY count DESC
636
+ """).df()
637
+ st.dataframe(et, use_container_width=True, hide_index=True)
638
+
639
+ with col_b:
640
+ st.subheader("Node Type Distribution")
641
+ nt_fig = px.bar(nt, x="node_type", y="count", color="node_type",
642
+ color_discrete_map=NODE_TYPE_COLORS)
643
+ nt_fig.update_layout(showlegend=False, xaxis_title="", yaxis_title="Count")
644
+ st.plotly_chart(nt_fig, use_container_width=True)
645
+
646
+ # ── Node Search & Ego Network
647
+ st.markdown("---")
648
+ st.subheader("Node Search & Ego Network")
649
+ exp_col1, exp_col2 = st.columns([1, 3])
650
+ with exp_col1:
651
+ type_options = ["(all)"] + sorted(kg_nodes_exp["node_type"].unique().tolist())
652
+ sel_type = st.selectbox("Filter by node type", type_options)
653
+ filtered_nodes = (kg_nodes_exp if sel_type == "(all)"
654
+ else kg_nodes_exp[kg_nodes_exp["node_type"] == sel_type])
655
+ search_q = st.text_input("Search node label / DOI")
656
+ if search_q:
657
+ filtered_nodes = filtered_nodes[
658
+ filtered_nodes["label"].str.contains(search_q, case=False, na=False) |
659
+ filtered_nodes["doi"].str.contains(search_q, case=False, na=False)
660
+ ]
661
+
662
+ sample = filtered_nodes.head(100)
663
+ node_options = sample["node_id"].tolist()
664
+ if not node_options:
665
+ st.warning("검색 κ²°κ³Όκ°€ μ—†μŠ΅λ‹ˆλ‹€.")
666
+ else:
667
+ sel_node_id = st.selectbox(
668
+ "Select node", node_options,
669
+ format_func=lambda nid: sample.loc[sample["node_id"] == nid, "label"].iloc[0][:60],
670
+ )
671
+ sel_node_info = sample[sample["node_id"] == sel_node_id].iloc[0]
672
+ st.markdown(f"**Type**: {sel_node_info.get('node_type', '')}")
673
+ st.markdown(f"**DOI**: {sel_node_info.get('doi', '') or '-'}")
674
+ st.markdown(f"**Publication**: {sel_node_info.get('publication_name', '') or '-'}")
675
+ st.markdown(f"**Group**: {sel_node_info.get('group', '') or '-'}")
676
+ st.markdown(f"**Cited by**: {fmt_num(sel_node_info.get('citedby_count', ''))}")
677
+
678
+ max_e = st.slider("Max edges shown", 20, 150, 60, key="kg_exp_max")
679
+ if st.button("Show ego network", key="kg_exp_show"):
680
+ with st.spinner("DuckDB둜 μ—£μ§€ 쿼리 쀑..."):
681
+ exp_edges = query_explorer_edges(sel_node_id, kg_edges_path, max_e)
682
+ if exp_edges.empty:
683
+ st.warning("μ—°κ²°λœ μ—£μ§€κ°€ μ—†μŠ΅λ‹ˆλ‹€.")
684
+ else:
685
+ all_ids = set(exp_edges["source"].tolist()) | set(exp_edges["target"].tolist())
686
+ st.session_state["exp_nodes"] = kg_nodes_exp[kg_nodes_exp["node_id"].isin(all_ids)]
687
+ st.session_state["exp_edges"] = exp_edges
688
+
689
+ with exp_col2:
690
+ if "exp_nodes" in st.session_state:
691
+ en = st.session_state["exp_nodes"]
692
+ ee = st.session_state["exp_edges"]
693
+ st.caption(f"Nodes: {len(en)} | Edges: {len(ee)}")
694
+ st.caption("πŸ–± Scroll: zoom | Drag: pan | Click node: info | β›Ά button: fullscreen")
695
+ components.html(pyvis_from_kg(en, ee, height="740px"), height=760, scrolling=True)
696
+ else:
697
+ st.info("μ™Όμͺ½μ—μ„œ λ…Έλ“œλ₯Ό μ„ νƒν•˜κ³  'Show ego network'λ₯Ό ν΄λ¦­ν•˜μ„Έμš”.")
698
+
699
+ # ── Enriched 인사이트 (DuckDB 집계만)
700
+ st.markdown("---")
701
+ st.subheader("Enriched Citation Insights")
702
+ st.caption("citation_events_enriched: DuckDB둜 집계 ν†΅κ³„λ§Œ 쿼리 (전체 λ‘œλ“œ μ—†μŒ)")
703
+ with st.spinner("Enriched 톡계 쿼리 쀑 (DuckDB)..."):
704
+ sem_df, field_df = query_enriched_stats(enriched_path)
705
+
706
+ if not sem_df.empty:
707
+ sem_df["label"] = sem_df["has_semantic_evidence"].map(
708
+ {True: "With evidence", False: "Without evidence",
709
+ 1: "With evidence", 0: "Without evidence"})
710
+ col_s1, col_s2 = st.columns(2)
711
+ with col_s1:
712
+ st.plotly_chart(
713
+ px.pie(sem_df, names="label", values="count",
714
+ title="Semantic Evidence Coverage")
715
+ .update_layout(legend_title=""),
716
+ use_container_width=True)
717
+ with col_s2:
718
+ if not field_df.empty:
719
  st.plotly_chart(
720
+ px.bar(field_df, x="field", y="sem_ratio",
721
+ title="Semantic Evidence Rate by Field",
722
+ labels={"sem_ratio": "Evidence Rate", "field": "Field"})
723
+ .update_layout(xaxis_tickangle=-40),
724
  use_container_width=True)
725
+
726
+ except Exception as e:
727
+ st.error(str(e))
 
 
 
 
 
 
 
 
728
 
729
 
730
  # ═══ 6. GEOGRAPHIC MAP ══════════════════════════════════════════