Spaces:
Runtime error
Runtime error
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +73 -32
src/streamlit_app.py
CHANGED
|
@@ -453,28 +453,8 @@ st.markdown(
|
|
| 453 |
""",
|
| 454 |
unsafe_allow_html=True)
|
| 455 |
|
| 456 |
-
# ---
|
| 457 |
-
st.sidebar.header("Topic Modeling Settings π‘")
|
| 458 |
-
num_topics_input = st.sidebar.slider(
|
| 459 |
-
"Number of Topics",
|
| 460 |
-
min_value=2,
|
| 461 |
-
max_value=10,
|
| 462 |
-
value=5,
|
| 463 |
-
step=1,
|
| 464 |
-
key='num_topics_slider',
|
| 465 |
-
help="The number of underlying topics (clusters) to discover in the entity data (LDA)."
|
| 466 |
-
)
|
| 467 |
-
num_top_words_input = st.sidebar.slider(
|
| 468 |
-
"Number of Top Words per Topic",
|
| 469 |
-
min_value=5,
|
| 470 |
-
max_value=20,
|
| 471 |
-
value=10,
|
| 472 |
-
step=1,
|
| 473 |
-
key='num_top_words_slider',
|
| 474 |
-
help="The number of most important words to display for each topic."
|
| 475 |
-
)
|
| 476 |
-
st.sidebar.markdown("---")
|
| 477 |
-
# -----------------------------------------------
|
| 478 |
|
| 479 |
st.subheader("Entity and Topic Analysis Report Generator", divider="blue") # Changed divider from "rainbow" (often includes red/pink) to "blue"
|
| 480 |
# Removed st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary") for white-labeling
|
|
@@ -507,7 +487,7 @@ COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
|
|
| 507 |
comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
|
| 508 |
|
| 509 |
# --- Model Loading ---
|
| 510 |
-
@st.
|
| 511 |
def load_ner_model(labels):
|
| 512 |
"""Loads the GLiNER model and caches it."""
|
| 513 |
try:
|
|
@@ -545,6 +525,11 @@ if 'my_text_area' not in st.session_state: st.session_state.my_text_area = DEFAU
|
|
| 545 |
if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
|
| 546 |
if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
|
| 547 |
if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
|
| 549 |
# --- Clear Button Function (MODIFIED) ---
|
| 550 |
def clear_text():
|
|
@@ -616,8 +601,9 @@ if run_button:
|
|
| 616 |
if should_chunk:
|
| 617 |
mode_msg += " with **chunking** for large text"
|
| 618 |
|
| 619 |
-
# --- Topic Modeling Input Retrieval ---
|
| 620 |
-
#
|
|
|
|
| 621 |
current_num_topics = st.session_state.num_topics_slider
|
| 622 |
current_num_top_words = st.session_state.num_top_words_slider
|
| 623 |
|
|
@@ -678,10 +664,65 @@ if run_button:
|
|
| 678 |
# --- Display Download Link and Results (Updated with White-Label inputs) ---
|
| 679 |
if st.session_state.show_results:
|
| 680 |
df = st.session_state.results_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
df_topic_data = st.session_state.topic_results
|
| 682 |
-
#
|
| 683 |
-
|
| 684 |
-
|
|
|
|
| 685 |
|
| 686 |
if df.empty:
|
| 687 |
st.warning("No entities were found in the provided text with the current label set.")
|
|
@@ -775,9 +816,9 @@ if st.session_state.show_results:
|
|
| 775 |
st.plotly_chart(fig_bar_freq, use_container_width=True)
|
| 776 |
else:
|
| 777 |
st.info("No entities were repeated enough for a Top 10 frequency chart.")
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
col_network, col_topic = st.columns(2)
|
| 782 |
with col_network:
|
| 783 |
with st.expander("π Entity Co-occurrence Network Graph", expanded=True):
|
|
@@ -786,7 +827,7 @@ if st.session_state.show_results:
|
|
| 786 |
with st.expander("π‘ Topic Modeling (LDA)", expanded=True):
|
| 787 |
# Display the current settings used for the topic modeling result
|
| 788 |
st.markdown(f"""
|
| 789 |
-
**LDA Parameters:**
|
| 790 |
* Topics: **{st.session_state.last_num_topics}**
|
| 791 |
* Top Words: **{st.session_state.last_num_top_words}**
|
| 792 |
""")
|
|
|
|
| 453 |
""",
|
| 454 |
unsafe_allow_html=True)
|
| 455 |
|
| 456 |
+
# --- Topic Modeling Settings (Moved to main body, but need to initialize key outside of 'if st.session_state.show_results:') ---
|
| 457 |
+
# st.sidebar.header("Topic Modeling Settings π‘") # Removed sidebar header
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
|
| 459 |
st.subheader("Entity and Topic Analysis Report Generator", divider="blue") # Changed divider from "rainbow" (often includes red/pink) to "blue"
|
| 460 |
# Removed st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary") for white-labeling
|
|
|
|
| 487 |
comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
|
| 488 |
|
| 489 |
# --- Model Loading ---
|
| 490 |
+
@st.cache_resource
|
| 491 |
def load_ner_model(labels):
|
| 492 |
"""Loads the GLiNER model and caches it."""
|
| 493 |
try:
|
|
|
|
| 525 |
if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
|
| 526 |
if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
|
| 527 |
if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
|
| 528 |
+
# Initialize Topic Model settings in state, so they can be set even if not using the sidebar
|
| 529 |
+
if 'num_topics_slider' not in st.session_state: st.session_state.num_topics_slider = 5
|
| 530 |
+
if 'num_top_words_slider' not in st.session_state: st.session_state.num_top_words_slider = 10
|
| 531 |
+
if 'last_num_topics' not in st.session_state: st.session_state.last_num_topics = None
|
| 532 |
+
if 'last_num_top_words' not in st.session_state: st.session_state.last_num_top_words = None
|
| 533 |
|
| 534 |
# --- Clear Button Function (MODIFIED) ---
|
| 535 |
def clear_text():
|
|
|
|
| 601 |
if should_chunk:
|
| 602 |
mode_msg += " with **chunking** for large text"
|
| 603 |
|
| 604 |
+
# --- Topic Modeling Input Retrieval (Using default or current state values) ---
|
| 605 |
+
# The actual sliders are only visible after results are shown, so here we use the state defaults
|
| 606 |
+
# or the last successfully run values to check for changes and run the model.
|
| 607 |
current_num_topics = st.session_state.num_topics_slider
|
| 608 |
current_num_top_words = st.session_state.num_top_words_slider
|
| 609 |
|
|
|
|
| 664 |
# --- Display Download Link and Results (Updated with White-Label inputs) ---
|
| 665 |
if st.session_state.show_results:
|
| 666 |
df = st.session_state.results_df
|
| 667 |
+
# Note: Topic data needs to be re-run if the sliders change, but here we reuse the state value unless the re-run button is hit.
|
| 668 |
+
# To fix this, we need to handle the Topic Modeling calculation separately so that changing the slider triggers a run without hitting the main 'Results' button.
|
| 669 |
+
|
| 670 |
+
# --- Topic Model Slider Re-Run Logic (New Block) ---
|
| 671 |
+
st.markdown("---")
|
| 672 |
+
st.markdown("### 4. Advanced Analysis")
|
| 673 |
+
st.markdown("💡 **Topic Modeling Settings:** Adjust these sliders and click **'Re-Run Topic Model'** to see instant changes.")
|
| 674 |
+
|
| 675 |
+
col_slider_topic, col_slider_words, col_rerun_btn = st.columns([1, 1, 0.5])
|
| 676 |
+
|
| 677 |
+
with col_slider_topic:
|
| 678 |
+
new_num_topics = st.slider(
|
| 679 |
+
"Number of Topics",
|
| 680 |
+
min_value=2,
|
| 681 |
+
max_value=10,
|
| 682 |
+
value=st.session_state.num_topics_slider,
|
| 683 |
+
step=1,
|
| 684 |
+
key='num_topics_slider_new',
|
| 685 |
+
help="The number of topics to discover (2 to 10)."
|
| 686 |
+
)
|
| 687 |
+
with col_slider_words:
|
| 688 |
+
new_num_top_words = st.slider(
|
| 689 |
+
"Number of Top Words",
|
| 690 |
+
min_value=5,
|
| 691 |
+
max_value=20,
|
| 692 |
+
value=st.session_state.num_top_words_slider,
|
| 693 |
+
step=1,
|
| 694 |
+
key='num_top_words_slider_new',
|
| 695 |
+
help="The number of top words to display per topic (5 to 20)."
|
| 696 |
+
)
|
| 697 |
+
|
| 698 |
+
# Function to trigger a recalculation of ONLY the topic model
|
| 699 |
+
def rerun_topic_model():
|
| 700 |
+
# on_click callback for the "Re-Run Topic Model" button: copy the pending slider values (the '*_new' widget keys) into the canonical settings keys
|
| 701 |
+
st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
|
| 702 |
+
st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
|
| 703 |
+
|
| 704 |
+
# Recompute the topic model from the stored entity results; the call is skipped when results_df is empty
|
| 705 |
+
if not st.session_state.results_df.empty:
|
| 706 |
+
df_topic_data_new = perform_topic_modeling(
|
| 707 |
+
df_entities=st.session_state.results_df,
|
| 708 |
+
num_topics=st.session_state.num_topics_slider,
|
| 709 |
+
num_top_words=st.session_state.num_top_words_slider
|
| 710 |
+
)
|
| 711 |
+
st.session_state.topic_results = df_topic_data_new
|
| 712 |
+
st.session_state.last_num_topics = st.session_state.num_topics_slider
|
| 713 |
+
st.session_state.last_num_top_words = st.session_state.num_top_words_slider
|
| 714 |
+
st.success("Topic Model Re-Run Complete!")
|
| 715 |
+
# NOTE(review): Streamlit reruns the script after an on_click callback, which is presumably what displays the updated state — confirm
|
| 716 |
+
|
| 717 |
+
with col_rerun_btn:
|
| 718 |
+
st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True) # Vertical spacing
|
| 719 |
+
st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
|
| 720 |
+
|
| 721 |
df_topic_data = st.session_state.topic_results
|
| 722 |
+
# --- End Topic Model Slider Re-Run Logic ---
|
| 723 |
+
|
| 724 |
+
|
| 725 |
+
entity_color_map = get_dynamic_color_map(df['label'].unique().tolist(), FIXED_ENTITY_COLOR_MAP)
|
| 726 |
|
| 727 |
if df.empty:
|
| 728 |
st.warning("No entities were found in the provided text with the current label set.")
|
|
|
|
| 816 |
st.plotly_chart(fig_bar_freq, use_container_width=True)
|
| 817 |
else:
|
| 818 |
st.info("No entities were repeated enough for a Top 10 frequency chart.")
|
| 819 |
+
|
| 820 |
+
# 4. Network Graph and Topic Modeling (Modified to show controls and charts in columns)
|
| 821 |
+
|
| 822 |
col_network, col_topic = st.columns(2)
|
| 823 |
with col_network:
|
| 824 |
with st.expander("π Entity Co-occurrence Network Graph", expanded=True):
|
|
|
|
| 827 |
with st.expander("💡 Topic Modeling (LDA)", expanded=True):
|
| 828 |
# Display the current settings used for the topic modeling result
|
| 829 |
st.markdown(f"""
|
| 830 |
+
**Current LDA Parameters:**
|
| 831 |
* Topics: **{st.session_state.last_num_topics}**
|
| 832 |
* Top Words: **{st.session_state.last_num_top_words}**
|
| 833 |
""")
|