Spaces:

AIEcosystem
/

render4

Runtime error

App Files Files Community

AIEcosystem committed on Nov 5, 2025

Commit

934327e

verified ·

1 Parent(s): 0c92e3b

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +73 -32

src/streamlit_app.py CHANGED Viewed

@@ -453,28 +453,8 @@ st.markdown(
     """,
     unsafe_allow_html=True)
-# --- Sidebar Inputs for Topic Modeling (NEW) ---
-st.sidebar.header("Topic Modeling Settings 💡")
-num_topics_input = st.sidebar.slider(
-    "Number of Topics",
-    min_value=2,
-    max_value=10,
-    value=5,
-    step=1,
-    key='num_topics_slider',
-    help="The number of underlying topics (clusters) to discover in the entity data (LDA)."
-)
-num_top_words_input = st.sidebar.slider(
-    "Number of Top Words per Topic",
-    min_value=5,
-    max_value=20,
-    value=10,
-    step=1,
-    key='num_top_words_slider',
-    help="The number of most important words to display for each topic."
-)
-st.sidebar.markdown("---")
-# -----------------------------------------------
 st.subheader("Entity and Topic Analysis Report Generator", divider="blue") # Changed divider from "rainbow" (often includes red/pink) to "blue"
 # Removed st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary") for white-labeling
@@ -507,7 +487,7 @@ COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 # --- Model Loading ---
-@st.cache_resource
 def load_ner_model(labels):
     """Loads the GLiNER model and caches it."""
     try:
@@ -545,6 +525,11 @@ if 'my_text_area' not in st.session_state: st.session_state.my_text_area = DEFAU
 if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
 if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
 if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
 # --- Clear Button Function (MODIFIED) ---
 def clear_text():
@@ -616,8 +601,9 @@ if run_button:
         if should_chunk:
             mode_msg += " with **chunking** for large text"
-        # --- Topic Modeling Input Retrieval ---
-        # Get the current slider values
         current_num_topics = st.session_state.num_topics_slider
         current_num_top_words = st.session_state.num_top_words_slider
@@ -678,10 +664,65 @@ if run_button:
 # --- Display Download Link and Results (Updated with White-Label inputs) ---
 if st.session_state.show_results:
     df = st.session_state.results_df
     df_topic_data = st.session_state.topic_results
-    # Generate the color map based on the results DF labels
-    current_labels_in_df = df['label'].unique().tolist()
-    entity_color_map = get_dynamic_color_map(current_labels_in_df, FIXED_ENTITY_COLOR_MAP)
     if df.empty:
         st.warning("No entities were found in the provided text with the current label set.")
@@ -775,9 +816,9 @@ if st.session_state.show_results:
                 st.plotly_chart(fig_bar_freq, use_container_width=True)
             else:
                 st.info("No entities were repeated enough for a Top 10 frequency chart.")
-        # 4. Network Graph and Topic Modeling
-        st.markdown("---")
-        st.markdown("### 4. Advanced Analysis")
         col_network, col_topic = st.columns(2)
         with col_network:
             with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
@@ -786,7 +827,7 @@ if st.session_state.show_results:
             with st.expander("💡 Topic Modeling (LDA)", expanded=True):
                 # Display the current settings used for the topic modeling result
                 st.markdown(f"""
-                **LDA Parameters:**
                 * Topics: **{st.session_state.last_num_topics}**
                 * Top Words: **{st.session_state.last_num_top_words}**
                 """)

     """,
     unsafe_allow_html=True)
+# --- Topic Modeling Settings (Moved to main body, but need to initialize key outside of 'if st.session_state.show_results:') ---
+# st.sidebar.header("Topic Modeling Settings 💡") # Removed sidebar header
 st.subheader("Entity and Topic Analysis Report Generator", divider="blue") # Changed divider from "rainbow" (often includes red/pink) to "blue"
 # Removed st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary") for white-labeling
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 # --- Model Loading ---
+@st.cache_resource
 def load_ner_model(labels):
     """Loads the GLiNER model and caches it."""
     try:
 if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
 if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
 if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
+# Initialize Topic Model settings in state, so they can be set even if not using the sidebar
+if 'num_topics_slider' not in st.session_state: st.session_state.num_topics_slider = 5
+if 'num_top_words_slider' not in st.session_state: st.session_state.num_top_words_slider = 10
+if 'last_num_topics' not in st.session_state: st.session_state.last_num_topics = None
+if 'last_num_top_words' not in st.session_state: st.session_state.last_num_top_words = None
 # --- Clear Button Function (MODIFIED) ---
 def clear_text():
         if should_chunk:
             mode_msg += " with **chunking** for large text"
+        # --- Topic Modeling Input Retrieval (Using default or current state values) ---
+        # The actual sliders are only visible after results are shown, so here we use the state defaults
+        # or the last successfully run values to check for changes and run the model.
         current_num_topics = st.session_state.num_topics_slider
         current_num_top_words = st.session_state.num_top_words_slider
 # --- Display Download Link and Results (Updated with White-Label inputs) ---
 if st.session_state.show_results:
     df = st.session_state.results_df
+    # Note: Topic data needs to be re-run if the sliders change, but here we reuse the state value unless the re-run button is hit.
+    # To fix this, we need to handle the Topic Modeling calculation separately so that changing the slider triggers a run without hitting the main 'Results' button.
+    # --- Topic Model Slider Re-Run Logic (New Block) ---
+    st.markdown("---")
+    st.markdown("### 4. Advanced Analysis")
+    st.markdown("💡 **Topic Modeling Settings:** Adjust these sliders and click **'Re-Run Topic Model'** to see instant changes.")
+    col_slider_topic, col_slider_words, col_rerun_btn = st.columns([1, 1, 0.5])
+    with col_slider_topic:
+        new_num_topics = st.slider(
+            "Number of Topics",
+            min_value=2,
+            max_value=10,
+            value=st.session_state.num_topics_slider,
+            step=1,
+            key='num_topics_slider_new',
+            help="The number of topics to discover (2 to 10)."
+        )
+    with col_slider_words:
+        new_num_top_words = st.slider(
+            "Number of Top Words",
+            min_value=5,
+            max_value=20,
+            value=st.session_state.num_top_words_slider,
+            step=1,
+            key='num_top_words_slider_new',
+            help="The number of top words to display per topic (5 to 20)."
+        )
+    # Function to trigger a recalculation of ONLY the topic model
+    def rerun_topic_model():
+        # Update session state with the new slider values
+        st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
+        st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
+        # Recalculate topic modeling results
+        if not st.session_state.results_df.empty:
+             df_topic_data_new = perform_topic_modeling(
+                df_entities=st.session_state.results_df,
+                num_topics=st.session_state.num_topics_slider,
+                num_top_words=st.session_state.num_top_words_slider
+            )
+             st.session_state.topic_results = df_topic_data_new
+             st.session_state.last_num_topics = st.session_state.num_topics_slider
+             st.session_state.last_num_top_words = st.session_state.num_top_words_slider
+        st.success("Topic Model Re-Run Complete!")
+        # Rerunning Streamlit will display the updated state immediately
+    with col_rerun_btn:
+        st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True) # Vertical spacing
+        st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
     df_topic_data = st.session_state.topic_results
+    # --- End Topic Model Slider Re-Run Logic ---
+    entity_color_map = get_dynamic_color_map(df['label'].unique().tolist(), FIXED_ENTITY_COLOR_MAP)
     if df.empty:
         st.warning("No entities were found in the provided text with the current label set.")
                 st.plotly_chart(fig_bar_freq, use_container_width=True)
             else:
                 st.info("No entities were repeated enough for a Top 10 frequency chart.")
+        # 4. Network Graph and Topic Modeling (Modified to show controls and charts in columns)
         col_network, col_topic = st.columns(2)
         with col_network:
             with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
             with st.expander("💡 Topic Modeling (LDA)", expanded=True):
                 # Display the current settings used for the topic modeling result
                 st.markdown(f"""
+                **Current LDA Parameters:**
                 * Topics: **{st.session_state.last_num_topics}**
                 * Top Words: **{st.session_state.last_num_top_words}**
                 """)