AIEcosystem committed on
Commit
3c3543a
·
verified ·
1 Parent(s): 0fcbd69

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +83 -80
src/streamlit_app.py CHANGED
@@ -487,7 +487,7 @@ COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
487
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
488
 
489
  # --- Model Loading ---
490
- @st.cache_resource
491
  def load_ner_model(labels):
492
  """Loads the GLiNER model and caches it."""
493
  try:
@@ -604,6 +604,7 @@ if run_button:
604
  # --- Topic Modeling Input Retrieval (Using default or current state values) ---
605
  # The actual sliders are only visible after results are shown, so here we use the state defaults
606
  # or the last successfully run values to check for changes and run the model.
 
607
  current_num_topics = st.session_state.num_topics_slider
608
  current_num_top_words = st.session_state.num_top_words_slider
609
 
@@ -641,8 +642,8 @@ if run_button:
641
  # 4. Perform Topic Modeling (Passing the new parameters)
642
  df_topic_data = perform_topic_modeling(
643
  df_entities=df,
644
- num_topics=current_num_topics, # NEW PARAMETER
645
- num_top_words=current_num_top_words # NEW PARAMETER
646
  )
647
 
648
  end_time = time.time()
@@ -664,65 +665,10 @@ if run_button:
664
  # --- Display Download Link and Results (Updated with White-Label inputs) ---
665
  if st.session_state.show_results:
666
  df = st.session_state.results_df
667
- # Note: Topic data needs to be re-run if the sliders change, but here we reuse the state value unless the re-run button is hit.
668
- # To fix this, we need to handle the Topic Modeling calculation separately so that changing the slider triggers a run without hitting the main 'Results' button.
669
-
670
- # --- Topic Model Slider Re-Run Logic (New Block) ---
671
- st.markdown("---")
672
- st.markdown("### 4. Advanced Analysis")
673
- st.markdown("💡 **Topic Modeling Settings:** Adjust these sliders and click **'Re-Run Topic Model'** to see instant changes.")
674
-
675
- col_slider_topic, col_slider_words, col_rerun_btn = st.columns([1, 1, 0.5])
676
-
677
- with col_slider_topic:
678
- new_num_topics = st.slider(
679
- "Number of Topics",
680
- min_value=2,
681
- max_value=10,
682
- value=st.session_state.num_topics_slider,
683
- step=1,
684
- key='num_topics_slider_new',
685
- help="The number of topics to discover (2 to 10)."
686
- )
687
- with col_slider_words:
688
- new_num_top_words = st.slider(
689
- "Number of Top Words",
690
- min_value=5,
691
- max_value=20,
692
- value=st.session_state.num_top_words_slider,
693
- step=1,
694
- key='num_top_words_slider_new',
695
- help="The number of top words to display per topic (5 to 20)."
696
- )
697
-
698
- # Function to trigger a recalculation of ONLY the topic model
699
- def rerun_topic_model():
700
- # Update session state with the new slider values
701
- st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
702
- st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
703
-
704
- # Recalculate topic modeling results
705
- if not st.session_state.results_df.empty:
706
- df_topic_data_new = perform_topic_modeling(
707
- df_entities=st.session_state.results_df,
708
- num_topics=st.session_state.num_topics_slider,
709
- num_top_words=st.session_state.num_top_words_slider
710
- )
711
- st.session_state.topic_results = df_topic_data_new
712
- st.session_state.last_num_topics = st.session_state.num_topics_slider
713
- st.session_state.last_num_top_words = st.session_state.num_top_words_slider
714
- st.success("Topic Model Re-Run Complete!")
715
- # Rerunning Streamlit will display the updated state immediately
716
-
717
- with col_rerun_btn:
718
- st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True) # Vertical spacing
719
- st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
720
-
721
  df_topic_data = st.session_state.topic_results
722
- # --- End Topic Model Slider Re-Run Logic ---
723
-
724
-
725
- entity_color_map = get_dynamic_color_map(df['label'].unique().tolist(), FIXED_ENTITY_COLOR_MAP)
726
 
727
  if df.empty:
728
  st.warning("No entities were found in the provided text with the current label set.")
@@ -817,25 +763,82 @@ if st.session_state.show_results:
817
  else:
818
  st.info("No entities were repeated enough for a Top 10 frequency chart.")
819
 
820
- # 4. Network Graph and Topic Modeling (Modified to show controls and charts in columns)
821
-
822
- col_network, col_topic = st.columns(2)
823
- with col_network:
824
- with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
825
- st.plotly_chart(generate_network_graph(df, st.session_state.last_text, entity_color_map), use_container_width=True)
826
- with col_topic:
827
- with st.expander("💡 Topic Modeling (LDA)", expanded=True):
828
- # Display the current settings used for the topic modeling result
829
- st.markdown(f"""
830
- **Current LDA Parameters:**
831
- * Topics: **{st.session_state.last_num_topics}**
832
- * Top Words: **{st.session_state.last_num_top_words}**
833
- """)
834
- if df_topic_data is not None and not df_topic_data.empty:
835
- st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
836
- st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
837
- else:
838
- st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
839
  # --- 5. White-Label Configuration (NEW SECTION FOR CUSTOM BRANDING) ---
840
  st.markdown("---")
841
  st.markdown("### 5. White-Label Report Configuration 🎨")
 
487
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
488
 
489
  # --- Model Loading ---
490
+ @st.cache_resource
491
  def load_ner_model(labels):
492
  """Loads the GLiNER model and caches it."""
493
  try:
 
604
  # --- Topic Modeling Input Retrieval (Using default or current state values) ---
605
  # The actual sliders are only visible after results are shown, so here we use the state defaults
606
  # or the last successfully run values to check for changes and run the model.
607
+ # Use the key that holds the current value, which is initialized at the top level
608
  current_num_topics = st.session_state.num_topics_slider
609
  current_num_top_words = st.session_state.num_top_words_slider
610
 
 
642
  # 4. Perform Topic Modeling (Passing the new parameters)
643
  df_topic_data = perform_topic_modeling(
644
  df_entities=df,
645
+ num_topics=current_num_topics, # PARAMETER
646
+ num_top_words=current_num_top_words # PARAMETER
647
  )
648
 
649
  end_time = time.time()
 
665
  # --- Display Download Link and Results (Updated with White-Label inputs) ---
666
  if st.session_state.show_results:
667
  df = st.session_state.results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
  df_topic_data = st.session_state.topic_results
669
+ # Generate the color map based on the results DF labels
670
+ current_labels_in_df = df['label'].unique().tolist()
671
+ entity_color_map = get_dynamic_color_map(current_labels_in_df, FIXED_ENTITY_COLOR_MAP)
 
672
 
673
  if df.empty:
674
  st.warning("No entities were found in the provided text with the current label set.")
 
763
  else:
764
  st.info("No entities were repeated enough for a Top 10 frequency chart.")
765
 
766
+ # 4. Advanced Analysis (REVISED STRUCTURE)
767
+ st.markdown("---")
768
+ st.markdown("### 4. Advanced Analysis")
769
+
770
+ # --- A. Network Graph Section (Alone) ---
771
+ with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
772
+ st.plotly_chart(generate_network_graph(df, st.session_state.last_text, entity_color_map), use_container_width=True)
773
+
774
+ # --- B. Topic Modeling Section (Controls and Chart inside one block) ---
775
+ st.markdown("---")
776
+ with st.container(border=True): # Use a container to visually group the Topic Modeling section
777
+ st.markdown("#### 💡 Topic Modeling (LDA) Configuration and Results")
778
+ st.markdown("Adjust the settings below and click **'Re-Run Topic Model'** to instantly update the visualization based on the extracted entities.")
779
+
780
+ col_slider_topic, col_slider_words, col_rerun_btn = st.columns([1, 1, 0.5])
781
+
782
+ with col_slider_topic:
783
+ new_num_topics = st.slider(
784
+ "Number of Topics",
785
+ min_value=2,
786
+ max_value=10,
787
+ value=st.session_state.num_topics_slider,
788
+ step=1,
789
+ key='num_topics_slider_new',
790
+ help="The number of topics to discover (2 to 10)."
791
+ )
792
+ with col_slider_words:
793
+ new_num_top_words = st.slider(
794
+ "Number of Top Words",
795
+ min_value=5,
796
+ max_value=20,
797
+ value=st.session_state.num_top_words_slider,
798
+ step=1,
799
+ key='num_top_words_slider_new',
800
+ help="The number of top words to display per topic (5 to 20)."
801
+ )
802
+
803
+ # Function to trigger a recalculation of ONLY the topic model
804
+ def rerun_topic_model():
805
+ # Update session state with the new slider values
806
+ st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
807
+ st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
808
+
809
+ # Recalculate topic modeling results
810
+ if not st.session_state.results_df.empty:
811
+ df_topic_data_new = perform_topic_modeling(
812
+ df_entities=st.session_state.results_df,
813
+ num_topics=st.session_state.num_topics_slider,
814
+ num_top_words=st.session_state.num_top_words_slider
815
+ )
816
+ st.session_state.topic_results = df_topic_data_new
817
+ st.session_state.last_num_topics = st.session_state.num_topics_slider
818
+ st.session_state.last_num_top_words = st.session_state.num_top_words_slider
819
+ # st.success("Topic Model Re-Run Complete!") # Removed success message as it causes an extra flash
820
+
821
+ with col_rerun_btn:
822
+ st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True) # Vertical spacing
823
+ # The on_click callback runs first; Streamlit then reruns the script, so the chart below reflects the updated state
824
+ st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
825
+
826
+ # Display the topic chart inside the same container
827
+ st.markdown("---")
828
+ st.markdown(f"""
829
+ **Current LDA Parameters:**
830
+ * Topics: **{st.session_state.last_num_topics}**
831
+ * Top Words: **{st.session_state.last_num_top_words}**
832
+ """)
833
+
834
+ df_topic_data = st.session_state.topic_results # Get the potentially updated results
835
+ if df_topic_data is not None and not df_topic_data.empty:
836
+ st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
837
+ st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
838
+ else:
839
+ st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
840
+
841
+
842
  # --- 5. White-Label Configuration (NEW SECTION FOR CUSTOM BRANDING) ---
843
  st.markdown("---")
844
  st.markdown("### 5. White-Label Report Configuration 🎨")