AIEcosystem committed on
Commit
934327e
·
verified ·
1 Parent(s): 0c92e3b

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +73 -32
src/streamlit_app.py CHANGED
@@ -453,28 +453,8 @@ st.markdown(
453
  """,
454
  unsafe_allow_html=True)
455
 
456
- # --- Sidebar Inputs for Topic Modeling (NEW) ---
457
- st.sidebar.header("Topic Modeling Settings 💡")
458
- num_topics_input = st.sidebar.slider(
459
- "Number of Topics",
460
- min_value=2,
461
- max_value=10,
462
- value=5,
463
- step=1,
464
- key='num_topics_slider',
465
- help="The number of underlying topics (clusters) to discover in the entity data (LDA)."
466
- )
467
- num_top_words_input = st.sidebar.slider(
468
- "Number of Top Words per Topic",
469
- min_value=5,
470
- max_value=20,
471
- value=10,
472
- step=1,
473
- key='num_top_words_slider',
474
- help="The number of most important words to display for each topic."
475
- )
476
- st.sidebar.markdown("---")
477
- # -----------------------------------------------
478
 
479
  st.subheader("Entity and Topic Analysis Report Generator", divider="blue") # Changed divider from "rainbow" (often includes red/pink) to "blue"
480
  # Removed st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary") for white-labeling
@@ -507,7 +487,7 @@ COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
507
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
508
 
509
  # --- Model Loading ---
510
- @st.cache_resource
511
  def load_ner_model(labels):
512
  """Loads the GLiNER model and caches it."""
513
  try:
@@ -545,6 +525,11 @@ if 'my_text_area' not in st.session_state: st.session_state.my_text_area = DEFAU
545
  if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
546
  if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
547
  if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
 
 
 
 
 
548
 
549
  # --- Clear Button Function (MODIFIED) ---
550
  def clear_text():
@@ -616,8 +601,9 @@ if run_button:
616
  if should_chunk:
617
  mode_msg += " with **chunking** for large text"
618
 
619
- # --- Topic Modeling Input Retrieval ---
620
- # Get the current slider values
 
621
  current_num_topics = st.session_state.num_topics_slider
622
  current_num_top_words = st.session_state.num_top_words_slider
623
 
@@ -678,10 +664,65 @@ if run_button:
678
  # --- Display Download Link and Results (Updated with White-Label inputs) ---
679
  if st.session_state.show_results:
680
  df = st.session_state.results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  df_topic_data = st.session_state.topic_results
682
- # Generate the color map based on the results DF labels
683
- current_labels_in_df = df['label'].unique().tolist()
684
- entity_color_map = get_dynamic_color_map(current_labels_in_df, FIXED_ENTITY_COLOR_MAP)
 
685
 
686
  if df.empty:
687
  st.warning("No entities were found in the provided text with the current label set.")
@@ -775,9 +816,9 @@ if st.session_state.show_results:
775
  st.plotly_chart(fig_bar_freq, use_container_width=True)
776
  else:
777
  st.info("No entities were repeated enough for a Top 10 frequency chart.")
778
- # 4. Network Graph and Topic Modeling
779
- st.markdown("---")
780
- st.markdown("### 4. Advanced Analysis")
781
  col_network, col_topic = st.columns(2)
782
  with col_network:
783
  with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
@@ -786,7 +827,7 @@ if st.session_state.show_results:
786
  with st.expander("💡 Topic Modeling (LDA)", expanded=True):
787
  # Display the current settings used for the topic modeling result
788
  st.markdown(f"""
789
- **LDA Parameters:**
790
  * Topics: **{st.session_state.last_num_topics}**
791
  * Top Words: **{st.session_state.last_num_top_words}**
792
  """)
 
453
  """,
454
  unsafe_allow_html=True)
455
 
456
+ # --- Topic Modeling Settings (Moved to main body, but need to initialize key outside of 'if st.session_state.show_results:') ---
457
+ # st.sidebar.header("Topic Modeling Settings 💡") # Removed sidebar header
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
 
459
  st.subheader("Entity and Topic Analysis Report Generator", divider="blue") # Changed divider from "rainbow" (often includes red/pink) to "blue"
460
  # Removed st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary") for white-labeling
 
487
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
488
 
489
  # --- Model Loading ---
490
+ @st.cache_resource
491
  def load_ner_model(labels):
492
  """Loads the GLiNER model and caches it."""
493
  try:
 
525
  if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
526
  if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
527
  if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
528
+ # Initialize Topic Model settings in state, so they can be set even if not using the sidebar
529
+ if 'num_topics_slider' not in st.session_state: st.session_state.num_topics_slider = 5
530
+ if 'num_top_words_slider' not in st.session_state: st.session_state.num_top_words_slider = 10
531
+ if 'last_num_topics' not in st.session_state: st.session_state.last_num_topics = None
532
+ if 'last_num_top_words' not in st.session_state: st.session_state.last_num_top_words = None
533
 
534
  # --- Clear Button Function (MODIFIED) ---
535
  def clear_text():
 
601
  if should_chunk:
602
  mode_msg += " with **chunking** for large text"
603
 
604
+ # --- Topic Modeling Input Retrieval (Using default or current state values) ---
605
+ # The actual sliders are only visible after results are shown, so here we use the state defaults
606
+ # or the last successfully run values to check for changes and run the model.
607
  current_num_topics = st.session_state.num_topics_slider
608
  current_num_top_words = st.session_state.num_top_words_slider
609
 
 
664
  # --- Display Download Link and Results (Updated with White-Label inputs) ---
665
  if st.session_state.show_results:
666
  df = st.session_state.results_df
667
+ # Note: Topic data needs to be re-run if the sliders change, but here we reuse the state value unless the re-run button is hit.
668
+ # To fix this, we need to handle the Topic Modeling calculation separately so that changing the slider triggers a run without hitting the main 'Results' button.
669
+
670
+ # --- Topic Model Slider Re-Run Logic (New Block) ---
671
+ st.markdown("---")
672
+ st.markdown("### 4. Advanced Analysis")
673
+ st.markdown("💡 **Topic Modeling Settings:** Adjust these sliders and click **'Re-Run Topic Model'** to see instant changes.")
674
+
675
+ col_slider_topic, col_slider_words, col_rerun_btn = st.columns([1, 1, 0.5])
676
+
677
+ with col_slider_topic:
678
+ new_num_topics = st.slider(
679
+ "Number of Topics",
680
+ min_value=2,
681
+ max_value=10,
682
+ value=st.session_state.num_topics_slider,
683
+ step=1,
684
+ key='num_topics_slider_new',
685
+ help="The number of topics to discover (2 to 10)."
686
+ )
687
+ with col_slider_words:
688
+ new_num_top_words = st.slider(
689
+ "Number of Top Words",
690
+ min_value=5,
691
+ max_value=20,
692
+ value=st.session_state.num_top_words_slider,
693
+ step=1,
694
+ key='num_top_words_slider_new',
695
+ help="The number of top words to display per topic (5 to 20)."
696
+ )
697
+
698
+ # Function to trigger a recalculation of ONLY the topic model
699
+ def rerun_topic_model():
700
+ # Update session state with the new slider values
701
+ st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
702
+ st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
703
+
704
+ # Recalculate topic modeling results
705
+ if not st.session_state.results_df.empty:
706
+ df_topic_data_new = perform_topic_modeling(
707
+ df_entities=st.session_state.results_df,
708
+ num_topics=st.session_state.num_topics_slider,
709
+ num_top_words=st.session_state.num_top_words_slider
710
+ )
711
+ st.session_state.topic_results = df_topic_data_new
712
+ st.session_state.last_num_topics = st.session_state.num_topics_slider
713
+ st.session_state.last_num_top_words = st.session_state.num_top_words_slider
714
+ st.success("Topic Model Re-Run Complete!")
715
+ # Rerunning Streamlit will display the updated state immediately
716
+
717
+ with col_rerun_btn:
718
+ st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True) # Vertical spacing
719
+ st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
720
+
721
  df_topic_data = st.session_state.topic_results
722
+ # --- End Topic Model Slider Re-Run Logic ---
723
+
724
+
725
+ entity_color_map = get_dynamic_color_map(df['label'].unique().tolist(), FIXED_ENTITY_COLOR_MAP)
726
 
727
  if df.empty:
728
  st.warning("No entities were found in the provided text with the current label set.")
 
816
  st.plotly_chart(fig_bar_freq, use_container_width=True)
817
  else:
818
  st.info("No entities were repeated enough for a Top 10 frequency chart.")
819
+
820
+ # 4. Network Graph and Topic Modeling (Modified to show controls and charts in columns)
821
+
822
  col_network, col_topic = st.columns(2)
823
  with col_network:
824
  with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
 
827
  with st.expander("💡 Topic Modeling (LDA)", expanded=True):
828
  # Display the current settings used for the topic modeling result
829
  st.markdown(f"""
830
+ **Current LDA Parameters:**
831
  * Topics: **{st.session_state.last_num_topics}**
832
  * Top Words: **{st.session_state.last_num_top_words}**
833
  """)