Spaces:

AIEcosystem
/

render4

Runtime error

App Files Files Community

AIEcosystem committed on Nov 7, 2025

Commit

0a622e5

verified ·

1 Parent(s): 5707233

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +99 -37

src/streamlit_app.py CHANGED Viewed

@@ -31,6 +31,7 @@ except ImportError:
 # --- Fixed Label Definitions and Mappings ---
 FIXED_LABELS = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
 FIXED_ENTITY_COLOR_MAP = {
     "person": "#10b981", # Green
     "country": "#3b82f6", # Blue
@@ -51,11 +52,8 @@ FIXED_CATEGORY_MAPPING = {
   "Numbers & Finance": ["money", "cardinal"]}
 REVERSE_FIXED_CATEGORY_MAPPING = {label: category for category, label_list in FIXED_CATEGORY_MAPPING.items() for label in label_list}
-# --- Default Custom Labels (Not used, but kept for full code compatibility) ---
-DEFAULT_CUSTOM_LABELS = "person, location, organization, product, date, time, event"
 # --- Dynamic Color Generator for Custom Labels ---
-COLOR_PALETTE = cycle(px.colors.qualitative.Alphabet)
 def extract_label(node_name):
     """Extracts the label from a node string like 'Text (Label)'."""
@@ -69,9 +67,16 @@ def remove_trailing_punctuation(text_string):
 def get_dynamic_color_map(active_labels, fixed_map):
     """Generates a color map, using fixed colors if available, otherwise dynamic colors."""
     color_map = {}
-    if active_labels == FIXED_LABELS:
         return fixed_map
     for label in active_labels:
         if label in fixed_map:
             color_map[label] = fixed_map[label]
@@ -84,8 +89,7 @@ def highlight_entities(text, df_entities, entity_color_map):
     if df_entities.empty:
         return text
-    # --- FIX: Ensure the DataFrame has a unique index before sorting/converting ---
-    # Create a copy and reset index for safety, resolving potential errors in the to_dict step.
     df_entities = df_entities.copy().reset_index(drop=True)
     entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
@@ -537,11 +541,11 @@ with tab2:
     st.markdown("For any errors or inquiries, please contact us at [info@your-company.com](mailto:info@your-company.com)")
 # --- Model Loading ---
-@st.cache_resource
 def load_ner_model(labels):
     """Loads the GLiNER model and caches it."""
     try:
-        # Note: Model loading is kept for fixed labels although not strictly required by the prompt
         return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
     except Exception as e:
         # print(f"FATAL ERROR: Failed to load NER model: {e}")
@@ -566,7 +570,7 @@ DEFAULT_TEXT = (
     "general public by October 1st. The goal is to deploy the **Astra** v2 platform before the next solar eclipse event in 2026.")
 # -----------------------------------
-# --- Session State Initialization (Cleaned up for Text Area Input) ---
 if 'show_results' not in st.session_state: st.session_state.show_results = False
 if 'my_text_area' not in st.session_state: st.session_state.my_text_area = DEFAULT_TEXT
 if 'last_text' not in st.session_state: st.session_state.last_text = ""
@@ -574,7 +578,8 @@ if 'results_df' not in st.session_state: st.session_state.results_df = pd.DataFr
 if 'elapsed_time' not in st.session_state: st.session_state.elapsed_time = 0.0
 if 'topic_results' not in st.session_state: st.session_state.topic_results = None
 if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
-if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False # Force Fixed Mode
 if 'num_topics_slider' not in st.session_state: st.session_state.num_topics_slider = 5
 if 'num_top_words_slider' not in st.session_state: st.session_state.num_top_words_slider = 10
 if 'last_num_topics' not in st.session_state: st.session_state.last_num_topics = None
@@ -593,7 +598,7 @@ def clear_text():
 # --- Revised Text Area Input ---
 st.markdown("## ✍️ Text Input for Analysis")
-word_limit = 10000
 text = st.text_area(
     f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
     height=250,
@@ -603,43 +608,67 @@ text = st.text_area(
 word_count = len(text.split())
 st.markdown(f"**Word count:** {word_count}/{word_limit}")
-# Always Fixed Mode controls
 st.markdown("---")
-st.markdown("### Analysis Mode: **Fixed Entity Labels**")
-st.info(f"The analysis will use the pre-defined fixed label set: **{', '.join(FIXED_LABELS)}**")
 col_results, col_clear = st.columns([1, 1])
 with col_results:
-    run_button = st.button("Analyze Text", key='run_results', use_container_width=True, type="primary")
 with col_clear:
     st.button("Clear text", on_click=clear_text, use_container_width=True)
-# --- Define Active Labels and Settings ---
-active_labels = FIXED_LABELS # Always fixed labels
-st.session_state.active_labels_list = active_labels
-current_num_topics = st.session_state.num_topics_slider
-current_num_top_words = st.session_state.num_top_words_slider
 # --- Results Trigger and Processing (Fixed for index error) ---
 if run_button:
     if text.strip() and word_count <= word_limit:
-        # 1. Determine Active Labels and Mode (Already determined: Fixed Mode)
         active_labels = st.session_state.active_labels_list
         # Caching Logic: Check if we need to re-run the full process
         should_rerun_full_analysis = (
             text.strip() != st.session_state.last_text.strip() or
-            active_labels != st.session_state.last_active_labels
         )
         if should_rerun_full_analysis:
             # 2. Rerunning Full Analysis
             CHUNKING_THRESHOLD = 500
             should_chunk = word_count > CHUNKING_THRESHOLD
-            mode_msg = "fixed labels"
             if should_chunk:
                 mode_msg += " with **chunking** for large text"
@@ -662,11 +691,20 @@ if run_button:
                 df = pd.DataFrame(all_entities)
                 if not df.empty:
-                    # 💥 FIX: Reset to a default, unique integer index
                     df = df.reset_index(drop=True)
-                    # Force fixed category mapping
-                    df['category'] = df['label'].map(REVERSE_FIXED_CATEGORY_MAPPING).fillna('Other')
                     df['text'] = df['text'].apply(remove_trailing_punctuation)
@@ -691,6 +729,9 @@ if run_button:
     elif word_count > word_limit:
         st.error(f"Text too long! Please limit your input to {word_limit} words.")
         st.session_state.show_results = False
     else:
         st.warning("Please enter some text to analyze.")
         st.session_state.show_results = False
@@ -719,8 +760,7 @@ if st.session_state.show_results:
             )
         # 1. Highlighted Text placed inside an Expander
-        # Force Fixed Mode in display header
-        with st.expander(f"### 1. Analyzed Text with Highlighted Entities (Fixed Mode)", expanded=False):
              st.markdown(
                 highlight_entities(st.session_state.last_text, df, entity_color_map),
                 unsafe_allow_html=True
@@ -735,10 +775,32 @@ if st.session_state.show_results:
         with tab_category_details:
             st.markdown("#### Detailed Entities Table (Grouped by Category)")
-            # This section now ONLY executes the FIXED MODE logic
-            unique_categories = list(FIXED_CATEGORY_MAPPING.keys())
-            tabs_category = st.tabs(unique_categories)
-            for category, tab in zip(unique_categories, tabs_category):
                 df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
                 styled_df_category = color_score_gradient(df_category)
                 with tab:
@@ -801,7 +863,6 @@ if st.session_state.show_results:
         # --- A. Network Graph Section ---
         with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
-            # This calls the CORRECTED generate_network_graph function
             st.plotly_chart(generate_network_graph(df, st.session_state.last_text, entity_color_map), use_container_width=True)
         # --- B. Topic Modeling Section ---
@@ -871,7 +932,7 @@ if st.session_state.show_results:
         # 5. White-Label Configuration
         st.markdown("---")
         st.markdown("### 5. White-Label Report Configuration 🎨")
-        default_report_title = "Fixed Entity Analysis Report"
         custom_report_title = st.text_input(
             "Type Your Report Title (for HTML Report), and then press Enter.",
             value=default_report_title
@@ -918,4 +979,5 @@ if st.session_state.show_results:
                 file_name="ner_topic_full_report.html",
                 mime="text/html",
                 use_container_width=True
-            )

 # --- Fixed Label Definitions and Mappings ---
 FIXED_LABELS = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
+DEFAULT_CUSTOM_LABELS = "person, location, organization, product, date, time, event" # <-- REINSTATED
 FIXED_ENTITY_COLOR_MAP = {
     "person": "#10b981", # Green
     "country": "#3b82f6", # Blue
   "Numbers & Finance": ["money", "cardinal"]}
 REVERSE_FIXED_CATEGORY_MAPPING = {label: category for category, label_list in FIXED_CATEGORY_MAPPING.items() for label in label_list}
 # --- Dynamic Color Generator for Custom Labels ---
+COLOR_PALETTE = cycle(px.colors.qualitative.Alphabet + px.colors.qualitative.Bold) # Use a larger palette
 def extract_label(node_name):
     """Extracts the label from a node string like 'Text (Label)'."""
 def get_dynamic_color_map(active_labels, fixed_map):
     """Generates a color map, using fixed colors if available, otherwise dynamic colors."""
     color_map = {}
+    # If the active labels exactly match the fixed set, use the fixed map
+    if set(active_labels) == set(fixed_map.keys()):
         return fixed_map
+    # Otherwise, generate a dynamic map, prioritizing fixed colors
+    # Ensure the color palette resets for consistency across sessions
+    global COLOR_PALETTE
+    COLOR_PALETTE = cycle(px.colors.qualitative.Alphabet + px.colors.qualitative.Bold)
     for label in active_labels:
         if label in fixed_map:
             color_map[label] = fixed_map[label]
     if df_entities.empty:
         return text
+    # Ensure the DataFrame has a unique index before sorting/converting
     df_entities = df_entities.copy().reset_index(drop=True)
     entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
     st.markdown("For any errors or inquiries, please contact us at [info@your-company.com](mailto:info@your-company.com)")
 # --- Model Loading ---
+@st.cache_resource
 def load_ner_model(labels):
     """Loads the GLiNER model and caches it."""
     try:
+        # GLiNER model is loaded with constraints based on the active labels list
         return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
     except Exception as e:
         # print(f"FATAL ERROR: Failed to load NER model: {e}")
     "general public by October 1st. The goal is to deploy the **Astra** v2 platform before the next solar eclipse event in 2026.")
 # -----------------------------------
+# --- Session State Initialization (Custom Label Reinstatement) ---
 if 'show_results' not in st.session_state: st.session_state.show_results = False
 if 'my_text_area' not in st.session_state: st.session_state.my_text_area = DEFAULT_TEXT
 if 'last_text' not in st.session_state: st.session_state.last_text = ""
 if 'elapsed_time' not in st.session_state: st.session_state.elapsed_time = 0.0
 if 'topic_results' not in st.session_state: st.session_state.topic_results = None
 if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
+if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = "Fixed Labels" # Re-use for radio
+if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = DEFAULT_CUSTOM_LABELS
 if 'num_topics_slider' not in st.session_state: st.session_state.num_topics_slider = 5
 if 'num_top_words_slider' not in st.session_state: st.session_state.num_top_words_slider = 10
 if 'last_num_topics' not in st.session_state: st.session_state.last_num_topics = None
 # --- Revised Text Area Input ---
 st.markdown("## ✍️ Text Input for Analysis")
+word_limit = 2000
 text = st.text_area(
     f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
     height=250,
 word_count = len(text.split())
 st.markdown(f"**Word count:** {word_count}/{word_limit}")
+# --- Custom/Fixed Label Selector ---
 st.markdown("---")
+st.markdown("### 🏷️ Entity Label Mode Selection")
+mode = st.radio(
+    "Select Entity Recognition Mode:",
+    ["Fixed Labels", "Custom Labels"],
+    key='is_custom_mode',
+    horizontal=True,
+    help="Fixed Labels use a predefined set. Custom Labels let you define your own."
+)
+active_labels = []
+if mode == "Fixed Labels":
+    active_labels = FIXED_LABELS
+    st.info(f"Fixed Labels active: **{', '.join(active_labels)}**")
+else:
+    custom_labels_input = st.text_input(
+        "Enter your custom labels, separated by commas (e.g., product, feature, ticket_id):",
+        value=st.session_state.custom_labels_input,
+        key='custom_labels_input',
+        help="The labels must be non-empty and comma-separated."
+    )
+    # Clean and set active labels from user input
+    active_labels = [label.strip().lower() for label in custom_labels_input.split(',') if label.strip()]
+    if not active_labels:
+        st.error("Please enter at least one custom label.")
+        active_labels = [] # Prevents model run if empty
+    else:
+        st.info(f"Custom Labels active: **{', '.join(active_labels)}**")
+st.session_state.active_labels_list = active_labels
+current_num_topics = st.session_state.num_topics_slider
+current_num_top_words = st.session_state.num_top_words_slider
+# --- Buttons ---
 col_results, col_clear = st.columns([1, 1])
 with col_results:
+    run_button = st.button("Analyze Text", key='run_results', use_container_width=True, type="primary", disabled=not active_labels)
 with col_clear:
     st.button("Clear text", on_click=clear_text, use_container_width=True)
 # --- Results Trigger and Processing (Fixed for index error) ---
 if run_button:
     if text.strip() and word_count <= word_limit:
+        # 1. Determine Active Labels (Already done above, just referencing)
         active_labels = st.session_state.active_labels_list
         # Caching Logic: Check if we need to re-run the full process
         should_rerun_full_analysis = (
             text.strip() != st.session_state.last_text.strip() or
+            set(active_labels) != set(st.session_state.last_active_labels if st.session_state.last_active_labels else [])
         )
         if should_rerun_full_analysis:
             # 2. Rerunning Full Analysis
             CHUNKING_THRESHOLD = 500
             should_chunk = word_count > CHUNKING_THRESHOLD
+            mode_msg = "custom labels" if mode == "Custom Labels" else "fixed labels"
             if should_chunk:
                 mode_msg += " with **chunking** for large text"
                 df = pd.DataFrame(all_entities)
                 if not df.empty:
                     df = df.reset_index(drop=True)
+                    # --- CATEGORY MAPPING ADJUSTMENT ---
+                    # Assign fixed labels to their categories, and custom labels to 'User Defined'
+                    def map_category(label):
+                        if label in REVERSE_FIXED_CATEGORY_MAPPING:
+                            return REVERSE_FIXED_CATEGORY_MAPPING[label]
+                        elif label in active_labels and label not in FIXED_LABELS:
+                             # This handles any truly custom labels entered by the user
+                            return 'User Defined Entities'
+                        else:
+                            return 'Other'
+                    df['category'] = df['label'].apply(map_category)
                     df['text'] = df['text'].apply(remove_trailing_punctuation)
     elif word_count > word_limit:
         st.error(f"Text too long! Please limit your input to {word_limit} words.")
         st.session_state.show_results = False
+    elif not active_labels:
+        st.error("Please ensure your custom label input is not empty.")
+        st.session_state.show_results = False
     else:
         st.warning("Please enter some text to analyze.")
         st.session_state.show_results = False
             )
         # 1. Highlighted Text placed inside an Expander
+        with st.expander(f"### 1. Analyzed Text with Highlighted Entities ({mode} Mode)", expanded=False):
              st.markdown(
                 highlight_entities(st.session_state.last_text, df, entity_color_map),
                 unsafe_allow_html=True
         with tab_category_details:
             st.markdown("#### Detailed Entities Table (Grouped by Category)")
+            # Get all unique categories present in the data (Fixed + User Defined)
+            unique_categories = list(df['category'].unique())
+            # Ensure fixed categories appear first if present, followed by custom/other
+            ordered_categories = []
+            # Add fixed categories in defined order
+            for fixed_cat in FIXED_CATEGORY_MAPPING.keys():
+                if fixed_cat in unique_categories:
+                    ordered_categories.append(fixed_cat)
+                    unique_categories.remove(fixed_cat)
+            # Add User Defined and Other at the end
+            if 'User Defined Entities' in unique_categories:
+                ordered_categories.append('User Defined Entities')
+                unique_categories.remove('User Defined Entities')
+            if 'Other' in unique_categories:
+                ordered_categories.append('Other')
+                unique_categories.remove('Other')
+            # Add any remaining categories (shouldn't happen with map_category, but for safety)
+            ordered_categories.extend(unique_categories)
+            tabs_category = st.tabs(ordered_categories)
+            for category, tab in zip(ordered_categories, tabs_category):
                 df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
                 styled_df_category = color_score_gradient(df_category)
                 with tab:
         # --- A. Network Graph Section ---
         with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
             st.plotly_chart(generate_network_graph(df, st.session_state.last_text, entity_color_map), use_container_width=True)
         # --- B. Topic Modeling Section ---
         # 5. White-Label Configuration
         st.markdown("---")
         st.markdown("### 5. White-Label Report Configuration 🎨")
+        default_report_title = "Fixed Entity Analysis Report" if mode == "Fixed Labels" else "Custom Entity Analysis Report"
         custom_report_title = st.text_input(
             "Type Your Report Title (for HTML Report), and then press Enter.",
             value=default_report_title
                 file_name="ner_topic_full_report.html",
                 mime="text/html",
                 use_container_width=True
+            )