Spaces:

AIEcosystem
/

render4

Runtime error

App Files Files Community

AIEcosystem committed on Nov 6, 2025

Commit

0c28caf

verified ·

1 Parent(s): ce1b83d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +149 -206

src/streamlit_app.py CHANGED Viewed

@@ -12,18 +12,14 @@ import re
 import string
 import json
 from itertools import cycle
-# --- PPTX Imports (Note: pptx must be installed via 'pip install python-pptx') ---
 from io import BytesIO
 import plotly.io as pio
-# ---------------------------
-# --- Stable Scikit-learn LDA Imports ---
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.decomposition import LatentDirichletAllocation
-# ------------------------------
 from gliner import GLiNER
 from streamlit_extras.stylable_container import stylable_container
-# Using a try/except for comet_ml import
 try:
     from comet_ml import Experiment
 except ImportError:
@@ -33,10 +29,7 @@ except ImportError:
         def log_table(self, *args): pass
         def end(self): pass
-# --- Model Home Directory (Fix for deployment environments) ---
-os.environ['HF_HOME'] = '/tmp'
-# --- Fixed Label Definitions and Mappings (Used as Fallback) ---
 FIXED_LABELS = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
 FIXED_ENTITY_COLOR_MAP = {
     "person": "#10b981", # Green
@@ -59,7 +52,6 @@ FIXED_CATEGORY_MAPPING = {
 REVERSE_FIXED_CATEGORY_MAPPING = {label: category for category, label_list in FIXED_CATEGORY_MAPPING.items() for label in label_list}
 # --- Dynamic Color Generator for Custom Labels ---
-# Use Plotly's Alphabet set for a large pool of distinct colors
 COLOR_PALETTE = cycle(px.colors.qualitative.Alphabet)
 def extract_label(node_name):
@@ -74,86 +66,88 @@ def remove_trailing_punctuation(text_string):
 def get_dynamic_color_map(active_labels, fixed_map):
     """Build a label -> colour mapping for the labels currently in use.

     When ``active_labels`` equals the module-level ``FIXED_LABELS`` list,
     ``fixed_map`` itself is returned (the same object, not a copy).
     Otherwise each label takes its colour from ``fixed_map`` when present
     there, and the next value from the module-level ``COLOR_PALETTE``
     iterator when not.
     """
     # Fast path: the default label set maps straight onto the fixed colours.
     if active_labels == FIXED_LABELS:
         return fixed_map
     # A label that also exists in the fixed map keeps its fixed colour;
     # anything else consumes one colour from the shared palette iterator.
     return {
         name: fixed_map[name] if name in fixed_map else next(COLOR_PALETTE)
         for name in active_labels
     }
 def highlight_entities(text, df_entities, entity_color_map):
     """Return ``text`` as an HTML block with every entity wrapped in a coloured span.

     Args:
         text: The full analysed document. The ``start``/``end`` values in
             ``df_entities`` are treated as character offsets into this string.
         df_entities: DataFrame with at least ``start``, ``end`` and ``label``
             columns.
         entity_color_map: Mapping of label -> CSS colour. Labels missing from
             the map fall back to ``#000000``.

     Returns:
         ``text`` unchanged when ``df_entities`` is empty; otherwise an HTML
         ``<div>`` string containing the highlighted document.

     The document text and the labels are HTML-escaped, because callers render
     the result as raw HTML. Entities that overlap an already highlighted span
     (e.g. nested entities) or whose span is empty are skipped, so the markup
     is never spliced into the middle of another tag.
     """
     from html import escape  # function-scope import keeps this block self-contained
     if df_entities.empty:
         return text
     # Walk left to right; on equal starts prefer the longest span so that the
     # outer entity of a nested pair is the one that gets highlighted.
     records = sorted(
         df_entities.to_dict('records'),
         key=lambda rec: (rec['start'], -rec['end']),
     )
     pieces = []
     cursor = 0  # index in ``text`` up to which output has been emitted
     for entity in records:
         # Clamp the span to the bounds of the document.
         start = max(0, entity['start'])
         end = min(len(text), entity['end'])
         if start < cursor or end <= start:
             # Overlaps a span that is already highlighted, or is empty/inverted.
             continue
         label = entity['label']
         color = entity_color_map.get(label, '#000000')
         pieces.append(escape(text[cursor:start], quote=False))
         pieces.append(
             f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{escape(str(label), quote=True)}">{escape(text[start:end], quote=False)}</span>'
         )
         cursor = end
     pieces.append(escape(text[cursor:], quote=False))
     highlighted_text = "".join(pieces)
     # The wrapper div mimics the look of the Streamlit input box in the report.
     return f'<div style="border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
 def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
     """Run LDA topic modelling over the unique entity texts.

     Args:
         df_entities: DataFrame with a ``text`` column; its unique values are
             used as the documents.
         num_topics: Number of LDA components to fit.
         num_top_words: Upper bound on the words reported per topic (further
             capped by the number of documents).

     Returns:
         A DataFrame with ``Topic_ID``, ``Word`` and ``Weight`` columns, or
         ``None`` when there are fewer than two documents, the vocabulary is
         smaller than ``num_topics``, or fitting fails.
     """
     documents = df_entities['text'].unique().tolist()
     if len(documents) < 2:
         return None
     top_n = min(num_top_words, len(documents))
     try:
         tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words='english', ngram_range=(1, 3))
         tfidf = tfidf_vectorizer.fit_transform(documents)
         tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
         if len(tfidf_feature_names) < num_topics:
             # Too few features survived the strict frequency filters:
             # retry once with the filters relaxed before giving up.
             tfidf_vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', ngram_range=(1, 3))
             tfidf = tfidf_vectorizer.fit_transform(documents)
             tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
             if len(tfidf_feature_names) < num_topics:
                 return None
         lda = LatentDirichletAllocation(n_components=num_topics, max_iter=5, learning_method='online', random_state=42, n_jobs=-1)
         lda.fit(tfidf)
         topic_data_list = []
         for topic_idx, topic in enumerate(lda.components_):
             # Indices of the ``top_n`` largest weights, heaviest first.
             top_words_indices = topic.argsort()[:-top_n - 1:-1]
             for i in top_words_indices:
                 topic_data_list.append({
                     'Topic_ID': f'Topic #{topic_idx + 1}',
                     'Word': tfidf_feature_names[i],
                     'Weight': topic[i],
                 })
         return pd.DataFrame(topic_data_list)
     except Exception as exc:
         # Topic modelling is best-effort, so callers still get ``None``; but
         # report the reason on the console instead of discarding it silently.
         print(f"Topic modeling failed: {exc}")
         return None
 def create_topic_word_bubbles(df_topic_data):
     """Generates a Plotly Bubble Chart for top words across all topics."""
     df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic','Word': 'word', 'Weight': 'weight'})
     df_topic_data['x_pos'] = df_topic_data.index
     if df_topic_data.empty:
         return None
     fig = px.scatter(
         df_topic_data,
         x='x_pos', y='weight', size='weight', color='topic', text='word', hover_name='word', size_max=40,
@@ -183,8 +177,10 @@ def generate_network_graph(df, raw_text, entity_color_map):
     entity_counts = df['text'].value_counts().reset_index()
     entity_counts.columns = ['text', 'frequency']
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
     thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
     radius = 10
@@ -192,32 +188,36 @@ def generate_network_graph(df, raw_text, entity_color_map):
     unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
     pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
     edges = set()
-    # Simple sentence tokenizer
     sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
     for sentence in sentences:
         entities_in_sentence = []
         for entity_text in unique_entities['text'].unique():
-            # Note: This is an inexact but fast co-occurrence check
             if entity_text.lower() in sentence.lower():
                 entities_in_sentence.append(entity_text)
         unique_entities_in_sentence = list(set(entities_in_sentence))
         for i in range(len(unique_entities_in_sentence)):
             for j in range(i + 1, len(unique_entities_in_sentence)):
                 node1 = unique_entities_in_sentence[i]
                 node2 = unique_entities_in_sentence[j]
                 edge_tuple = tuple(sorted((node1, node2)))
                 edges.add(edge_tuple)
     edge_x = []
     edge_y = []
     for edge in edges:
         n1, n2 = edge
         if n1 in pos_map and n2 in pos_map:
             edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
             edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])
     fig = go.Figure()
     edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines', name='Co-occurrence Edges', showlegend=False)
     fig.add_trace(edge_trace)
     fig.add_trace(go.Scatter(
         x=unique_entities['x'], y=unique_entities['y'], mode='markers+text', name='Entities', text=unique_entities['text'], textposition="top center", showlegend=False,
         marker=dict(
@@ -229,6 +229,7 @@ def generate_network_graph(df, raw_text, entity_color_map):
         customdata=unique_entities[['label', 'score', 'frequency']],
         hovertemplate=("<b>%{text}</b><br>Label: %{customdata[0]}<br>Score: %{customdata[1]:.2f}<br>Frequency: %{customdata[2]}<extra></extra>")
     ))
     legend_traces = []
     seen_labels = set()
     for index, row in unique_entities.iterrows():
@@ -237,8 +238,10 @@ def generate_network_graph(df, raw_text, entity_color_map):
             seen_labels.add(label)
             color = entity_color_map.get(label, '#cccccc')
             legend_traces.append(go.Scatter(x=[None], y=[None], mode='markers', marker=dict(size=10, color=color), name=f"{label.capitalize()}", showlegend=True))
     for trace in legend_traces:
         fig.add_trace(trace)
     fig.update_layout(
         title='Entity Co-occurrence Network (Edges = Same Sentence)',
         showlegend=True, hovermode='closest',
@@ -257,17 +260,13 @@ def generate_entity_csv(df):
     csv_buffer.seek(0)
     return csv_buffer
-# -----------------------------------
-# --- HTML REPORT GENERATION FUNCTION (MODIFIED FOR WHITE-LABEL) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_color_map, report_title="Entity and Topic Analysis Report", branding_html=""):
     """
     Generates a full HTML report containing all analysis results and visualizations.
-    Accepts report_title and branding_html for white-labeling.
     """
-    # Use the category values from the DataFrame to ensure the report matches the app's current mode (fixed or custom)
-    unique_categories = df['category'].unique()
     # 1. Generate Visualizations (Plotly HTML)
     # 1a. Treemap
     fig_treemap = px.treemap(
         df,
@@ -303,10 +302,11 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
         fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
         bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
-    # 1e. Network Graph HTML - IMPORTANT: Pass color map
     network_fig = generate_network_graph(df, text_input, entity_color_map)
     network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
     topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
     if df_topic_data is not None and not df_topic_data.empty:
         bubble_figure = create_topic_word_bubbles(df_topic_data)
@@ -315,12 +315,12 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
         else:
             topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
     else:
-        topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #888888;">' # Changed border color
         topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
         topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
         topic_charts_html += '</div>'
-    # 2. Get Highlighted Text - IMPORTANT: Pass color map
     highlighted_text_html = highlight_entities(text_input, df, entity_color_map).replace("div style", "div class='highlighted-text' style")
     # 3. Entity Tables (Pandas to HTML)
@@ -329,7 +329,7 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
         index=False
     )
-    # 4. Construct the Final HTML (UPDATED FOR WHITE-LABELING)
     html_content = f"""<!DOCTYPE html><html lang="en"><head>
         <meta charset="UTF-8">
         <meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -384,62 +384,59 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
 def chunk_text(text, max_chunk_size=1500):
     """Split ``text`` into consecutive chunks of roughly ``max_chunk_size`` characters.

     The text is first cut at paragraph breaks (double newline) and at
     whitespace following ``.``, ``!`` or ``?``; the separators are kept, so
     concatenating all chunks reproduces ``text`` exactly. Segments are then
     packed greedily; a single segment longer than ``max_chunk_size`` is kept
     whole rather than being cut.

     Returns:
         A list of ``(chunk, offset)`` tuples, where ``offset`` is the index
         of the chunk's first character in ``text``.
     """
     # The capturing group makes re.split return the separators as segments too.
     segments = re.split(r'(\n\n|(?<=[.!?])\s+)', text)
     chunks = []
     current_chunk = ""
     current_offset = 0
     for segment in segments:
         if not segment:
             continue
         if len(current_chunk) + len(segment) > max_chunk_size and current_chunk:
             # Flush the chunk built so far and start a new one at this segment.
             chunks.append((current_chunk, current_offset))
             current_offset += len(current_chunk)
             current_chunk = segment
         else:
             current_chunk += segment
     if current_chunk:
         chunks.append((current_chunk, current_offset))
     return chunks
 def process_chunked_text(text, labels, model):
     """Run entity prediction chunk by chunk and re-base the offsets onto ``text``.

     Args:
         text: The full document.
         labels: Entity labels passed through to ``model.predict_entities``.
         model: Object exposing ``predict_entities(chunk, labels)`` that
             returns dicts with ``start`` and ``end`` keys relative to the
             chunk it was given.

     Returns:
         A flat list of the entity dicts from all chunks, with ``start`` and
         ``end`` shifted (in place) to be offsets into ``text``.
     """
     MAX_CHUNK_CHARS = 3500
     chunks = chunk_text(text, max_chunk_size=MAX_CHUNK_CHARS)
     all_entities = []
     # The loop variable must not be called ``chunk_text``: that would make the
     # name local to this function and the call above would raise
     # UnboundLocalError before any chunk is processed.
     for chunk, chunk_offset in chunks:
         chunk_entities = model.predict_entities(chunk, labels)
         for entity in chunk_entities:
             entity['start'] += chunk_offset
             entity['end'] += chunk_offset
             all_entities.append(entity)
     return all_entities
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
-# --- Conditional Mobile Warning ---
 st.markdown(
     """
     <style>
     /* FIX: Aggressive theme override to ensure visibility */
     body {
-        background-color: #f0f2f6 !important; /* Force a light background */
-        color: #333333 !important; /* Force dark text */
     }
-    /* Ensure main Streamlit container background is also light */
     [data-testid="stAppViewBlock"] {
         background-color: #ffffff !important;
     }
-    /* CSS Media Query: Only show the content inside this selector when the screen width is 600px or less (typical mobile size) */
     @media (max-width: 600px) {
         #mobile-warning-container {
-            display: block; /* Show the warning container */
-            background-color: #ffcccc; /* Light red/pink background */
-            color: #cc0000; /* Dark red text */
             padding: 10px;
             border-radius: 5px;
             text-align: center;
@@ -448,27 +445,23 @@ st.markdown(
             border: 1px solid #cc0000;
         }
     }
-    /* Hide the content by default (for larger screens) */
     @media (min-width: 601px) {
         #mobile-warning-container {
-            display: none; /* Hide the warning container on desktop */
         }
     }
-    /* --- FIX: Tab Label Colors for Visibility --- */
     [data-testid="stConfigurableTabs"] button {
-        color: #333333 !important; /* Dark gray for inactive tabs */
-        background-color: #f0f0f0; /* Light gray background for inactive tabs */
         border: 1px solid #cccccc;
     }
-    /* Target the ACTIVE tab label */
     [data-testid="stConfigurableTabs"] button[aria-selected="true"] {
-        color: #FFFFFF !important; /* White text for active tab */
-        background-color: #007bff; /* Blue background for active tab */
-        border-bottom: 2px solid #007bff; /* Optional: adds an accent line */
     }
-    /* Expander header color fix (since you overwrote it to white) */
     .streamlit-expanderHeader {
-        color: #007bff; /* Blue text for Expander header */
     }
     </style>
     <div id="mobile-warning-container">
@@ -477,8 +470,7 @@ st.markdown(
     """,
     unsafe_allow_html=True)
-# --- Topic Modeling Settings (Moved to main body, but need to initialize key outside of 'if st.session_state.show_results:') ---
-st.subheader("Entity and Topic Analysis Report Generator", divider="blue") # Changed divider from "rainbow" (often includes red/pink) to "blue")
 tab1, tab2 = st.tabs(["Embed", "Important Notes"])
 with tab1:
@@ -502,28 +494,20 @@ with tab2:
     **Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
     **How to Use:** Type or paste your text into the text area below, then click the 'Results' button.
     """)
-    st.markdown("For any errors or inquiries, please contact us at [info@your-company.com](mailto:info@your-company.com)") # Updated contact info
-# --- Comet ML Setup (Placeholder/Conditional) ---
-COMET_API_KEY = os.environ.get("COMET_API_KEY")
-COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
-COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
-comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 # --- Model Loading ---
 @st.cache_resource
 def load_ner_model(labels):
     """Load the GLiNER model, passing ``labels`` as its generation constraints.

     The function is wrapped in ``st.cache_resource``. If loading raises, the
     error is printed to the console, shown in the app via ``st.error``, and
     the script run is halted with ``st.stop()``.
     """
     model_id = "knowledgator/gliner-multitask-large-v0.5"
     try:
         ner_model = GLiNER.from_pretrained(model_id, nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
     except Exception as load_error:
         # Console output for debugging, then a visible error for the user.
         print(f"FATAL ERROR: Failed to load NER model: {load_error}")
         st.error(f"Failed to load NER model. This may be due to a dependency issue or resource limits: {load_error}")
         st.stop()
     else:
         return ner_model
-# --- LONG DEFAULT TEXT (178 Words) ---
 DEFAULT_TEXT = (
     "In June 2024, the founder, Dr. Emily Carter, officially announced a new, expansive partnership between "
     "TechSolutions Inc. and the European Space Agency (ESA). This strategic alliance represents a significant "
@@ -541,7 +525,7 @@ DEFAULT_TEXT = (
     "general public by October 1st. The goal is to deploy the **Astra** v2 platform before the next solar eclipse event in 2026.")
 # -----------------------------------
-# --- Session State Initialization (CRITICAL FIX) ---
 if 'show_results' not in st.session_state: st.session_state.show_results = False
 if 'last_text' not in st.session_state: st.session_state.last_text = ""
 if 'results_df' not in st.session_state: st.session_state.results_df = pd.DataFrame()
@@ -551,13 +535,11 @@ if 'my_text_area' not in st.session_state: st.session_state.my_text_area = DEFAU
 if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
 if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
 if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
-# Initialize Topic Model settings in state, so they can be set even if not using the sidebar
 if 'num_topics_slider' not in st.session_state: st.session_state.num_topics_slider = 5
 if 'num_top_words_slider' not in st.session_state: st.session_state.num_top_words_slider = 10
 if 'last_num_topics' not in st.session_state: st.session_state.last_num_topics = None
 if 'last_num_top_words' not in st.session_state: st.session_state.last_num_top_words = None
-if 'last_active_labels' not in st.session_state: st.session_state.last_active_labels = None # Added for results comparison
 def clear_text():
     """Clears the text area (sets it to an empty string) and hides results."""
@@ -569,7 +551,7 @@ def clear_text():
     st.session_state.topic_results = None
 # --- Text Input and Clear Button ---
-word_limit = 10000 # Updated to 10000
 text = st.text_area(
     f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
     height=250,
@@ -583,25 +565,22 @@ custom_labels_text = st.text_area(
     "**Optional:** Enter your own comma-separated entity labels here (e.g., `product, symptom, client_id`). Leave blank for default labels.",
     height=60,
     key='custom_labels_input',
-    placeholder="e.g., product, symptom, client_id" # Show placeholder after the prompt
 )
-# Use columns to align the buttons neatly
 col_results, col_clear = st.columns([1, 1])
 with col_results:
     run_button = st.button("Results", key='run_results', use_container_width=True)
 with col_clear:
     st.button("Clear text", on_click=clear_text, use_container_width=True)
-# --- Results Trigger and Processing (Completed Logic with Chunking and Topic Vars) ---
 if run_button:
     # 1. Determine Active Labels and Mode
     custom_labels_raw = st.session_state.custom_labels_input
     if custom_labels_raw.strip():
-        # Sanitize and parse custom labels
         custom_labels_list = [label.strip().lower() for label in custom_labels_raw.split(',') if label.strip()]
         if not custom_labels_list:
-            # Fallback if user enters commas but no actual words
             st.session_state.active_labels_list = FIXED_LABELS
             st.session_state.is_custom_mode = False
             st.info("No valid custom labels found. Falling back to default fixed labels.")
@@ -613,8 +592,6 @@ if run_button:
         st.session_state.is_custom_mode = False
     active_labels = st.session_state.active_labels_list
-    # Get current topic modeling settings (used for caching logic)
     current_num_topics = st.session_state.num_topics_slider
     current_num_top_words = st.session_state.num_top_words_slider
@@ -624,67 +601,70 @@ if run_button:
         active_labels != st.session_state.last_active_labels
     )
-    if should_rerun_full_analysis and text.strip() and word_count <= word_limit:
-        # 2. Rerunning Full Analysis
-        CHUNKING_THRESHOLD = 500
-        should_chunk = word_count > CHUNKING_THRESHOLD
-        mode_msg = f"{'custom' if st.session_state.is_custom_mode else 'fixed'} labels"
-        if should_chunk:
-            mode_msg += " with **chunking** for large text"
-        with st.spinner(f"Analyzing text with {mode_msg}..."):
-            start_time = time.time()
-            # 2a. Load Model (Model constraints are updated based on active labels)
-            # NOTE: Load time is cached, so this is fast on subsequent runs.
-            model = load_ner_model(active_labels)
-            # 2b. Extract Entities (using chunking if necessary)
             if should_chunk:
-                all_entities = process_chunked_text(text, active_labels, model)
-            else:
-                all_entities = model.predict_entities(text, active_labels)
-            end_time = time.time()
-            elapsed_time = end_time - start_time
-            # 2c. Prepare DataFrame
-            df = pd.DataFrame(all_entities)
-            if not df.empty:
-                # Add category mapping
-                if st.session_state.is_custom_mode:
-                    df['category'] = 'User Defined Entities'
-                else:
-                    df['category'] = df['label'].map(REVERSE_FIXED_CATEGORY_MAPPING).fillna('Other')
-                # Clean up extracted text
-                df['text'] = df['text'].apply(remove_trailing_punctuation)
-                # 2d. Perform Topic Modeling on extracted entities
-                df_topic_data = perform_topic_modeling(df, num_topics=current_num_topics, num_top_words=current_num_top_words)
-            else:
-                df_topic_data = None
-            # 5. Save Results to Session State
-            st.session_state.results_df = df
-            st.session_state.topic_results = df_topic_data
-            st.session_state.elapsed_time = elapsed_time
-            st.session_state.last_text = text
-            st.session_state.show_results = True
-            st.session_state.last_active_labels = active_labels
-            st.session_state.last_num_topics = current_num_topics # Save topic settings
-            st.session_state.last_num_top_words = current_num_top_words # Save topic settings
         else:
             st.info("Results already calculated for the current text and settings.")
             st.session_state.show_results = True
-# --- Display Download Link and Results (Updated with White-Label inputs) ---
 if st.session_state.show_results:
     df = st.session_state.results_df
     df_topic_data = st.session_state.topic_results
-    # Generate the color map based on the results DF labels
     current_labels_in_df = df['label'].unique().tolist()
     entity_color_map = get_dynamic_color_map(current_labels_in_df, FIXED_ENTITY_COLOR_MAP)
@@ -692,6 +672,7 @@ if st.session_state.show_results:
         st.warning("No entities were found in the provided text with the current label set.")
     else:
         st.subheader("Analysis Results", divider="blue")
         # 1. Highlighted Text
         st.markdown(f"### 1. Analyzed Text with Highlighted Entities ({'Custom Mode' if st.session_state.is_custom_mode else 'Fixed Mode'})")
         st.markdown(highlight_entities(st.session_state.last_text, df, entity_color_map), unsafe_allow_html=True)
@@ -700,7 +681,6 @@ if st.session_state.show_results:
         st.markdown("### 2. Detailed Entity Analysis")
         tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
-        # Determine which categories to use for the tabs
         if st.session_state.is_custom_mode:
             unique_categories = ["User Defined Entities"]
             tabs_to_show = df['label'].unique().tolist()
@@ -708,67 +688,42 @@ if st.session_state.show_results:
         else:
             unique_categories = list(FIXED_CATEGORY_MAPPING.keys())
-        # --- Section 2a: Detailed Tables by Category/Label ---
         # --- Function to Apply Conditional Coloring to Scores ---
-        def color_score_gradient(df):
-            """
-            Applies a color gradient to the 'score' column using Pandas Styler.
-            High scores (closer to 1.0) will be darker/more saturated.
-            """
-            # Use 'YlGnBu' (Yellow-Green-Blue) gradient.
-            # We apply the gradient only to the 'score' column subset.
-            return df.style.background_gradient(
                 cmap='YlGnBu',
                 subset=['score']
             ).format(
-                {'score': '{:.4f}'} # Re-apply the four decimal place format
             )
-        # --- Your Main Tab Detail Logic ---
         with tab_category_details:
             st.markdown("#### Detailed Entities Table (Grouped by Category)")
             if st.session_state.is_custom_mode:
-                # In custom mode, group by the actual label since the category is just "User Defined Entities"
                 tabs_list = df['label'].unique().tolist()
                 tabs_category = st.tabs(tabs_list)
                 for label, tab in zip(tabs_list, tabs_category):
-                    # Prepare the DataFrame for the current label
                     df_label = df[df['label'] == label][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
-                    # Apply the coloring function
                     styled_df_label = color_score_gradient(df_label)
                     with tab:
                         st.markdown(f"##### {label.capitalize()} Entities ({len(df_label)} total)")
-                        st.dataframe(
-                            # Pass the STYLED DataFrame object to Streamlit
-                            styled_df_label,
-                            use_container_width=True,
-                            # NOTE: st.column_config for 'score' is removed because Pandas Styler handles formatting and coloring
-                        )
             else:
-                # In fixed mode, group by the category defined in FIXED_CATEGORY_MAPPING
                 tabs_category = st.tabs(unique_categories)
                 for category, tab in zip(unique_categories, tabs_category):
-                    # Prepare the DataFrame for the current category
                     df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
-                    # Apply the coloring function
                     styled_df_category = color_score_gradient(df_category)
                     with tab:
                         st.markdown(f"##### {category} Entities ({len(df_category)} total)")
                         if not df_category.empty:
-                            st.dataframe(
-                                # Pass the STYLED DataFrame object to Streamlit
-                                styled_df_category,
-                                use_container_width=True,
-                                # NOTE: st.column_config for 'score' is removed
-                            )
                         else:
                             st.info(f"No entities of category **{category}** were found in the text.")
-            # --- INSERTED GLOSSARY HERE ---
             with st.expander("See Glossary of tags"):
                 st.write('''- **text**: ['entity extracted from your text data']
 - **label**: ['label (tag) assigned to a given extracted entity (custom or fixed)']
@@ -776,7 +731,6 @@ if st.session_state.show_results:
 - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
 - **start**: ['index of the start of the corresponding entity']
 - **end**: ['index of the end of the corresponding entity']''')
-            # --- END GLOSSARY INSERTION ---
         # --- Section 2b: Treemap Visualization ---
         with tab_treemap_viz:
@@ -791,13 +745,12 @@ if st.session_state.show_results:
             fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
             st.plotly_chart(fig_treemap, use_container_width=True)
-        # --- Section 3: Comparative Charts (COMPLETED) ---
         st.markdown("---")
         st.markdown("### 3. Comparative Charts")
         col1, col2, col3 = st.columns(3)
         grouped_counts = df['category'].value_counts().reset_index()
         grouped_counts.columns = ['Category', 'Count']
-        # Determine color sequence for charts
         chart_color_seq = px.colors.qualitative.Pastel if len(grouped_counts) > 1 else px.colors.sequential.Cividis
         with col1: # Pie Chart
@@ -823,17 +776,17 @@ if st.session_state.show_results:
             else:
                 st.info("No entities were repeated enough for a Top 10 frequency chart.")
-        # 4. Advanced Analysis (REVISED STRUCTURE)
         st.markdown("---")
         st.markdown("### 4. Advanced Analysis")
-        # --- A. Network Graph Section (Alone) ---
         with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
             st.plotly_chart(generate_network_graph(df, st.session_state.last_text, entity_color_map), use_container_width=True)
-        # --- B. Topic Modeling Section (Controls and Chart inside one block) ---
         st.markdown("---")
-        with st.container(border=True): # Use a container to visually group the Topic Modeling section
             st.markdown("#### 💡 Topic Modeling (LDA) Configuration and Results")
             st.markdown("Adjust the settings below and click **'Re-Run Topic Model'** to instantly update the visualization based on the extracted entities.")
@@ -859,13 +812,13 @@ if st.session_state.show_results:
                     help="The number of top words to display per topic (5 to 20)."
                 )
-            # Function to trigger a recalculation of ONLY the topic model
             def rerun_topic_model():
                 # Update session state with the new slider values
                 st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
                 st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
-                # Recalculate topic modeling results
                 if not st.session_state.results_df.empty:
                     df_topic_data_new = perform_topic_modeling(
                         df_entities=st.session_state.results_df,
                         num_topics=st.session_state.num_topics_slider,
@@ -874,45 +827,44 @@ if st.session_state.show_results:
                     st.session_state.topic_results = df_topic_data_new
                     st.session_state.last_num_topics = st.session_state.num_topics_slider
                     st.session_state.last_num_top_words = st.session_state.num_top_words_slider
-                # st.success("Topic Model Re-Run Complete!") # Removed success message as it causes an extra flash
             with col_rerun_btn:
-                st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True) # Vertical spacing
-                # Rerun the entire app to update the chart immediately
                 st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
-            # Display the topic chart inside the same container
             st.markdown("---")
             st.markdown(f"""
             **Current LDA Parameters:**
-            * Topics: **{st.session_state.last_num_topics}**
-            * Top Words: **{st.session_state.last_num_top_words}**
             """)
-            df_topic_data = st.session_state.topic_results # Get the potentially updated results
             if df_topic_data is not None and not df_topic_data.empty:
                 st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
                 st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
             else:
                 st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
-        # --- 5. White-Label Configuration (NEW SECTION FOR CUSTOM BRANDING) ---
         st.markdown("---")
         st.markdown("### 5. White-Label Report Configuration 🎨")
-        # Set a dynamic default title based on the mode
         default_report_title = f"{'Custom' if st.session_state.is_custom_mode else 'Fixed'} Entity Analysis Report"
         custom_report_title = st.text_input(
             "Type Your Report Title (for HTML Report), and then press Enter.",
             value=default_report_title
         )
-        # UPDATED: Simplified input for the user
         custom_branding_text_input = st.text_area(
             "Type Your Brand Name or Tagline (Appears below the title in the report), and then press Enter.",
-            value="Analysis powered by My Own Brand", # Removed the technical <p> tag
             key='custom_branding_input',
             help="Enter your brand name or a short tagline. This text will be automatically styled and included below the main title."
         )
-        # 6. Downloads (Updated to pass custom variables)
         st.markdown("---")
         st.markdown("### 6. Downloads")
         col_csv, col_html = st.columns(2)
@@ -928,19 +880,17 @@ if st.session_state.show_results:
                 use_container_width=True
             )
-        # --- NEW LOGIC: Wrap the simple text input into proper HTML for the report ---
-        # We wrap the user's plain text in a styled HTML paragraph element
         branding_to_pass = f'<p style="font-size: 1.1em; font-weight: 500;">{custom_branding_text_input}</p>'
-        # HTML Download (Passing custom white-label parameters)
         html_content = generate_html_report(
             df,
             st.session_state.last_text,
             st.session_state.elapsed_time,
             df_topic_data,
             entity_color_map,
-            report_title=custom_report_title, # Pass custom title
-            branding_html=branding_to_pass # Pass the now-wrapped HTML
         )
         html_bytes = html_content.encode('utf-8')
         with col_html:
@@ -951,11 +901,4 @@ if st.session_state.show_results:
                 mime="text/html",
                 use_container_width=True
             )

 import string
 import json
 from itertools import cycle
 from io import BytesIO
 import plotly.io as pio
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.decomposition import LatentDirichletAllocation
 from gliner import GLiNER
 from streamlit_extras.stylable_container import stylable_container
+# --- Comet ML Imports (Optional/Placeholder) ---
 try:
     from comet_ml import Experiment
 except ImportError:
         def log_table(self, *args): pass
         def end(self): pass
+# --- Fixed Label Definitions and Mappings ---
 FIXED_LABELS = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
 FIXED_ENTITY_COLOR_MAP = {
     "person": "#10b981", # Green
 REVERSE_FIXED_CATEGORY_MAPPING = {label: category for category, label_list in FIXED_CATEGORY_MAPPING.items() for label in label_list}
 # --- Dynamic Color Generator for Custom Labels ---
 COLOR_PALETTE = cycle(px.colors.qualitative.Alphabet)
 def extract_label(node_name):
 def get_dynamic_color_map(active_labels, fixed_map):
     """Build the label -> color lookup for the currently active labels.

     When ``active_labels`` equals ``FIXED_LABELS`` the ``fixed_map`` object
     itself is returned (not a copy). Otherwise each label present in
     ``fixed_map`` keeps its fixed color and every other label takes the next
     value from the module-level ``COLOR_PALETTE`` cycle.
     """
     if active_labels == FIXED_LABELS:
         return fixed_map
     # One palette draw per label that has no fixed color, in iteration order.
     return {
         name: fixed_map[name] if name in fixed_map else next(COLOR_PALETTE)
         for name in active_labels
     }
 def highlight_entities(text, df_entities, entity_color_map):
     """Generates HTML to display text with entities highlighted and colored.

     The output is assembled left to right from slices of the original
     ``text``, so every offset is applied to the string it was measured on.
     Entities that overlap an already highlighted span (nested or duplicate
     detections) and empty spans are skipped instead of corrupting the
     markup. Document text and labels are HTML-escaped because the result is
     rendered as raw HTML.

     Args:
         text: The analyzed document.
         df_entities: DataFrame with one row per entity and at least the
             columns ``start``, ``end`` and ``label``.
         entity_color_map: Mapping of label -> CSS color; labels missing from
             it fall back to ``#000000``.

     Returns:
         ``text`` unchanged when ``df_entities`` is empty, otherwise an HTML
         ``<div>`` string wrapping the escaped text with one ``<span>`` per
         highlighted entity.
     """
     import html  # local import so this block does not depend on the file's import section

     if df_entities.empty:
         return text

     text_length = len(text)
     # Ascending start; for equal starts the longest span comes first so the
     # outermost entity is the one that gets highlighted.
     ordered = df_entities.sort_values(
         by=['start', 'end'], ascending=[True, False]
     ).to_dict('records')

     pieces = []
     cursor = 0  # index in ``text`` up to which output has been emitted
     for entity in ordered:
         start = max(0, entity['start'])
         end = min(text_length, entity['end'])
         if start < cursor or end <= start:
             # Overlaps a span that is already highlighted, or is empty.
             continue
         pieces.append(html.escape(text[cursor:start]))
         label = entity['label']
         color = entity_color_map.get(label, '#000000')
         pieces.append(
             f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{html.escape(str(label))}">{html.escape(text[start:end])}</span>'
         )
         cursor = end
     pieces.append(html.escape(text[cursor:]))
     highlighted_text = "".join(pieces)
     return f'<div style="border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
 def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
     """Performs basic Topic Modeling using LDA over the unique entity texts.

     Args:
         df_entities: DataFrame with a ``text`` column; each unique value is
             treated as one document.
         num_topics: Number of LDA components to fit.
         num_top_words: Upper bound on the words reported per topic (also
             capped by the number of unique documents).

     Returns:
         DataFrame with the columns ``Topic_ID``, ``Word`` and ``Weight``, or
         ``None`` when there are fewer than two unique documents, the
         vocabulary is smaller than ``num_topics``, or fitting fails.
     """
     documents = df_entities['text'].unique().tolist()
     if len(documents) < 2:
         return None
     N = min(num_top_words, len(documents))
     try:
         # Step 1: Try aggressive filtering
         try:
             tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words='english', ngram_range=(1, 3))
             tfidf = tfidf_vectorizer.fit_transform(documents)
             tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
         except ValueError:
             # The vectorizer raises ValueError when min_df/max_df pruning
             # leaves no terms. That is exactly the situation the relaxed
             # fallback below is meant for, so it must not abort the function.
             tfidf = None
             tfidf_feature_names = []
         # Step 2: Fallback if not enough features
         if tfidf is None or len(tfidf_feature_names) < num_topics:
             tfidf_vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', ngram_range=(1, 3))
             tfidf = tfidf_vectorizer.fit_transform(documents)
             tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
             if len(tfidf_feature_names) < num_topics:
                 return None
         lda = LatentDirichletAllocation(n_components=num_topics, max_iter=5, learning_method='online', random_state=42, n_jobs=-1)
         lda.fit(tfidf)
         topic_data_list = []
         for topic_idx, topic in enumerate(lda.components_):
             # Indices of the N largest weights, highest first.
             top_words_indices = topic.argsort()[:-N - 1:-1]
             for i in top_words_indices:
                 topic_data_list.append({
                     'Topic_ID': f'Topic #{topic_idx + 1}',
                     'Word': tfidf_feature_names[i],
                     'Weight': topic[i],
                 })
         return pd.DataFrame(topic_data_list)
     except Exception:
         # Best effort: callers treat None as "no topic chart available".
         return None
 def create_topic_word_bubbles(df_topic_data):
     """Generates a Plotly Bubble Chart for top words across all topics."""
     df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic','Word': 'word', 'Weight': 'weight'})
     df_topic_data['x_pos'] = df_topic_data.index
     if df_topic_data.empty:
         return None
     fig = px.scatter(
         df_topic_data,
         x='x_pos', y='weight', size='weight', color='topic', text='word', hover_name='word', size_max=40,
     entity_counts = df['text'].value_counts().reset_index()
     entity_counts.columns = ['text', 'frequency']
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
     thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
     radius = 10
     unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
     pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
     edges = set()
     sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
     for sentence in sentences:
         entities_in_sentence = []
         for entity_text in unique_entities['text'].unique():
             if entity_text.lower() in sentence.lower():
                 entities_in_sentence.append(entity_text)
         unique_entities_in_sentence = list(set(entities_in_sentence))
         for i in range(len(unique_entities_in_sentence)):
             for j in range(i + 1, len(unique_entities_in_sentence)):
                 node1 = unique_entities_in_sentence[i]
                 node2 = unique_entities_in_sentence[j]
                 edge_tuple = tuple(sorted((node1, node2)))
                 edges.add(edge_tuple)
     edge_x = []
     edge_y = []
     for edge in edges:
         n1, n2 = edge
         if n1 in pos_map and n2 in pos_map:
             edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
             edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])
     fig = go.Figure()
     edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines', name='Co-occurrence Edges', showlegend=False)
     fig.add_trace(edge_trace)
     fig.add_trace(go.Scatter(
         x=unique_entities['x'], y=unique_entities['y'], mode='markers+text', name='Entities', text=unique_entities['text'], textposition="top center", showlegend=False,
         marker=dict(
         customdata=unique_entities[['label', 'score', 'frequency']],
         hovertemplate=("<b>%{text}</b><br>Label: %{customdata[0]}<br>Score: %{customdata[1]:.2f}<br>Frequency: %{customdata[2]}<extra></extra>")
     ))
     legend_traces = []
     seen_labels = set()
     for index, row in unique_entities.iterrows():
             seen_labels.add(label)
             color = entity_color_map.get(label, '#cccccc')
             legend_traces.append(go.Scatter(x=[None], y=[None], mode='markers', marker=dict(size=10, color=color), name=f"{label.capitalize()}", showlegend=True))
     for trace in legend_traces:
         fig.add_trace(trace)
     fig.update_layout(
         title='Entity Co-occurrence Network (Edges = Same Sentence)',
         showlegend=True, hovermode='closest',
     csv_buffer.seek(0)
     return csv_buffer
+# --- HTML REPORT GENERATION FUNCTION ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_color_map, report_title="Entity and Topic Analysis Report", branding_html=""):
     """
     Generates a full HTML report containing all analysis results and visualizations.
     """
     # 1. Generate Visualizations (Plotly HTML)
     # 1a. Treemap
     fig_treemap = px.treemap(
         df,
         fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
         bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
+    # 1e. Network Graph HTML
     network_fig = generate_network_graph(df, text_input, entity_color_map)
     network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
+    # 1f. Topic Modeling Bubble Chart
     topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
     if df_topic_data is not None and not df_topic_data.empty:
         bubble_figure = create_topic_word_bubbles(df_topic_data)
         else:
             topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
     else:
+        topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #888888;">'
         topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
         topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
         topic_charts_html += '</div>'
+    # 2. Get Highlighted Text
     highlighted_text_html = highlight_entities(text_input, df, entity_color_map).replace("div style", "div class='highlighted-text' style")
     # 3. Entity Tables (Pandas to HTML)
         index=False
     )
+    # 4. Construct the Final HTML
     html_content = f"""<!DOCTYPE html><html lang="en"><head>
         <meta charset="UTF-8">
         <meta name="viewport" content="width=device-width, initial-scale=1.0">
 def chunk_text(text, max_chunk_size=1500):
     """Split ``text`` into ``(chunk, start_offset)`` pairs on sentence/paragraph breaks.

     The separators are kept, so concatenating the chunks reproduces ``text``
     and each offset is the character index at which its chunk begins. A
     chunk is closed once adding the next segment would push it past
     ``max_chunk_size`` characters; a single segment longer than the limit is
     still emitted whole.
     """
     pieces = re.split(r'(\n\n|(?<=[.!?])\s+)', text)
     result = []
     buffer = ""
     buffer_start = 0
     for piece in filter(None, pieces):
         fits = len(buffer) + len(piece) <= max_chunk_size
         if fits or not buffer:
             buffer += piece
             continue
         # Close the current chunk and start the next one where it ended.
         result.append((buffer, buffer_start))
         buffer_start += len(buffer)
         buffer = piece
     if buffer:
         result.append((buffer, buffer_start))
     return result
 def process_chunked_text(text, labels, model):
     """Processes large text in chunks and aggregates/offsets the entities.

     Args:
         text: Full document to analyze.
         labels: Entity labels passed to ``model.predict_entities``.
         model: Object exposing ``predict_entities(text, labels)`` that
             returns dicts with ``start`` and ``end`` offsets.

     Returns:
         List of entity dicts whose ``start``/``end`` are shifted by the
         offset of the chunk they were found in.
     """
     MAX_CHUNK_CHARS = 3500
     chunks = chunk_text(text, max_chunk_size=MAX_CHUNK_CHARS)
     all_entities = []
     # The loop variable must not be named ``chunk_text``: binding that name
     # anywhere in this function makes it local, which turns the call above
     # into an UnboundLocalError.
     for chunk, chunk_offset in chunks:
         for entity in model.predict_entities(chunk, labels):
             all_entities.append({
                 **entity,
                 'start': entity['start'] + chunk_offset,
                 'end': entity['end'] + chunk_offset,
             })
     return all_entities
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
+# --- Conditional Mobile Warning CSS ---
 st.markdown(
     """
     <style>
     /* FIX: Aggressive theme override to ensure visibility */
     body {
+        background-color: #f0f2f6 !important;
+        color: #333333 !important;
     }
     [data-testid="stAppViewBlock"] {
         background-color: #ffffff !important;
     }
     @media (max-width: 600px) {
         #mobile-warning-container {
+            display: block;
+            background-color: #ffcccc;
+            color: #cc0000;
             padding: 10px;
             border-radius: 5px;
             text-align: center;
             border: 1px solid #cc0000;
         }
     }
     @media (min-width: 601px) {
         #mobile-warning-container {
+            display: none;
         }
     }
     [data-testid="stConfigurableTabs"] button {
+        color: #333333 !important;
+        background-color: #f0f0f0;
         border: 1px solid #cccccc;
     }
     [data-testid="stConfigurableTabs"] button[aria-selected="true"] {
+        color: #FFFFFF !important;
+        background-color: #007bff;
+        border-bottom: 2px solid #007bff;
     }
     .streamlit-expanderHeader {
+        color: #007bff;
     }
     </style>
     <div id="mobile-warning-container">
     """,
     unsafe_allow_html=True)
+st.subheader("Entity and Topic Analysis Report Generator", divider="blue")
 tab1, tab2 = st.tabs(["Embed", "Important Notes"])
 with tab1:
     **Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
     **How to Use:** Type or paste your text into the text area below, then click the 'Results' button.
     """)
+    st.markdown("For any errors or inquiries, please contact us at [info@your-company.com](mailto:info@your-company.com)")
 # --- Model Loading ---
 @st.cache_resource
 def load_ner_model(labels):
     """Load the GLiNER checkpoint, wrapped in ``st.cache_resource``.

     ``labels`` is forwarded to ``from_pretrained`` as ``gen_constraints``.
     If loading raises, the error is printed, shown with ``st.error`` and the
     script run is halted through ``st.stop()``.
     """
     try:
         ner_model = GLiNER.from_pretrained(
             "knowledgator/gliner-multitask-large-v0.5",
             nested_ner=True,
             num_gen_sequences=2,
             gen_constraints=labels,
         )
     except Exception as e:
         print(f"FATAL ERROR: Failed to load NER model: {e}")
         st.error(f"Failed to load NER model. This may be due to a dependency issue or resource limits: {e}")
         st.stop()
     else:
         return ner_model
+# --- LONG DEFAULT TEXT ---
 DEFAULT_TEXT = (
     "In June 2024, the founder, Dr. Emily Carter, officially announced a new, expansive partnership between "
     "TechSolutions Inc. and the European Space Agency (ESA). This strategic alliance represents a significant "
     "general public by October 1st. The goal is to deploy the **Astra** v2 platform before the next solar eclipse event in 2026.")
 # -----------------------------------
+# --- Session State Initialization ---
 if 'show_results' not in st.session_state: st.session_state.show_results = False
 if 'last_text' not in st.session_state: st.session_state.last_text = ""
 if 'results_df' not in st.session_state: st.session_state.results_df = pd.DataFrame()
 if 'custom_labels_input' not in st.session_state: st.session_state.custom_labels_input = ""
 if 'active_labels_list' not in st.session_state: st.session_state.active_labels_list = FIXED_LABELS
 if 'is_custom_mode' not in st.session_state: st.session_state.is_custom_mode = False
 if 'num_topics_slider' not in st.session_state: st.session_state.num_topics_slider = 5
 if 'num_top_words_slider' not in st.session_state: st.session_state.num_top_words_slider = 10
 if 'last_num_topics' not in st.session_state: st.session_state.last_num_topics = None
 if 'last_num_top_words' not in st.session_state: st.session_state.last_num_top_words = None
+if 'last_active_labels' not in st.session_state: st.session_state.last_active_labels = None
 def clear_text():
     """Clears the text area (sets it to an empty string) and hides results."""
     st.session_state.topic_results = None
 # --- Text Input and Clear Button ---
+word_limit = 10000
 text = st.text_area(
     f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
     height=250,
     "**Optional:** Enter your own comma-separated entity labels here (e.g., `product, symptom, client_id`). Leave blank for default labels.",
     height=60,
     key='custom_labels_input',
+    placeholder="e.g., product, symptom, client_id"
 )
 col_results, col_clear = st.columns([1, 1])
 with col_results:
     run_button = st.button("Results", key='run_results', use_container_width=True)
 with col_clear:
     st.button("Clear text", on_click=clear_text, use_container_width=True)
+# --- Results Trigger and Processing ---
 if run_button:
     # 1. Determine Active Labels and Mode
     custom_labels_raw = st.session_state.custom_labels_input
     if custom_labels_raw.strip():
         custom_labels_list = [label.strip().lower() for label in custom_labels_raw.split(',') if label.strip()]
         if not custom_labels_list:
             st.session_state.active_labels_list = FIXED_LABELS
             st.session_state.is_custom_mode = False
             st.info("No valid custom labels found. Falling back to default fixed labels.")
         st.session_state.is_custom_mode = False
     active_labels = st.session_state.active_labels_list
     current_num_topics = st.session_state.num_topics_slider
     current_num_top_words = st.session_state.num_top_words_slider
         active_labels != st.session_state.last_active_labels
     )
+    if text.strip() and word_count <= word_limit:
+        if should_rerun_full_analysis:
+            # 2. Rerunning Full Analysis
+            CHUNKING_THRESHOLD = 500
+            should_chunk = word_count > CHUNKING_THRESHOLD
+            mode_msg = f"{'custom' if st.session_state.is_custom_mode else 'fixed'} labels"
             if should_chunk:
+                mode_msg += " with **chunking** for large text"
+            with st.spinner(f"Analyzing text with {mode_msg}..."):
+                start_time = time.time()
+                # 2a. Load Model
+                model = load_ner_model(active_labels)
+                # 2b. Extract Entities
+                if should_chunk:
+                    all_entities = process_chunked_text(text, active_labels, model)
+                else:
+                    all_entities = model.predict_entities(text, active_labels)
+                end_time = time.time()
+                elapsed_time = end_time - start_time
+                # 2c. Prepare DataFrame
+                df = pd.DataFrame(all_entities)
+                if not df.empty:
+                    if st.session_state.is_custom_mode:
+                        df['category'] = 'User Defined Entities'
+                    else:
+                        df['category'] = df['label'].map(REVERSE_FIXED_CATEGORY_MAPPING).fillna('Other')
+                    df['text'] = df['text'].apply(remove_trailing_punctuation)
+                    # 2d. Perform Topic Modeling on extracted entities
+                    df_topic_data = perform_topic_modeling(df, num_topics=current_num_topics, num_top_words=current_num_top_words)
+                else:
+                    df_topic_data = None
+                # 5. Save Results to Session State
+                st.session_state.results_df = df
+                st.session_state.topic_results = df_topic_data
+                st.session_state.elapsed_time = elapsed_time
+                st.session_state.last_text = text
+                st.session_state.show_results = True
+                st.session_state.last_active_labels = active_labels
+                st.session_state.last_num_topics = current_num_topics
+                st.session_state.last_num_top_words = current_num_top_words
         else:
             st.info("Results already calculated for the current text and settings.")
             st.session_state.show_results = True
+    elif word_count > word_limit:
+        st.error(f"Text too long! Please limit your input to {word_limit} words.")
+        st.session_state.show_results = False
+    else:
+        st.warning("Please enter some text to analyze.")
+        st.session_state.show_results = False
+# --- Display Download Link and Results ---
 if st.session_state.show_results:
     df = st.session_state.results_df
     df_topic_data = st.session_state.topic_results
     current_labels_in_df = df['label'].unique().tolist()
     entity_color_map = get_dynamic_color_map(current_labels_in_df, FIXED_ENTITY_COLOR_MAP)
         st.warning("No entities were found in the provided text with the current label set.")
     else:
         st.subheader("Analysis Results", divider="blue")
         # 1. Highlighted Text
         st.markdown(f"### 1. Analyzed Text with Highlighted Entities ({'Custom Mode' if st.session_state.is_custom_mode else 'Fixed Mode'})")
         st.markdown(highlight_entities(st.session_state.last_text, df, entity_color_map), unsafe_allow_html=True)
         st.markdown("### 2. Detailed Entity Analysis")
         tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
         if st.session_state.is_custom_mode:
             unique_categories = ["User Defined Entities"]
             tabs_to_show = df['label'].unique().tolist()
         else:
             unique_categories = list(FIXED_CATEGORY_MAPPING.keys())
         # --- Function to Apply Conditional Coloring to Scores ---
+        def color_score_gradient(df_input):
+            """Applies a color gradient to the 'score' column using Pandas Styler."""
+            return df_input.style.background_gradient(
                 cmap='YlGnBu',
                 subset=['score']
             ).format(
+                {'score': '{:.4f}'}
             )
+        # --- Section 2a: Detailed Tables by Category/Label ---
         with tab_category_details:
             st.markdown("#### Detailed Entities Table (Grouped by Category)")
             if st.session_state.is_custom_mode:
                 tabs_list = df['label'].unique().tolist()
                 tabs_category = st.tabs(tabs_list)
                 for label, tab in zip(tabs_list, tabs_category):
                     df_label = df[df['label'] == label][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
                     styled_df_label = color_score_gradient(df_label)
                     with tab:
                         st.markdown(f"##### {label.capitalize()} Entities ({len(df_label)} total)")
+                        st.dataframe(styled_df_label, use_container_width=True)
             else:
                 tabs_category = st.tabs(unique_categories)
                 for category, tab in zip(unique_categories, tabs_category):
                     df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
                     styled_df_category = color_score_gradient(df_category)
                     with tab:
                         st.markdown(f"##### {category} Entities ({len(df_category)} total)")
                         if not df_category.empty:
+                            st.dataframe(styled_df_category, use_container_width=True)
                         else:
                             st.info(f"No entities of category **{category}** were found in the text.")
             with st.expander("See Glossary of tags"):
                 st.write('''- **text**: ['entity extracted from your text data']
 - **label**: ['label (tag) assigned to a given extracted entity (custom or fixed)']
 - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
 - **start**: ['index of the start of the corresponding entity']
 - **end**: ['index of the end of the corresponding entity']''')
         # --- Section 2b: Treemap Visualization ---
         with tab_treemap_viz:
             fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
             st.plotly_chart(fig_treemap, use_container_width=True)
+        # 3. Comparative Charts
         st.markdown("---")
         st.markdown("### 3. Comparative Charts")
         col1, col2, col3 = st.columns(3)
         grouped_counts = df['category'].value_counts().reset_index()
         grouped_counts.columns = ['Category', 'Count']
         chart_color_seq = px.colors.qualitative.Pastel if len(grouped_counts) > 1 else px.colors.sequential.Cividis
         with col1: # Pie Chart
             else:
                 st.info("No entities were repeated enough for a Top 10 frequency chart.")
+        # 4. Advanced Analysis
         st.markdown("---")
         st.markdown("### 4. Advanced Analysis")
+        # --- A. Network Graph Section ---
         with st.expander("🔗 Entity Co-occurrence Network Graph", expanded=True):
             st.plotly_chart(generate_network_graph(df, st.session_state.last_text, entity_color_map), use_container_width=True)
+        # --- B. Topic Modeling Section ---
         st.markdown("---")
+        with st.container(border=True):
             st.markdown("#### 💡 Topic Modeling (LDA) Configuration and Results")
             st.markdown("Adjust the settings below and click **'Re-Run Topic Model'** to instantly update the visualization based on the extracted entities.")
                     help="The number of top words to display per topic (5 to 20)."
                 )
             def rerun_topic_model():
                 # Update session state with the new slider values
                 st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
                 st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
                 if not st.session_state.results_df.empty:
+                    # Recalculate topic modeling results
                     df_topic_data_new = perform_topic_modeling(
                         df_entities=st.session_state.results_df,
                         num_topics=st.session_state.num_topics_slider,
                     st.session_state.topic_results = df_topic_data_new
                     st.session_state.last_num_topics = st.session_state.num_topics_slider
                     st.session_state.last_num_top_words = st.session_state.num_top_words_slider
             with col_rerun_btn:
+                st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True)
                 st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
             st.markdown("---")
             st.markdown(f"""
             **Current LDA Parameters:**
+            * Topics: **{st.session_state.num_topics_slider}**
+            * Top Words: **{st.session_state.num_top_words_slider}**
             """)
+            df_topic_data = st.session_state.topic_results
+            # --- CRITICAL: This is the conditional block that must have correct indentation ---
             if df_topic_data is not None and not df_topic_data.empty:
                 st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
                 st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
+            # END CRITICAL BLOCK
             else:
                 st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
+        # 5. White-Label Configuration
         st.markdown("---")
         st.markdown("### 5. White-Label Report Configuration 🎨")
         default_report_title = f"{'Custom' if st.session_state.is_custom_mode else 'Fixed'} Entity Analysis Report"
         custom_report_title = st.text_input(
             "Type Your Report Title (for HTML Report), and then press Enter.",
             value=default_report_title
         )
         custom_branding_text_input = st.text_area(
             "Type Your Brand Name or Tagline (Appears below the title in the report), and then press Enter.",
+            value="Analysis powered by My Own Brand",
             key='custom_branding_input',
             help="Enter your brand name or a short tagline. This text will be automatically styled and included below the main title."
         )
+        # 6. Downloads
         st.markdown("---")
         st.markdown("### 6. Downloads")
         col_csv, col_html = st.columns(2)
                 use_container_width=True
             )
+        # HTML Download (Passing custom white-label parameters)
         branding_to_pass = f'<p style="font-size: 1.1em; font-weight: 500;">{custom_branding_text_input}</p>'
         html_content = generate_html_report(
             df,
             st.session_state.last_text,
             st.session_state.elapsed_time,
             df_topic_data,
             entity_color_map,
+            report_title=custom_report_title,
+            branding_html=branding_to_pass
         )
         html_bytes = html_content.encode('utf-8')
         with col_html:
                 mime="text/html",
                 use_container_width=True
             )