AIEcosystem committed on
Commit
0024c4d
·
verified ·
1 Parent(s): 1c3f8f0

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +180 -204
src/streamlit_app.py CHANGED
@@ -16,6 +16,7 @@ from sklearn.decomposition import LatentDirichletAllocation
16
  # ------------------------------
17
  from gliner import GLiNER
18
  from streamlit_extras.stylable_container import stylable_container
 
19
  # Using a try/except for comet_ml import
20
  try:
21
  from comet_ml import Experiment
@@ -25,9 +26,11 @@ except ImportError:
25
  def log_parameter(self, *args): pass
26
  def log_table(self, *args): pass
27
  def end(self): pass
 
28
  # --- Model Home Directory (Fix for deployment environments) ---
29
  # Set HF_HOME environment variable to a writable path
30
  os.environ['HF_HOME'] = '/tmp'
 
31
  # --- Color Map for Highlighting and Network Graph Nodes ---
32
  entity_color_map = {
33
  "person": "#10b981",
@@ -46,23 +49,26 @@ entity_color_map = {
46
  "url": "#60a5fa",
47
  "nationality_religion": "#fb7185"
48
  }
 
49
  # --- Utility Functions ---
50
  def extract_label(node_name):
51
  """Extracts the label from a node string like 'Text (Label)'."""
52
  match = re.search(r'\(([^)]+)\)$', node_name)
53
  return match.group(1) if match else "Unknown"
54
-
55
  def remove_trailing_punctuation(text_string):
56
  """Removes trailing punctuation from a string."""
57
  return text_string.rstrip(string.punctuation)
58
-
59
  def highlight_entities(text, df_entities):
60
  """Generates HTML to display text with entities highlighted and colored."""
61
  if df_entities.empty:
62
  return text
 
63
  # Sort entities by start index descending to insert highlights without affecting subsequent indices
64
  entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
65
  highlighted_text = text
 
66
  for entity in entities:
67
  start = entity['start']
68
  end = entity['end']
@@ -72,167 +78,152 @@ def highlight_entities(text, df_entities):
72
 
73
  # Create a span with background color and tooltip
74
  highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
75
-
76
  # Replace the original text segment with the highlighted HTML
77
  highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
 
78
  # Use a div to mimic the Streamlit input box style for the report
79
  return f'<div style="border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
80
 
81
  def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
82
  """
83
- Performs basic Topic Modeling using LDA on the extracted entities
84
  and returns structured data for visualization.
85
-
86
  Includes updated TF-IDF parameters (stop_words='english', max_df=0.95, min_df=1).
87
  """
88
  # Aggregate all unique entity text into a single document list
89
  documents = df_entities['text'].unique().tolist()
90
-
91
  if len(documents) < 2:
92
  return None
93
-
94
- N = min(num_top_words, len(documents))
95
 
 
96
  try:
97
- # UPDATED: Added stop_words='english' to filter common words tokenized
98
  # from multi-word entities (e.g., "The" from "The White House").
99
  tfidf_vectorizer = TfidfVectorizer(
100
- max_df=0.95,
101
  min_df=1, # Retained at 1 to keep all unique entities
102
  stop_words='english' # <-- THIS IS THE KEY ADDITION
103
  )
104
  tfidf = tfidf_vectorizer.fit_transform(documents)
105
  tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
106
-
107
  lda = LatentDirichletAllocation(
108
- n_components=num_topics, max_iter=5, learning_method='online',
109
- random_state=42, n_jobs=-1
110
  )
111
  lda.fit(tfidf)
112
-
113
  topic_data_list = []
114
  for topic_idx, topic in enumerate(lda.components_):
115
- top_words_indices = topic.argsort()[:-N - 1:-1]
116
  top_words = [tfidf_feature_names[i] for i in top_words_indices]
117
  word_weights = [topic[i] for i in top_words_indices]
118
-
119
  for word, weight in zip(top_words, word_weights):
120
  topic_data_list.append({
121
  'Topic_ID': f'Topic #{topic_idx + 1}',
122
  'Word': word,
123
  'Weight': weight,
124
  })
125
-
126
  return pd.DataFrame(topic_data_list)
127
-
128
  except Exception as e:
129
  st.error(f"Topic modeling failed: {e}")
130
  return None
131
-
132
  def create_topic_word_bubbles(df_topic_data):
133
  """Generates a Plotly Bubble Chart for top words across all topics."""
134
-
135
  if df_topic_data.empty:
136
  return None
137
-
138
  fig = px.scatter(
139
- df_topic_data,
140
- x='Word',
141
- y='Topic_ID',
142
- size='Weight',
143
  color='Topic_ID',
144
- size_max=80,
145
  title='Topic Word Weights (Bubble Chart)',
146
  color_discrete_sequence=px.colors.qualitative.Bold,
147
  hover_data={'Word': True, 'Weight': ':.3f', 'Topic_ID': False}
148
  )
149
-
150
  fig.update_layout(
151
  xaxis_title="Entity/Word (Bubble size = Word Weight)",
152
  yaxis_title="Topic ID",
153
  xaxis={'tickangle': -45, 'showgrid': False},
154
  yaxis={'showgrid': True, 'autorange': 'reversed'},
155
  showlegend=True,
156
- plot_bgcolor='#FFF0F5',
157
  paper_bgcolor='#FFF0F5',
158
  height=600,
159
  margin=dict(t=50, b=100, l=50, r=10),
160
  )
161
-
162
  fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
163
-
164
  return fig
165
-
166
  def generate_network_graph(df, raw_text):
167
  """
168
- Generates a network graph visualization (Node Plot) with edges
169
  based on entity co-occurrence in sentences.
170
  """
171
  entity_counts = df['text'].value_counts().reset_index()
172
  entity_counts.columns = ['text', 'frequency']
173
-
174
  # Merge counts with unique entities (text + label)
175
  unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
176
-
177
  if unique_entities.shape[0] < 2:
178
  # Return a simple figure with a message if not enough data
179
  return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
180
-
181
  num_nodes = len(unique_entities)
182
  thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
183
-
184
- radius = 10
185
-
186
  # Assign circular positions + a little randomness
187
  unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
188
  unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
189
-
190
  # Map entity text to its coordinates for easy lookup
191
  pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
192
-
193
  # ----------------------------------------------------------------------
194
  # 1. Identify Edges (Co-occurrence in sentences)
195
  # ----------------------------------------------------------------------
196
  edges = set()
197
-
198
  # Simple sentence segmentation (handles standard punctuation followed by space)
199
  sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
200
-
201
  for sentence in sentences:
202
  # Find unique entities that are substrings of this sentence
203
  entities_in_sentence = []
204
  for entity_text in unique_entities['text'].unique():
205
  if entity_text.lower() in sentence.lower():
206
  entities_in_sentence.append(entity_text)
207
-
208
  # Create edges (pairs) based on co-occurrence
209
  unique_entities_in_sentence = list(set(entities_in_sentence))
210
-
211
  # Create all unique pairs (edges)
212
  for i in range(len(unique_entities_in_sentence)):
213
  for j in range(i + 1, len(unique_entities_in_sentence)):
214
  node1 = unique_entities_in_sentence[i]
215
  node2 = unique_entities_in_sentence[j]
216
-
217
  # Ensure consistent order for the set to avoid duplicates like (A, B) and (B, A)
218
  edge_tuple = tuple(sorted((node1, node2)))
219
  edges.add(edge_tuple)
220
-
221
  # ----------------------------------------------------------------------
222
  # 2. Create Plotly Trace Data for Edges
223
  # ----------------------------------------------------------------------
224
  edge_x = []
225
  edge_y = []
226
-
227
  for edge in edges:
228
  n1, n2 = edge
229
  if n1 in pos_map and n2 in pos_map:
230
  # Append coordinates for line segment: [x1, x2, None] for separation
231
  edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
232
  edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])
233
-
234
  fig = go.Figure()
235
-
236
  # Add Edge Trace (Lines)
237
  edge_trace = go.Scatter(
238
  x=edge_x, y=edge_y,
@@ -243,7 +234,6 @@ def generate_network_graph(df, raw_text):
243
  showlegend=False # Edges don't need a legend entry
244
  )
245
  fig.add_trace(edge_trace)
246
-
247
  # ----------------------------------------------------------------------
248
  # 3. Add Node Trace (Markers)
249
  # ----------------------------------------------------------------------
@@ -254,9 +244,9 @@ def generate_network_graph(df, raw_text):
254
  name='Entities',
255
  text=unique_entities['text'],
256
  textposition="top center",
257
- # FIX: Explicitly set showlegend=False for the main node trace
258
  # as we are creating separate traces for the legend colors below.
259
- showlegend=False,
260
  marker=dict(
261
  size=unique_entities['frequency'] * 5 + 10,
262
  color=[entity_color_map.get(label, '#cccccc') for label in unique_entities['label']],
@@ -273,7 +263,7 @@ def generate_network_graph(df, raw_text):
273
  "Frequency: %{customdata[2]}<extra></extra>"
274
  )
275
  ))
276
-
277
  # Adding discrete traces for the legend based on unique labels
278
  legend_traces = []
279
  seen_labels = set()
@@ -287,7 +277,7 @@ def generate_network_graph(df, raw_text):
287
  y=[None],
288
  mode='markers',
289
  marker=dict(size=10, color=color),
290
- name=f"{label.capitalize()}",
291
  showlegend=True # Ensure legend traces are explicitly visible
292
  ))
293
  for trace in legend_traces:
@@ -305,70 +295,61 @@ def generate_network_graph(df, raw_text):
305
  margin=dict(t=50, b=10, l=10, r=10),
306
  height=600
307
  )
308
-
309
  return fig
310
 
311
  def generate_html_report(df, text_input, elapsed_time, df_topic_data):
312
  """
313
  Generates a full HTML report containing all analysis results and visualizations.
314
-
315
- FIX 1: Added a discrete color sequence to the Treemap to prevent black color.
316
- FIX 2: Adjusted CSS grid properties and added min-width to grid items to prevent plot overlap.
 
317
  """
318
-
319
  # 1. Generate Visualizations (Plotly HTML)
320
-
321
  # 1a. Treemap
322
  # FIX 1: Explicitly set a color_discrete_sequence to prevent the Treemap from being black
323
  fig_treemap = px.treemap(
324
- df,
325
- path=[px.Constant("All Entities"), 'category', 'label', 'text'],
326
  values='score',
327
- color='category',
328
  title="Entity Distribution by Category and Label",
329
  color_discrete_sequence=px.colors.qualitative.Dark24 # Use a robust color sequence
330
  )
331
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
332
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
333
-
334
  # 1b. Pie Chart
335
  grouped_counts = df['category'].value_counts().reset_index()
336
  grouped_counts.columns = ['Category', 'Count']
337
- fig_pie = px.pie(grouped_counts, values='Count', names='Category',
338
- title='Distribution of Entities by Category',
339
- color_discrete_sequence=px.colors.sequential.RdBu)
340
  fig_pie.update_layout(margin=dict(t=50, b=10))
341
  pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
342
-
343
  # 1c. Bar Chart (Category Count)
344
- fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',
345
- color='Category', title='Total Entities per Category',
346
- color_discrete_sequence=px.colors.qualitative.Pastel)
347
- fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},
348
- margin=dict(t=50, b=10))
349
- bar_category_html = fig_bar_category.to_html(full_html=False,
350
- include_plotlyjs='cdn')
351
-
352
  # 1d. Bar Chart (Most Frequent Entities)
353
- word_counts = df['text'].value_counts().reset_index()
354
- word_counts.columns = ['Entity', 'Count']
355
-
356
  # Top 10 repeating entities
357
- repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
358
  bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
359
-
360
  if not repeating_entities.empty:
361
- fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',
362
- color='Entity', title='Top 10 Most Frequent Entities',
363
- color_discrete_sequence=px.colors.sequential.Plasma)
364
- fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},
365
- margin=dict(t=50, b=10))
366
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
367
-
368
  # 1e. Network Graph HTML - UPDATED to pass text_input
369
  network_fig = generate_network_graph(df, text_input)
370
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
371
-
372
  # 1f. Topic Charts HTML (Now a single Bubble Chart with Placeholder logic)
373
  topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
374
  if df_topic_data is not None and not df_topic_data.empty:
@@ -383,16 +364,16 @@ margin=dict(t=50, b=10))
383
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
384
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
385
  topic_charts_html += '</div>'
386
-
387
  # 2. Get Highlighted Text
388
  highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
389
-
390
  # 3. Entity Tables (Pandas to HTML)
391
  entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
392
- classes='table table-striped',
393
  index=False
394
  )
395
-
396
  # 4. Construct the Final HTML
397
  html_content = f"""<!DOCTYPE html><html lang="en"><head>
398
  <meta charset="UTF-8">
@@ -406,20 +387,21 @@ margin=dict(t=50, b=10))
406
  h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
407
  h3 {{ color: #555; margin-top: 20px; }}
408
  .metadata {{ background-color: #FFE4E1; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
409
- /* FIX 2: Modified grid to ensure each item gets min 30% of the container width */
410
- .grid {{
411
- display: grid;
412
- grid-template-columns: repeat(auto-fit, minmax(320px, 1fr)); /* Adjusted min-width for better fit */
413
- gap: 20px;
414
- margin-top: 20px;
415
  }}
416
- .chart-box {{
417
- background-color: #f9f9f9;
418
- padding: 15px;
419
- border-radius: 8px;
420
  box-shadow: 0 2px 4px rgba(0,0,0,0.05);
421
- /* Important: Set a minimum width for the chart box in the grid */
422
- min-width: 0;
 
423
  }}
424
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
425
  table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
@@ -427,14 +409,14 @@ margin=dict(t=50, b=10))
427
  /* Specific styling for highlighted text element */
428
  .highlighted-text {{ border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
429
  @media (max-width: 1050px) {{ /* Increased breakpoint to help prevent overlap */
430
- .grid {{
431
- grid-template-columns: 1fr; /* Stack charts vertically on smaller screens */
432
  }}
433
  }}
434
  </style></head><body>
435
  <div class="container">
436
  <h1>Entity and Topic Analysis Report</h1>
437
-
438
  <div class="metadata">
439
  <p><strong>Generated At:</strong> {time.strftime('%Y-%m-%d %H:%M:%S')}</p>
440
  <p><strong>Processing Time:</strong> {elapsed_time:.2f} seconds</p>
@@ -444,25 +426,26 @@ margin=dict(t=50, b=10))
444
  <div class="highlighted-text-container">
445
  {highlighted_text_html}
446
  </div>
447
-
448
  <h2>2. Full Extracted Entities Table</h2>
449
  {entity_table_html}
450
  <h2>3. Data Visualizations</h2>
451
-
452
  <h3>3.1 Entity Distribution Treemap</h3>
453
  <div class="chart-box">{treemap_html}</div>
454
- <h3>3.2 Comparative Charts (Pie, Category Count, Frequency)</h3>
455
- <div class="grid">
456
- <div class="chart-box">{pie_html}</div>
457
- <div class="chart-box">{bar_category_html}</div>
458
- <div class="chart-box">{bar_freq_html}</div>
459
- </div>
 
460
  <h3>3.3 Entity Co-occurrence Network (Edges = Same Sentence)</h3>
461
  <div class="chart-box">{network_html}</div>
462
-
463
  <h2>4. Topic Modeling (LDA on Entities)</h2>
464
  {topic_charts_html}
465
-
466
  </div></body></html>
467
  """
468
  return html_content
@@ -505,13 +488,17 @@ st.markdown(
505
  st.subheader("NER and Topic Analysis Report Generator", divider="rainbow")
506
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
507
  expander = st.expander("**Important notes**")
508
- expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.**Results:** Results are compiled into a single, comprehensive **HTML report** for easy download and sharing.**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
 
 
509
  st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
 
510
  # --- Comet ML Setup (Placeholder/Conditional) ---
511
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
512
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
513
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
514
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 
515
  # --- Label Definitions and Category Mapping ---
516
  labels = list(entity_color_map.keys())
517
  category_mapping = {
@@ -521,8 +508,9 @@ category_mapping = {
521
  "Digital & Products": ["platform", "product", "media_type", "url"],
522
  }
523
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 
524
  # --- Model Loading ---
525
- @st.cache_resource
526
  def load_ner_model():
527
  """Loads the GLiNER model and caches it."""
528
  try:
@@ -531,9 +519,9 @@ def load_ner_model():
531
  except Exception as e:
532
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
533
  st.stop()
534
-
535
  model = load_ner_model()
536
-
537
  # --- LONG DEFAULT TEXT (178 Words) ---
538
  DEFAULT_TEXT = (
539
  "In June 2024, the founder, Dr. Emily Carter, officially announced a new, expansive partnership between "
@@ -551,7 +539,6 @@ DEFAULT_TEXT = (
551
  "general public by October 1st. The goal is to deploy the Astra v2 platform before the next solar eclipse event in 2026."
552
  )
553
  # -----------------------------------
554
-
555
  # --- Session State Initialization (CRITICAL FIX) ---
556
  if 'show_results' not in st.session_state:
557
  st.session_state.show_results = False
@@ -566,7 +553,7 @@ if 'topic_results' not in st.session_state:
566
  # FIX: Initialize the text area key with default text before st.text_area is called
567
  if 'my_text_area' not in st.session_state:
568
  st.session_state.my_text_area = DEFAULT_TEXT
569
-
570
  # --- Clear Button Function (MODIFIED) ---
571
  def clear_text():
572
  """Clears the text area (sets it to an empty string) and hides results."""
@@ -577,21 +564,20 @@ def clear_text():
577
  st.session_state.results_df = pd.DataFrame()
578
  st.session_state.elapsed_time = 0.0
579
  st.session_state.topic_results = None
580
-
581
  # --- Text Input and Clear Button ---
582
  word_limit = 1000
583
  # The text area now safely uses the pre-initialized session state value
584
  text = st.text_area(
585
  f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
586
- height=250,
587
  key='my_text_area',
588
- value=st.session_state.my_text_area
589
- )
590
-
591
  word_count = len(text.split())
592
  st.markdown(f"**Word count:** {word_count}/{word_limit}")
593
  st.button("Clear text", on_click=clear_text)
594
-
595
  # --- Results Trigger and Processing (Updated Logic) ---
596
  if st.button("Results"):
597
  if not text.strip():
@@ -605,27 +591,26 @@ if st.button("Results"):
605
  if text != st.session_state.last_text:
606
  st.session_state.last_text = text
607
  start_time = time.time()
608
-
609
  # --- Model Prediction & Dataframe Creation ---
610
  entities = model.predict_entities(text, labels)
611
  df = pd.DataFrame(entities)
612
-
613
  if not df.empty:
614
  df['text'] = df['text'].apply(remove_trailing_punctuation)
615
  df['category'] = df['label'].map(reverse_category_mapping)
616
  st.session_state.results_df = df
617
-
618
  unique_entity_count = len(df['text'].unique())
619
  N_TOP_WORDS_TO_USE = min(10, unique_entity_count)
620
-
621
  st.session_state.topic_results = perform_topic_modeling(
622
- df,
623
- num_topics=2,
624
  num_top_words=N_TOP_WORDS_TO_USE
625
  )
626
-
627
  if comet_initialized:
628
- # FIX APPLIED HERE: Corrected indentation for the following lines
629
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
630
  experiment.log_parameter("input_text", text)
631
  experiment.log_table("predicted_entities", df)
@@ -633,10 +618,10 @@ if st.button("Results"):
633
  else:
634
  st.session_state.results_df = pd.DataFrame()
635
  st.session_state.topic_results = None
636
-
637
  end_time = time.time()
638
  st.session_state.elapsed_time = end_time - start_time
639
-
640
  st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
641
  st.session_state.show_results = True
642
 
@@ -644,144 +629,135 @@ if st.button("Results"):
644
  if st.session_state.show_results:
645
  df = st.session_state.results_df
646
  df_topic_data = st.session_state.topic_results
647
-
648
  if df.empty:
649
  st.warning("No entities were found in the provided text.")
650
  else:
651
  st.subheader("Analysis Results", divider="blue")
652
-
653
  # 1. Highlighted Text
654
  st.markdown("### 1. Analyzed Text with Highlighted Entities")
655
  st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
656
-
657
  # 2. Entity Summary Table (Count by Label - kept outside tabs)
658
  st.markdown("### 2. Entity Summary Table (Count by Label)")
659
  grouped_entity_table = df['label'].value_counts().reset_index()
660
  grouped_entity_table.columns = ['Entity Label', 'Count']
661
  grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(reverse_category_mapping)
662
  st.dataframe(grouped_entity_table[['Category', 'Entity Label', 'Count']], use_container_width=True)
 
663
 
664
- st.markdown("---")
665
  st.markdown("### 3. Detailed Entity Analysis")
666
-
667
  # 3. New Tabs: Tab 1: Category Details Table | Tab 2: Treemap
668
  tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
669
-
670
  # TAB 1: Detailed Entities Table Grouped by Category
671
  with tab_category_details:
672
  st.markdown("#### Detailed Entities Table (Grouped by Category)")
673
-
674
  # Get the unique categories for creating inner tabs
675
  unique_categories = list(category_mapping.keys())
676
-
677
- # Create inner tabs dynamically based on the available categories
678
- tabs_category = st.tabs(unique_categories)
679
 
 
 
680
  # We iterate over the categories and tabs simultaneously
681
  for category, tab in zip(unique_categories, tabs_category):
682
  # Filter the main DataFrame for the current category
683
  df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
684
-
685
  with tab:
686
  st.markdown(f"##### {category} Entities ({len(df_category)} total)")
687
  if not df_category.empty:
688
  # Display the DataFrame for the current category
689
  st.dataframe(
690
- df_category,
691
- use_container_width=True,
692
  # Format the score for better readability
693
  column_config={'score': st.column_config.NumberColumn(format="%.4f")}
694
  )
695
  else:
696
  st.info(f"No entities of category **{category}** were found in the text.")
697
-
698
  # TAB 2: Treemap
699
  with tab_treemap_viz:
700
  st.markdown("#### Treemap: Entity Distribution")
701
  # Treemap
702
  # FIX 1 (Streamlit): Added a robust color sequence here too for consistency in the Streamlit plot
703
  fig_treemap = px.treemap(
704
- df,
705
- path=[px.Constant("All Entities"), 'category', 'label', 'text'],
706
  values='score',
707
- color='category',
708
  title="Entity Distribution by Category and Label",
709
  color_discrete_sequence=px.colors.qualitative.Dark24 # Applied fix here
710
  )
711
  fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
712
  st.plotly_chart(fig_treemap, use_container_width=True)
713
-
714
  # 4. Comparative Charts (Keep outside the new tabs, as in original code structure)
715
  st.markdown("---")
716
  st.markdown("### 4. Comparative Charts")
717
-
718
- # FIX 2 (Streamlit): The Streamlit plot columns (col1, col2, col3) naturally handle overlap,
719
- # so no change is needed here, the fix is only in the HTML report.
720
- col1, col2, col3 = st.columns(3)
721
-
722
- # Pie Chart
723
  grouped_counts = df['category'].value_counts().reset_index()
724
  grouped_counts.columns = ['Category', 'Count']
725
- fig_pie = px.pie(grouped_counts, values='Count', names='Category',
726
- title='Distribution by Category',
727
- color_discrete_sequence=px.colors.sequential.RdBu)
728
  with col1:
 
 
729
  st.plotly_chart(fig_pie, use_container_width=True)
730
- # Category Count Bar Chart
731
- fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',
732
- color='Category', title='Total Entities per Category',
733
- color_discrete_sequence=px.colors.qualitative.Pastel)
734
  with col2:
 
 
735
  st.plotly_chart(fig_bar_category, use_container_width=True)
736
- # Most Frequent Entities Bar Chart
737
- word_counts = df['text'].value_counts().reset_index()
738
- word_counts.columns = ['Entity', 'Count']
739
- repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
740
- fig_bar_freq = go.Figure().update_layout(title="No repeating entities for plot")
741
- if not repeating_entities.empty:
742
- fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',
743
- color='Entity', title='Top 10 Most Frequent Entities',
744
- color_discrete_sequence=px.colors.sequential.Plasma)
745
  with col3:
746
- st.plotly_chart(fig_bar_freq, use_container_width=True)
747
-
748
- # 5. Network Graph
 
 
 
 
749
  st.markdown("---")
750
  st.markdown("### 5. Entity Co-occurrence Network")
 
 
751
  network_fig = generate_network_graph(df, st.session_state.last_text)
752
  st.plotly_chart(network_fig, use_container_width=True)
753
-
754
- # 6. Topic Modeling
755
  st.markdown("---")
756
- st.markdown("### 6. Topic Modeling (LDA on Entities)")
 
 
757
  if df_topic_data is not None and not df_topic_data.empty:
758
  bubble_figure = create_topic_word_bubbles(df_topic_data)
759
  if bubble_figure:
760
  st.plotly_chart(bubble_figure, use_container_width=True)
761
  else:
762
- st.error("Visualization for Topic Modeling failed.")
763
  else:
764
- st.info("Topic Modeling requires at least two unique entities and sufficient data to generate meaningful topics.")
765
 
766
- # Final Report Download
767
  st.markdown("---")
768
- st.markdown("### Download Full HTML Report 🚀")
769
-
770
- # Generate the full HTML content
771
- html_report = generate_html_report(
772
- df=df,
773
- text_input=st.session_state.last_text,
774
- elapsed_time=st.session_state.elapsed_time,
775
- df_topic_data=df_topic_data
776
- )
777
-
778
  st.download_button(
779
- label="Download Analysis Report (.html)",
780
  data=html_report,
781
- file_name="entity_analysis_report.html",
782
- mime="text/html"
 
783
  )
784
 
785
-
786
-
787
-
 
16
  # ------------------------------
17
  from gliner import GLiNER
18
  from streamlit_extras.stylable_container import stylable_container
19
+
20
  # Using a try/except for comet_ml import
21
  try:
22
  from comet_ml import Experiment
 
26
  def log_parameter(self, *args): pass
27
  def log_table(self, *args): pass
28
  def end(self): pass
29
+
30
  # --- Model Home Directory (Fix for deployment environments) ---
31
  # Set HF_HOME environment variable to a writable path
32
  os.environ['HF_HOME'] = '/tmp'
33
+
34
  # --- Color Map for Highlighting and Network Graph Nodes ---
35
  entity_color_map = {
36
  "person": "#10b981",
 
49
  "url": "#60a5fa",
50
  "nationality_religion": "#fb7185"
51
  }
52
+
53
  # --- Utility Functions ---
54
  def extract_label(node_name):
55
  """Extracts the label from a node string like 'Text (Label)'."""
56
  match = re.search(r'\(([^)]+)\)$', node_name)
57
  return match.group(1) if match else "Unknown"
58
+
59
  def remove_trailing_punctuation(text_string):
60
  """Removes trailing punctuation from a string."""
61
  return text_string.rstrip(string.punctuation)
62
+
63
  def highlight_entities(text, df_entities):
64
  """Generates HTML to display text with entities highlighted and colored."""
65
  if df_entities.empty:
66
  return text
67
+
68
  # Sort entities by start index descending to insert highlights without affecting subsequent indices
69
  entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
70
  highlighted_text = text
71
+
72
  for entity in entities:
73
  start = entity['start']
74
  end = entity['end']
 
78
 
79
  # Create a span with background color and tooltip
80
  highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
 
81
  # Replace the original text segment with the highlighted HTML
82
  highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
83
+
84
  # Use a div to mimic the Streamlit input box style for the report
85
  return f'<div style="border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
86
 
87
def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
    """
    Run LDA topic modeling over the unique extracted entity strings.

    Parameters
    ----------
    df_entities : pandas.DataFrame
        Entity table; only the 'text' column is consulted.
    num_topics : int
        Number of LDA components to fit.
    num_top_words : int
        Upper bound on the number of words reported per topic.

    Returns
    -------
    pandas.DataFrame or None
        One row per (topic, word) pair with columns 'Topic_ID', 'Word'
        and 'Weight'; None when fewer than two unique entities exist or
        the vectorizer/LDA step raises.
    """
    # Each unique entity string is treated as its own "document".
    docs = df_entities['text'].unique().tolist()
    if len(docs) < 2:
        return None

    # Never ask for more top words than there are documents.
    top_n = min(num_top_words, len(docs))
    try:
        # stop_words='english' filters common tokens produced by
        # multi-word entities (e.g. "The" from "The White House");
        # min_df=1 keeps every remaining unique entity token.
        vectorizer = TfidfVectorizer(max_df=0.95, min_df=1, stop_words='english')
        doc_term_matrix = vectorizer.fit_transform(docs)
        vocab = vectorizer.get_feature_names_out()

        lda = LatentDirichletAllocation(
            n_components=num_topics,
            max_iter=5,
            learning_method='online',
            random_state=42,
            n_jobs=-1,
        )
        lda.fit(doc_term_matrix)

        rows = []
        for topic_idx, weights in enumerate(lda.components_):
            # Indices of the top_n highest-weighted vocabulary terms.
            best_indices = weights.argsort()[:-top_n - 1:-1]
            rows.extend(
                {
                    'Topic_ID': f'Topic #{topic_idx + 1}',
                    'Word': vocab[i],
                    'Weight': weights[i],
                }
                for i in best_indices
            )
        return pd.DataFrame(rows)

    except Exception as e:
        st.error(f"Topic modeling failed: {e}")
        return None
130
+
131
def create_topic_word_bubbles(df_topic_data):
    """Build a Plotly bubble chart of per-topic word weights; None if no data."""
    if df_topic_data.empty:
        return None

    # Hover shows the word and a 3-decimal weight; the topic id is hidden
    # because it is already encoded on the y axis and in the color.
    hover_spec = {'Word': True, 'Weight': ':.3f', 'Topic_ID': False}
    fig = px.scatter(
        df_topic_data,
        x='Word',
        y='Topic_ID',
        size='Weight',
        color='Topic_ID',
        size_max=80,
        title='Topic Word Weights (Bubble Chart)',
        color_discrete_sequence=px.colors.qualitative.Bold,
        hover_data=hover_spec,
    )

    # Pink-tinted backgrounds; y axis reversed so Topic #1 renders on top.
    layout_opts = dict(
        xaxis_title="Entity/Word (Bubble size = Word Weight)",
        yaxis_title="Topic ID",
        xaxis={'tickangle': -45, 'showgrid': False},
        yaxis={'showgrid': True, 'autorange': 'reversed'},
        showlegend=True,
        plot_bgcolor='#FFF0F5',
        paper_bgcolor='#FFF0F5',
        height=600,
        margin=dict(t=50, b=100, l=50, r=10),
    )
    fig.update_layout(**layout_opts)

    # Outline each bubble for readability against the pale background.
    fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
    return fig
162
+
163
  def generate_network_graph(df, raw_text):
164
  """
165
+ Generates a network graph visualization (Node Plot) with edges
166
  based on entity co-occurrence in sentences.
167
  """
168
  entity_counts = df['text'].value_counts().reset_index()
169
  entity_counts.columns = ['text', 'frequency']
170
+
171
  # Merge counts with unique entities (text + label)
172
  unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
 
173
  if unique_entities.shape[0] < 2:
174
  # Return a simple figure with a message if not enough data
175
  return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
176
+
177
  num_nodes = len(unique_entities)
178
  thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
179
+
180
+ radius = 10
 
181
  # Assign circular positions + a little randomness
182
  unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
183
  unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
184
+
185
  # Map entity text to its coordinates for easy lookup
186
  pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
 
187
  # ----------------------------------------------------------------------
188
  # 1. Identify Edges (Co-occurrence in sentences)
189
  # ----------------------------------------------------------------------
190
  edges = set()
191
+
192
  # Simple sentence segmentation (handles standard punctuation followed by space)
193
  sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
 
194
  for sentence in sentences:
195
  # Find unique entities that are substrings of this sentence
196
  entities_in_sentence = []
197
  for entity_text in unique_entities['text'].unique():
198
  if entity_text.lower() in sentence.lower():
199
  entities_in_sentence.append(entity_text)
 
200
  # Create edges (pairs) based on co-occurrence
201
  unique_entities_in_sentence = list(set(entities_in_sentence))
202
+
203
  # Create all unique pairs (edges)
204
  for i in range(len(unique_entities_in_sentence)):
205
  for j in range(i + 1, len(unique_entities_in_sentence)):
206
  node1 = unique_entities_in_sentence[i]
207
  node2 = unique_entities_in_sentence[j]
208
+
209
  # Ensure consistent order for the set to avoid duplicates like (A, B) and (B, A)
210
  edge_tuple = tuple(sorted((node1, node2)))
211
  edges.add(edge_tuple)
 
212
  # ----------------------------------------------------------------------
213
  # 2. Create Plotly Trace Data for Edges
214
  # ----------------------------------------------------------------------
215
  edge_x = []
216
  edge_y = []
217
+
218
  for edge in edges:
219
  n1, n2 = edge
220
  if n1 in pos_map and n2 in pos_map:
221
  # Append coordinates for line segment: [x1, x2, None] for separation
222
  edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
223
  edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])
224
+
225
  fig = go.Figure()
226
+
227
  # Add Edge Trace (Lines)
228
  edge_trace = go.Scatter(
229
  x=edge_x, y=edge_y,
 
234
  showlegend=False # Edges don't need a legend entry
235
  )
236
  fig.add_trace(edge_trace)
 
237
  # ----------------------------------------------------------------------
238
  # 3. Add Node Trace (Markers)
239
  # ----------------------------------------------------------------------
 
244
  name='Entities',
245
  text=unique_entities['text'],
246
  textposition="top center",
247
+ # FIX: Explicitly set showlegend=False for the main node trace
248
  # as we are creating separate traces for the legend colors below.
249
+ showlegend=False,
250
  marker=dict(
251
  size=unique_entities['frequency'] * 5 + 10,
252
  color=[entity_color_map.get(label, '#cccccc') for label in unique_entities['label']],
 
263
  "Frequency: %{customdata[2]}<extra></extra>"
264
  )
265
  ))
266
+
267
  # Adding discrete traces for the legend based on unique labels
268
  legend_traces = []
269
  seen_labels = set()
 
277
  y=[None],
278
  mode='markers',
279
  marker=dict(size=10, color=color),
280
+ name=f"{label.capitalize()}",
281
  showlegend=True # Ensure legend traces are explicitly visible
282
  ))
283
  for trace in legend_traces:
 
295
  margin=dict(t=50, b=10, l=10, r=10),
296
  height=600
297
  )
298
+
299
  return fig
300
 
301
  def generate_html_report(df, text_input, elapsed_time, df_topic_data):
302
  """
303
  Generates a full HTML report containing all analysis results and visualizations.
304
+
305
+ FIX APPLIED: Removed the CSS Grid layout for the three comparative charts
306
+ (Pie, Category Count, Frequency) and stacked them vertically to prevent
307
+ overlapping and ensure reliable rendering across devices.
308
  """
309
+
310
  # 1. Generate Visualizations (Plotly HTML)
311
+
312
  # 1a. Treemap
313
  # FIX 1: Explicitly set a color_discrete_sequence to prevent the Treemap from being black
314
  fig_treemap = px.treemap(
315
+ df,
316
+ path=[px.Constant("All Entities"), 'category', 'label', 'text'],
317
  values='score',
318
+ color='category',
319
  title="Entity Distribution by Category and Label",
320
  color_discrete_sequence=px.colors.qualitative.Dark24 # Use a robust color sequence
321
  )
322
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
323
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
324
+
325
  # 1b. Pie Chart
326
  grouped_counts = df['category'].value_counts().reset_index()
327
  grouped_counts.columns = ['Category', 'Count']
328
+ fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
 
 
329
  fig_pie.update_layout(margin=dict(t=50, b=10))
330
  pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
331
+
332
  # 1c. Bar Chart (Category Count)
333
+ fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
334
+ fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=10))
335
+ bar_category_html = fig_bar_category.to_html(full_html=False,include_plotlyjs='cdn')
336
+
 
 
 
 
337
  # 1d. Bar Chart (Most Frequent Entities)
338
+ word_counts = df['text'].value_counts().reset_index()
339
+ word_counts.columns = ['Entity', 'Count']
 
340
  # Top 10 repeating entities
341
+ repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
342
  bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
343
+
344
  if not repeating_entities.empty:
345
+ fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
346
+ fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=10))
 
 
 
347
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
348
+
349
  # 1e. Network Graph HTML - UPDATED to pass text_input
350
  network_fig = generate_network_graph(df, text_input)
351
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
352
+
353
  # 1f. Topic Charts HTML (Now a single Bubble Chart with Placeholder logic)
354
  topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
355
  if df_topic_data is not None and not df_topic_data.empty:
 
364
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
365
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
366
  topic_charts_html += '</div>'
367
+
368
  # 2. Get Highlighted Text
369
  highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
370
+
371
  # 3. Entity Tables (Pandas to HTML)
372
  entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
373
+ classes='table table-striped',
374
  index=False
375
  )
376
+
377
  # 4. Construct the Final HTML
378
  html_content = f"""<!DOCTYPE html><html lang="en"><head>
379
  <meta charset="UTF-8">
 
387
  h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
388
  h3 {{ color: #555; margin-top: 20px; }}
389
  .metadata {{ background-color: #FFE4E1; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
390
+ /* The 'grid' class is kept for potential future use or the network graph, but not used for 3.2 */
391
+ .grid {{
392
+ display: grid;
393
+ grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
394
+ gap: 20px;
395
+ margin-top: 20px;
396
  }}
397
+ .chart-box {{
398
+ background-color: #f9f9f9;
399
+ padding: 15px;
400
+ border-radius: 8px;
401
  box-shadow: 0 2px 4px rgba(0,0,0,0.05);
402
+ /* Important: Set a minimum width for the chart box, and margin for stacking */
403
+ min-width: 0;
404
+ margin-bottom: 20px; /* NEW: Added margin for separation when stacked */
405
  }}
406
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
407
  table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
 
409
  /* Specific styling for highlighted text element */
410
  .highlighted-text {{ border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
411
  @media (max-width: 1050px) {{ /* Increased breakpoint to help prevent overlap */
412
+ .grid {{
413
+ grid-template-columns: 1fr; /* Stack charts vertically on smaller screens */
414
  }}
415
  }}
416
  </style></head><body>
417
  <div class="container">
418
  <h1>Entity and Topic Analysis Report</h1>
419
+
420
  <div class="metadata">
421
  <p><strong>Generated At:</strong> {time.strftime('%Y-%m-%d %H:%M:%S')}</p>
422
  <p><strong>Processing Time:</strong> {elapsed_time:.2f} seconds</p>
 
426
  <div class="highlighted-text-container">
427
  {highlighted_text_html}
428
  </div>
429
+
430
  <h2>2. Full Extracted Entities Table</h2>
431
  {entity_table_html}
432
  <h2>3. Data Visualizations</h2>
433
+
434
  <h3>3.1 Entity Distribution Treemap</h3>
435
  <div class="chart-box">{treemap_html}</div>
436
+ <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
437
+
438
+ <!-- FIX: Charts are now in separate chart-box divs (not a 'grid') for guaranteed vertical stacking -->
439
+ <div class="chart-box">{pie_html}</div>
440
+ <div class="chart-box">{bar_category_html}</div>
441
+ <div class="chart-box">{bar_freq_html}</div>
442
+
443
  <h3>3.3 Entity Co-occurrence Network (Edges = Same Sentence)</h3>
444
  <div class="chart-box">{network_html}</div>
445
+
446
  <h2>4. Topic Modeling (LDA on Entities)</h2>
447
  {topic_charts_html}
448
+
449
  </div></body></html>
450
  """
451
  return html_content
 
488
  st.subheader("NER and Topic Analysis Report Generator", divider="rainbow")
489
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
490
  expander = st.expander("**Important notes**")
491
+ expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
492
+ **Results:** Results are compiled into a single, comprehensive **HTML report** for easy download and sharing.
493
+ **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
494
  st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
495
+
496
  # --- Comet ML Setup (Placeholder/Conditional) ---
497
  COMET_API_KEY = os.environ.get("COMET_API_KEY")
498
  COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
499
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
500
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
501
+
502
  # --- Label Definitions and Category Mapping ---
503
  labels = list(entity_color_map.keys())
504
  category_mapping = {
 
508
  "Digital & Products": ["platform", "product", "media_type", "url"],
509
  }
510
  reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
511
+
512
  # --- Model Loading ---
513
+ @st.cache_resourced
514
  def load_ner_model():
515
  """Loads the GLiNER model and caches it."""
516
  try:
 
519
  except Exception as e:
520
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
521
  st.stop()
522
+
523
  model = load_ner_model()
524
+
525
  # --- LONG DEFAULT TEXT (178 Words) ---
526
  DEFAULT_TEXT = (
527
  "In June 2024, the founder, Dr. Emily Carter, officially announced a new, expansive partnership between "
 
539
  "general public by October 1st. The goal is to deploy the Astra v2 platform before the next solar eclipse event in 2026."
540
  )
541
  # -----------------------------------
 
542
  # --- Session State Initialization (CRITICAL FIX) ---
543
  if 'show_results' not in st.session_state:
544
  st.session_state.show_results = False
 
553
  # FIX: Initialize the text area key with default text before st.text_area is called
554
  if 'my_text_area' not in st.session_state:
555
  st.session_state.my_text_area = DEFAULT_TEXT
556
+
557
  # --- Clear Button Function (MODIFIED) ---
558
  def clear_text():
559
  """Clears the text area (sets it to an empty string) and hides results."""
 
564
  st.session_state.results_df = pd.DataFrame()
565
  st.session_state.elapsed_time = 0.0
566
  st.session_state.topic_results = None
567
+
568
# --- Text Input and Clear Button ---
word_limit = 1000
# The widget's state is pre-seeded via st.session_state['my_text_area']
# (initialized above with DEFAULT_TEXT). A keyed widget must NOT also be
# given value=: Streamlit warns that the widget "was created with a default
# value but also had its value set via the Session State API" and the
# default conflicts with the session value, so the session state alone is
# the single source of truth here.
text = st.text_area(
    f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
    height=250,
    key='my_text_area')

word_count = len(text.split())
st.markdown(f"**Word count:** {word_count}/{word_limit}")
st.button("Clear text", on_click=clear_text)
580
+
581
  # --- Results Trigger and Processing (Updated Logic) ---
582
  if st.button("Results"):
583
  if not text.strip():
 
591
  if text != st.session_state.last_text:
592
  st.session_state.last_text = text
593
  start_time = time.time()
594
+
595
  # --- Model Prediction & Dataframe Creation ---
596
  entities = model.predict_entities(text, labels)
597
  df = pd.DataFrame(entities)
598
+
599
  if not df.empty:
600
  df['text'] = df['text'].apply(remove_trailing_punctuation)
601
  df['category'] = df['label'].map(reverse_category_mapping)
602
  st.session_state.results_df = df
603
+
604
  unique_entity_count = len(df['text'].unique())
605
  N_TOP_WORDS_TO_USE = min(10, unique_entity_count)
606
+
607
  st.session_state.topic_results = perform_topic_modeling(
608
+ df,
609
+ num_topics=2,
610
  num_top_words=N_TOP_WORDS_TO_USE
611
  )
612
+
613
  if comet_initialized:
 
614
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
615
  experiment.log_parameter("input_text", text)
616
  experiment.log_table("predicted_entities", df)
 
618
  else:
619
  st.session_state.results_df = pd.DataFrame()
620
  st.session_state.topic_results = None
621
+
622
  end_time = time.time()
623
  st.session_state.elapsed_time = end_time - start_time
624
+
625
  st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
626
  st.session_state.show_results = True
627
 
 
629
  if st.session_state.show_results:
630
  df = st.session_state.results_df
631
  df_topic_data = st.session_state.topic_results
632
+
633
  if df.empty:
634
  st.warning("No entities were found in the provided text.")
635
  else:
636
  st.subheader("Analysis Results", divider="blue")
637
+
638
  # 1. Highlighted Text
639
  st.markdown("### 1. Analyzed Text with Highlighted Entities")
640
  st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
641
+
642
  # 2. Entity Summary Table (Count by Label - kept outside tabs)
643
  st.markdown("### 2. Entity Summary Table (Count by Label)")
644
  grouped_entity_table = df['label'].value_counts().reset_index()
645
  grouped_entity_table.columns = ['Entity Label', 'Count']
646
  grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(reverse_category_mapping)
647
  st.dataframe(grouped_entity_table[['Category', 'Entity Label', 'Count']], use_container_width=True)
648
+ st.markdown("---")
649
 
 
650
  st.markdown("### 3. Detailed Entity Analysis")
 
651
  # 3. New Tabs: Tab 1: Category Details Table | Tab 2: Treemap
652
  tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
653
+
654
  # TAB 1: Detailed Entities Table Grouped by Category
655
  with tab_category_details:
656
  st.markdown("#### Detailed Entities Table (Grouped by Category)")
 
657
  # Get the unique categories for creating inner tabs
658
  unique_categories = list(category_mapping.keys())
 
 
 
659
 
660
+ # Create inner tabs dynamically based on the available categories
661
+ tabs_category = st.tabs(unique_categories)
662
  # We iterate over the categories and tabs simultaneously
663
  for category, tab in zip(unique_categories, tabs_category):
664
  # Filter the main DataFrame for the current category
665
  df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
666
+
667
  with tab:
668
  st.markdown(f"##### {category} Entities ({len(df_category)} total)")
669
  if not df_category.empty:
670
  # Display the DataFrame for the current category
671
  st.dataframe(
672
+ df_category,
673
+ use_container_width=True,
674
  # Format the score for better readability
675
  column_config={'score': st.column_config.NumberColumn(format="%.4f")}
676
  )
677
  else:
678
  st.info(f"No entities of category **{category}** were found in the text.")
 
679
  # TAB 2: Treemap
680
  with tab_treemap_viz:
681
  st.markdown("#### Treemap: Entity Distribution")
682
  # Treemap
683
  # FIX 1 (Streamlit): Added a robust color sequence here too for consistency in the Streamlit plot
684
  fig_treemap = px.treemap(
685
+ df,
686
+ path=[px.Constant("All Entities"), 'category', 'label', 'text'],
687
  values='score',
688
+ color='category',
689
  title="Entity Distribution by Category and Label",
690
  color_discrete_sequence=px.colors.qualitative.Dark24 # Applied fix here
691
  )
692
  fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
693
  st.plotly_chart(fig_treemap, use_container_width=True)
694
+
695
  # 4. Comparative Charts (Keep outside the new tabs, as in original code structure)
696
  st.markdown("---")
697
  st.markdown("### 4. Comparative Charts")
698
+
699
+ # FIX: The three comparative charts are generated here and will be stacked vertically
700
+ # in the HTML report output.
701
+ col1, col2, col3 = st.columns(3) # Use Streamlit columns for the *Streamlit* preview
702
+
 
703
  grouped_counts = df['category'].value_counts().reset_index()
704
  grouped_counts.columns = ['Category', 'Count']
705
+
706
+ # Pie Chart
 
707
  with col1:
708
+ fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
709
+ fig_pie.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350)
710
  st.plotly_chart(fig_pie, use_container_width=True)
711
+
712
+ # Bar Chart (Category Count)
 
 
713
  with col2:
714
+ fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
715
+ fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
716
  st.plotly_chart(fig_bar_category, use_container_width=True)
717
+
718
+ # Bar Chart (Most Frequent Entities)
719
+ word_counts = df['text'].value_counts().reset_index()
720
+ word_counts.columns = ['Entity', 'Count']
721
+ repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
722
+
 
 
 
723
  with col3:
724
+ if not repeating_entities.empty:
725
+ fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
726
+ fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
727
+ st.plotly_chart(fig_bar_freq, use_container_width=True)
728
+ else:
729
+ st.info("No entities repeat for frequency chart.")
730
+
731
  st.markdown("---")
732
  st.markdown("### 5. Entity Co-occurrence Network")
733
+
734
+ # 5. Network Graph
735
  network_fig = generate_network_graph(df, st.session_state.last_text)
736
  st.plotly_chart(network_fig, use_container_width=True)
737
+
 
738
  st.markdown("---")
739
+ st.markdown("### 6. Topic Modeling Analysis")
740
+
741
+ # 6. Topic Modeling Bubble Chart
742
  if df_topic_data is not None and not df_topic_data.empty:
743
  bubble_figure = create_topic_word_bubbles(df_topic_data)
744
  if bubble_figure:
745
  st.plotly_chart(bubble_figure, use_container_width=True)
746
  else:
747
+ st.error("Error generating Topic Word Bubble Chart.")
748
  else:
749
+ st.info("Topic modeling requires more unique input (at least two unique entities).")
750
 
751
+ # --- Report Download ---
752
  st.markdown("---")
753
+ st.markdown("### Download Full HTML Report")
754
+
755
+ html_report = generate_html_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data)
 
 
 
 
 
 
 
756
  st.download_button(
757
+ label="Download HTML Report",
758
  data=html_report,
759
+ file_name="ner_topic_report.html",
760
+ mime="text/html",
761
+ type="primary"
762
  )
763