Spaces:

AIEcosystem
/

relationship-map

Sleeping

App Files Files Community

AIEcosystem committed on Oct 8, 2025

Commit

965b307

verified ·

1 Parent(s): ec15b51

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +357 -185

src/streamlit_app.py CHANGED Viewed

@@ -11,9 +11,12 @@ import numpy as np
 import re
 import string
 import json
-# --- Imports for file generation (no pptx) ---
 from io import BytesIO
-import plotly.io as pio
 # ---------------------------
 # --- Stable Scikit-learn LDA Imports ---
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -36,31 +39,35 @@ except ImportError:
 # Set HF_HOME environment variable to a writable path
 os.environ['HF_HOME'] = '/tmp'
-# --- Color Map for Highlighting and Network Graph Nodes (NO PINK COLORS) ---
 entity_color_map = {
     "person": "#10b981",
-    "country": "#3b82f6",
-    "city": "#4ade80",
     "organization": "#f59e0b",
-    "date": "#8b5cf6",
-    "time": "#ec4899",
-    "cardinal": "#06b6d4",
-    "money": "#f43f5e",
-    "position": "#a855f7",
 }
-# --- Label Definitions and Category Mapping (Used by the App) ---
 labels = list(entity_color_map.keys())
-labels = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
 category_mapping = {
-   "People": ["person", "organization", "position"],
-   "Locations": ["country", "city"],
-   "Time": ["date", "time"],
-   "Numbers": ["money", "cardinal"]}
-# CORRECTION 1: Reverse category mapping definition moved here for app-wide access
-reverse_category_mapping = {label: category
-for category, label_list in category_mapping.items() for label in label_list}
 # --- Utility Functions for Analysis and Plotly ---
 def extract_label(node_name):
@@ -76,21 +83,25 @@ def highlight_entities(text, df_entities):
     """Generates HTML to display text with entities highlighted and colored."""
     if df_entities.empty:
         return text
     # Sort entities by start index descending to insert highlights without affecting subsequent indices
     entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
     highlighted_text = text
     for entity in entities:
         start = entity['start']
         end = entity['end']
         label = entity['label']
         entity_text = entity['text']
         color = entity_color_map.get(label, '#000000')
         # Create a span with background color and tooltip
         highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
         # Replace the original text segment with the highlighted HTML
         highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
     # Use a div to mimic the Streamlit input box style for the report
-    return f'<div style="border: 1px solid #CCCCCC; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
 def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
     """
@@ -100,6 +111,7 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
     documents = df_entities['text'].unique().tolist()
     if len(documents) < 2:
         return None
     N = min(num_top_words, len(documents))
     try:
         tfidf_vectorizer = TfidfVectorizer(
@@ -109,6 +121,7 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
         )
         tfidf = tfidf_vectorizer.fit_transform(documents)
         tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
         lda = LatentDirichletAllocation(
             n_components=num_topics, max_iter=5, learning_method='online',random_state=42, n_jobs=-1
         )
@@ -134,6 +147,7 @@ def create_topic_word_bubbles(df_topic_data):
     # Renaming columns to match the output of perform_topic_modeling
     df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
     df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position in the app
     if df_topic_data.empty:
         return None
     fig = px.scatter(
@@ -159,41 +173,45 @@ def create_topic_word_bubbles(df_topic_data):
         xaxis={'tickangle': -45, 'showgrid': False},
         yaxis={'showgrid': True},
         showlegend=True,
-        plot_bgcolor='#FFFFFF', # Removed pink
-        paper_bgcolor='#FFFFFF', # Removed pink
         height=600,
         margin=dict(t=50, b=100, l=50, r=10),
     )
-    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',marker=dict(line=dict(width=1, color='DarkSlateGrey')))
     return fig
 def generate_network_graph(df, raw_text):
     """
     Generates a network graph visualization (Node Plot) with edges
-    based on entity co-occurrence in sentences.
     """
     entity_counts = df['text'].value_counts().reset_index()
     entity_counts.columns = ['text', 'frequency']
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
     thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
     radius = 10
     unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
     unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
-    pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
     edges = set()
-    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
     for sentence in sentences:
         entities_in_sentence = []
         for entity_text in unique_entities['text'].unique():
             if entity_text.lower() in sentence.lower():
                 entities_in_sentence.append(entity_text)
         unique_entities_in_sentence = list(set(entities_in_sentence))
         for i in range(len(unique_entities_in_sentence)):
             for j in range(i + 1, len(unique_entities_in_sentence)):
                 node1 = unique_entities_in_sentence[i]
@@ -203,6 +221,7 @@ def generate_network_graph(df, raw_text):
     edge_x = []
     edge_y = []
     for edge in edges:
         n1, n2 = edge
         if n1 in pos_map and n2 in pos_map:
@@ -254,7 +273,7 @@ def generate_network_graph(df, raw_text):
             seen_labels.add(label)
             color = entity_color_map.get(label, '#cccccc')
             legend_traces.append(go.Scatter(
-                x=[None], y=[None], mode='markers', marker=dict(size=10, color=color),name=f"{label.capitalize()}", showlegend=True
             ))
     for trace in legend_traces:
         fig.add_trace(trace)
@@ -270,8 +289,161 @@ def generate_network_graph(df, raw_text):
         margin=dict(t=50, b=10, l=10, r=10),
         height=600
     )
     return fig
 # --- NEW CSV GENERATION FUNCTION ---
 def generate_entity_csv(df):
     """
@@ -287,12 +459,14 @@ def generate_entity_csv(df):
 # -----------------------------------
 # --- Existing App Functionality (HTML) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
-    Generates a full HTML report containing all analysis results and
-    visualizations. (Simplified HTML generation for brevity in code)
     """
     # 1. Generate Visualizations (Plotly HTML)
     # 1a. Treemap
     fig_treemap = px.treemap(
         df,
@@ -322,6 +496,7 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     word_counts.columns = ['Entity', 'Count']
     repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
     bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
     if not repeating_entities.empty:
         fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
         fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
@@ -340,7 +515,7 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
         else:
             topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
     else:
-        topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #cccccc;">'
         topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
         topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
         topic_charts_html += '</div>'
@@ -349,12 +524,9 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
     # 3. Entity Tables (Pandas to HTML)
-    # The grouped by category table is used here for the HTML export
-    grouped_entity_table_df = df.groupby(['category', 'label']).size().reset_index(name='Count')
-    grouped_entity_table_df.columns = ['Category', 'Entity', 'Count'] # Column Renaming
-    grouped_entity_table_html = grouped_entity_table_df.to_html(
-         classes='table table-striped',
-         index=False
     )
     # 4. Construct the Final HTML
@@ -366,15 +538,15 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     <style>
         body {{ font-family: 'Inter', sans-serif; margin: 0; padding: 20px; background-color: #f4f4f9; color: #333; }}
         .container {{ max-width: 1200px; margin: 0 auto; background-color: #ffffff; padding: 30px; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }}
-        h1 {{ color: #007bff; border-bottom: 3px solid #007bff; padding-bottom: 10px; margin-top: 0; }}
         h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
         h3 {{ color: #555; margin-top: 20px; }}
-        .metadata {{ background-color: #e6f7ff; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
         .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
         table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
         table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
         table th {{ background-color: #f0f0f0; }}
-        .highlighted-text {{ border: 1px solid #CCCCCC; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
     </style></head><body>
     <div class="container">
         <h1>Entity and Topic Analysis Report</h1>
@@ -387,45 +559,46 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
         <div class="highlighted-text-container">
             {highlighted_text_html}
         </div>
-        <h2>2. Entities Count by Category and Entity</h2>
-        {grouped_entity_table_html}
         <h2>3. Data Visualizations</h2>
         <h3>3.1 Entity Distribution Treemap</h3>
         <div class="chart-box">{treemap_html}</div>
-        <h3>3.2 Comparative Charts</h3>
         <div class="chart-box">{pie_html}</div>
         <div class="chart-box">{bar_category_html}</div>
         <div class="chart-box">{bar_freq_html}</div>
-        <h3>3.3 Entity Relationship Map</h3>
         <div class="chart-box">{network_html}</div>
-        <h2>4. Topic Modeling</h2>
         {topic_charts_html}
     </div></body></html>
     """
     return html_content
-# --- Page Configuration and Styling (No Sidebar, Removed Pink) ---
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
 st.markdown(
     """
     <style>
     /* Overall app container - NO SIDEBAR */
     .main {
-        background-color: #f0f2f6; /* Light Grey/Default */
         color: #333333; /* Dark grey text for contrast */
     }
     .stApp {
-        background-color: #f0f2f6;
     }
     /* Text Area background and text color (input fields) */
     .stTextArea textarea {
-        background-color: #FFFFFF; /* White for input fields */
         color: #000000; /* Black text for input */
-        border: 1px solid #CCCCCC; /* Neutral border */
     }
     /* Button styling */
     .stButton > button {
-        background-color: #007bff; /* Blue for the button */
         color: #FFFFFF; /* White text for contrast */
         border: none;
         padding: 10px 20px;
@@ -433,20 +606,19 @@ st.markdown(
     }
     /* Expander header and content background */
     .streamlit-expanderHeader, .streamlit-expanderContent {
-        background-color: #e6f7ff; /* Very Light Blue/Neutral */
         color: #333333;
     }
     </style>
     """,
     unsafe_allow_html=True)
-st.subheader("Entity and Topic Analysis Report Generator", divider="blue")
-st.link_button("by nlpblogs", "https://nlpblogs.com", type="secondary")
-# CORRECTION 2: Removed duplicated expander. The following is the second, correct one.
 expander = st.expander("**Important notes**")
-expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"**Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.**How to Use:** Type or paste your text (max. 1000 words) into the text area below, press Ctrl + Enter, and then click the 'Results' button.**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
 # --- Comet ML Setup (Placeholder/Conditional) ---
@@ -481,9 +653,9 @@ DEFAULT_TEXT = (
     "end of the year. The platform is designed to be compatible with both Windows and Linux operating systems. "
     "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
     "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
-    "general public by October 1st. The goal is to deploy the Astra v2 platform before the next solar eclipse event in 2026.")
 # -----------------------------------
 # --- Session State Initialization (CRITICAL FIX) ---
 if 'show_results' not in st.session_state:
     st.session_state.show_results = False
@@ -515,6 +687,7 @@ text = st.text_area(
     height=250,
     key='my_text_area',
     value=st.session_state.my_text_area)
 word_count = len(text.split())
 st.markdown(f"**Word count:** {word_count}/{word_limit}")
 st.button("Clear text", on_click=clear_text)
@@ -532,20 +705,25 @@ if st.button("Results"):
             if text != st.session_state.last_text:
                 st.session_state.last_text = text
                 start_time = time.time()
                 # --- Model Prediction & Dataframe Creation ---
                 entities = model.predict_entities(text, labels)
                 df = pd.DataFrame(entities)
                 if not df.empty:
                     df['text'] = df['text'].apply(remove_trailing_punctuation)
                     df['category'] = df['label'].map(reverse_category_mapping)
                     st.session_state.results_df = df
                     unique_entity_count = len(df['text'].unique())
                     N_TOP_WORDS_TO_USE = min(10, unique_entity_count)
                     st.session_state.topic_results = perform_topic_modeling(
                         df,
                         num_topics=2,
                         num_top_words=N_TOP_WORDS_TO_USE
                     )
                     if comet_initialized:
                         experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
                         experiment.log_parameter("input_text", text)
@@ -554,153 +732,147 @@ if st.button("Results"):
                 else:
                     st.session_state.results_df = pd.DataFrame()
                     st.session_state.topic_results = None
                 end_time = time.time()
                 st.session_state.elapsed_time = end_time - start_time
-                st.session_state.show_results = True
-# --- Results Display ---
-if st.session_state.show_results and not st.session_state.results_df.empty:
-    st.success(f"Processing complete in {st.session_state.elapsed_time:.2f} seconds! 🎉")
     df = st.session_state.results_df
-    text_input = st.session_state.last_text
-    elapsed_time = st.session_state.elapsed_time
     df_topic_data = st.session_state.topic_results
-    # --- Highlighted Text and Download Buttons (Above Tabs) ---
-    st.subheader("1. Analyzed Text & Extracted Entities", divider="blue")
-    st.markdown(
-        highlight_entities(text_input, df),
-        unsafe_allow_html=True
-    )
-    st.subheader("Downloads", divider="blue")
-    col1, col2, col3 = st.columns([1, 1, 3])
-    # 1. Download CSV
-    csv_buffer = generate_entity_csv(df)
-    col1.download_button(
-        label="Download Entities as CSV",
-        data=csv_buffer.getvalue(),
-        file_name="ner_entities.csv",
-        mime="text/csv"
-    )
-    # 2. Download HTML Report
-    html_content = generate_html_report(df, text_input, elapsed_time, df_topic_data)
-    col2.download_button(
-        label="Download Full HTML Report",
-        data=html_content.encode('utf-8'),
-        file_name="ner_analysis_report.html",
-        mime="text/html"
-    )
-    st.markdown("---")
-    # CORRECTION 1: Tabs Implementation
-    tab1, tab2 = st.tabs(["📊 Entity Data (Table) & Glossary", "📈 Visualizations & Topics"])
-    with tab1:
-        # Create the summary table with the requested column name changes
-        grouped_entity_table = df.groupby(['category', 'label']).size().reset_index(name='Count')
-        grouped_entity_table.columns = ['Category', 'Entity', 'Count']
-        st.markdown("## Entity Counts by Category and Entity")
-        st.dataframe(grouped_entity_table.sort_values(by=['Category', 'Count'], ascending=[True, False]), use_container_width=True)
         st.markdown("---")
-        st.markdown("## Glossary of Tags and Category Mapping")
-        # Display Category Mapping (forward and reverse)
-        st.markdown("### Category to Entity Label Mapping (`category_mapping`)")
-        st.json(category_mapping)
-        # Display the requested reverse mapping below the table
-        st.markdown("### Entity Label to Category Mapping (Reverse Glossary) (`reverse_category_mapping`)")
-        st.json(reverse_category_mapping) # Display the reverse mapping which was moved to the top
-        # Display general glossary
-        st.markdown("### General Glossary for Extracted Entities")
-        st.write("""
-        - **start**: Index of the start of the corresponding entity.
-        - **end**: Index of the end of the corresponding entity.
-        - **text**: Entity extracted from your text data.
-        - **label**: The entity tag assigned to the extracted entity.
-        - **category**: The broad category (e.g., 'People') derived from the 'label'.
-        - **score**: Accuracy score; how accurately a tag has been assigned to a given entity.
-        """)
-    with tab2:
-        st.markdown("## Visualizations")
-        # 3a. Treemap (As requested in Tab 2)
-        fig_treemap = px.treemap(
-            df,
-            path=[px.Constant("All Entities"), 'category', 'label', 'text'],
-            values='score',
-            color='category',
-            title="Entity Distribution by Category and Label",
-            color_discrete_sequence=px.colors.qualitative.Dark24
-        )
-        fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
-        st.markdown("### Entity Distribution Treemap")
-        st.plotly_chart(fig_treemap, use_container_width=True)
         st.markdown("---")
-        # 3b. Pie Chart and Category Bar Chart side-by-side
-        col_pie, col_bar_cat = st.columns(2)
-        # Pie Chart
         grouped_counts = df['category'].value_counts().reset_index()
         grouped_counts.columns = ['Category', 'Count']
-        fig_pie = px.pie(grouped_counts, values='Count', names='Category',
-                         title='Distribution of Entities by Category',
-                         color_discrete_sequence=px.colors.sequential.RdBu)
-        fig_pie.update_layout(margin=dict(t=50, b=10))
-        with col_pie:
-            st.markdown("### Distribution of Entities by Category")
             st.plotly_chart(fig_pie, use_container_width=True)
-        # Category Bar Chart
-        fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',
-                                  color='Category', title='Total Entities per Category',
-                                  color_discrete_sequence=px.colors.qualitative.Pastel)
-        fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10))
-        with col_bar_cat:
-            st.markdown("### Total Entities per Category")
             st.plotly_chart(fig_bar_category, use_container_width=True)
-        st.markdown("---")
-        # 3c. Most Frequent Entities Bar Chart
-        word_counts = df['text'].value_counts().reset_index()
-        word_counts.columns = ['Entity', 'Count']
-        repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
-        st.markdown("### Top 10 Most Frequent Entities")
-        if not repeating_entities.empty:
-            fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',
-                                  color='Entity', title='Top 10 Most Frequent Entities',
-                                  color_discrete_sequence=px.colors.sequential.Plasma)
-            fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=100))
-            st.plotly_chart(fig_bar_freq, use_container_width=True)
-        else:
-            st.info("No entities appear more than once in the text for visualization.")
         st.markdown("---")
-        # 3d. Network Graph
-        st.markdown("### Entity Relationship Map")
-        network_fig = generate_network_graph(df, text_input)
         st.plotly_chart(network_fig, use_container_width=True)
         st.markdown("---")
-        # 4. Topic Modeling
-        st.markdown("## Topic Modeling")
         if df_topic_data is not None and not df_topic_data.empty:
-            st.markdown("### Bubble size = word weight")
             bubble_figure = create_topic_word_bubbles(df_topic_data)
-            st.plotly_chart(bubble_figure, use_container_width=True)
-            st.markdown("### Top Words by Topic")
-            # Simple table display of topic words
-            st.dataframe(df_topic_data.rename(columns={'Topic_ID': 'Topic ID', 'Word': 'Top Word', 'Weight': 'Weight'}), use_container_width=True, hide_index=True)
         else:
-            st.info("Topic Modeling requires text containing at least two unique entities.")

 import re
 import string
 import json
+# --- PPTX Imports ---
 from io import BytesIO
+from pptx import Presentation
+from pptx.util import Inches, Pt
+from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
+import plotly.io as pio # Required for image export
 # ---------------------------
 # --- Stable Scikit-learn LDA Imports ---
 from sklearn.feature_extraction.text import TfidfVectorizer
 # Set HF_HOME environment variable to a writable path
 os.environ['HF_HOME'] = '/tmp'
+# --- Color Map for Highlighting and Network Graph Nodes ---
 entity_color_map = {
     "person": "#10b981",
+    "username": "#3b82f6",
+    "hashtag": "#4ade80",
+    "mention" : "#f97316",
     "organization": "#f59e0b",
+    "community": "#8b5cf6",
+    "position": "#ec4899",
+    "location": "#06b6d4",
+    "event": "#f43f5e",
+    "product": "#a855f7",
+    "platform": "#eab308",
+    "date": "#6366f1",
+    "media_type": "#14b8a6",
+    "url": "#60a5fa",
+    "nationality_religion": "#fb7185"
 }
+# --- Label Definitions and Category Mapping (Used by the App and PPTX) ---
 labels = list(entity_color_map.keys())
 category_mapping = {
+    "People & Groups": ["person", "username", "hashtag", "mention", "community", "position", "nationality_religion"],
+    "Location & Organization": ["location", "organization"],
+    "Temporal & Events": ["event", "date"],
+    "Digital & Products": ["platform", "product", "media_type", "url"],
+}
+reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 # --- Utility Functions for Analysis and Plotly ---
 def extract_label(node_name):
     """Generates HTML to display text with entities highlighted and colored."""
     if df_entities.empty:
         return text
     # Sort entities by start index descending to insert highlights without affecting subsequent indices
     entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
     highlighted_text = text
     for entity in entities:
         start = entity['start']
         end = entity['end']
         label = entity['label']
         entity_text = entity['text']
         color = entity_color_map.get(label, '#000000')
         # Create a span with background color and tooltip
         highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
         # Replace the original text segment with the highlighted HTML
         highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
     # Use a div to mimic the Streamlit input box style for the report
+    return f'<div style="border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
 def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
     """
     documents = df_entities['text'].unique().tolist()
     if len(documents) < 2:
         return None
     N = min(num_top_words, len(documents))
     try:
         tfidf_vectorizer = TfidfVectorizer(
         )
         tfidf = tfidf_vectorizer.fit_transform(documents)
         tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
         lda = LatentDirichletAllocation(
             n_components=num_topics, max_iter=5, learning_method='online',random_state=42, n_jobs=-1
         )
     # Renaming columns to match the output of perform_topic_modeling
     df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
     df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position in the app
     if df_topic_data.empty:
         return None
     fig = px.scatter(
         xaxis={'tickangle': -45, 'showgrid': False},
         yaxis={'showgrid': True},
         showlegend=True,
+        plot_bgcolor='#FFF0F5',
+        paper_bgcolor='#FFF0F5',
         height=600,
         margin=dict(t=50, b=100, l=50, r=10),
     )
+    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>', marker=dict(line=dict(width=1, color='DarkSlateGrey')))
     return fig
 def generate_network_graph(df, raw_text):
     """
     Generates a network graph visualization (Node Plot) with edges
+    based on entity co-occurrence in sentences.
     """
+    # Count how often each entity text occurs, then place the unique entities on a jittered circle.
     entity_counts = df['text'].value_counts().reset_index()
     entity_counts.columns = ['text', 'frequency']
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
     thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
     radius = 10
     unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
     unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
+    pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
     edges = set()
+    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
     for sentence in sentences:
         entities_in_sentence = []
         for entity_text in unique_entities['text'].unique():
             if entity_text.lower() in sentence.lower():
                 entities_in_sentence.append(entity_text)
         unique_entities_in_sentence = list(set(entities_in_sentence))
         for i in range(len(unique_entities_in_sentence)):
             for j in range(i + 1, len(unique_entities_in_sentence)):
                 node1 = unique_entities_in_sentence[i]
     edge_x = []
     edge_y = []
     for edge in edges:
         n1, n2 = edge
         if n1 in pos_map and n2 in pos_map:
             seen_labels.add(label)
             color = entity_color_map.get(label, '#cccccc')
             legend_traces.append(go.Scatter(
+                x=[None], y=[None], mode='markers', marker=dict(size=10, color=color), name=f"{label.capitalize()}", showlegend=True
             ))
     for trace in legend_traces:
         fig.add_trace(trace)
         margin=dict(t=50, b=10, l=10, r=10),
         height=600
     )
     return fig
+# --- PPTX HELPER FUNCTIONS (Integrated from generate_report.py) ---
+def fig_to_image_buffer(fig):
+    """
+    Converts a Plotly figure object into a BytesIO buffer containing PNG data.
+    Requires 'kaleido' to be installed for image export.
+    Returns None if export fails.
+    """
+    try:
+        # Use pio.to_image to convert the figure to a PNG byte array
+        img_bytes = pio.to_image(fig, format="png", width=900, height=500, scale=2)
+        img_buffer = BytesIO(img_bytes)
+        return img_buffer
+    except Exception as e:
+        # In a Streamlit environment, we can't show this error directly in the app execution flow
+        print(f"Error converting Plotly figure to image: {e}")
+        return None
+# --- PPTX GENERATION FUNCTION (Integrated and Adapted) ---
+def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_category_mapping):
+    """
+    Generates a PowerPoint presentation (.pptx) file containing key analysis results.
+    Returns the file content as a BytesIO buffer.
+    """
+    prs = Presentation()
+    # Layout 5 of the default python-pptx template is "Title Only": a title placeholder with free space below for charts, tables and text boxes
+    chart_layout = prs.slide_layouts[5]
+    # 1. Title Slide
+    title_slide_layout = prs.slide_layouts[0]
+    slide = prs.slides.add_slide(title_slide_layout)
+    title = slide.shapes.title
+    subtitle = slide.placeholders[1]
+    title.text = "NER & Topic Analysis Report"
+    subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"
+    # 2. Source Text Slide
+    slide = prs.slides.add_slide(chart_layout)
+    slide.shapes.title.text = "Analyzed Source Text"
+    # Add the raw text to a text box
+    left = Inches(0.5)
+    top = Inches(1.5)
+    width = Inches(9.0)
+    height = Inches(5.0)
+    txBox = slide.shapes.add_textbox(left, top, width, height)
+    tf = txBox.text_frame
+    tf.margin_top = Inches(0.1)
+    tf.margin_bottom = Inches(0.1)
+    tf.word_wrap = True
+    p = tf.add_paragraph()
+    p.text = text_input
+    p.font.size = Pt(14)
+    p.font.name = 'Arial'
+    # 3. Entity Summary Slide (Table)
+    slide = prs.slides.add_slide(chart_layout)
+    slide.shapes.title.text = "Entity Summary (Count by Category and Label)"
+    # Create the summary table using the app's established logic
+    grouped_entity_table = df['label'].value_counts().reset_index()
+    grouped_entity_table.columns = ['Entity Label', 'Count']
+    grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(
+        lambda x: reverse_category_mapping.get(x, 'Other')
+    )
+    grouped_entity_table = grouped_entity_table[['Category', 'Entity Label', 'Count']]
+    # Simple way to insert a table:
+    rows, cols = grouped_entity_table.shape
+    x, y, cx, cy = Inches(1), Inches(1.5), Inches(8), Inches(4.5)
+    # Add 1 row for the header
+    table = slide.shapes.add_table(rows + 1, cols, x, y, cx, cy).table
+    # Set column widths
+    table.columns[0].width = Inches(2.7)
+    table.columns[1].width = Inches(2.8)
+    table.columns[2].width = Inches(2.5)
+    # Set column headers
+    for i, col in enumerate(grouped_entity_table.columns):
+        cell = table.cell(0, i)
+        cell.text = col
+        cell.fill.solid()
+        # Optional: Add simple styling to header
+    # Fill in the data
+    for i in range(rows):
+        for j in range(cols):
+            cell = table.cell(i+1, j)
+            cell.text = str(grouped_entity_table.iloc[i, j])
+            # Optional: Style data cells
+    # 4. Treemap Slide (Visualization)
+    fig_treemap = px.treemap(
+        df,
+        path=[px.Constant("All Entities"), 'category', 'label', 'text'],
+        values='score',
+        color='category',
+        title="Entity Distribution by Category and Label",
+        color_discrete_sequence=px.colors.qualitative.Dark24
+    )
+    fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
+    treemap_image = fig_to_image_buffer(fig_treemap)
+    if treemap_image:
+        slide = prs.slides.add_slide(chart_layout)
+        slide.shapes.title.text = "Entity Distribution Treemap"
+        slide.shapes.add_picture(treemap_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
+    # 5. Entity Count Bar Chart Slide (Visualization)
+    grouped_counts = df['category'].value_counts().reset_index()
+    grouped_counts.columns = ['Category', 'Count']
+    fig_bar_category = px.bar(
+        grouped_counts,
+        x='Category',
+        y='Count',
+        color='Category',
+        title='Total Entities per Category',
+        color_discrete_sequence=px.colors.qualitative.Pastel
+    )
+    fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
+    bar_category_image = fig_to_image_buffer(fig_bar_category)
+    if bar_category_image:
+        slide = prs.slides.add_slide(chart_layout)
+        slide.shapes.title.text = "Total Entities per Category"
+        slide.shapes.add_picture(bar_category_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
+    # 6. Topic Modeling Bubble Chart Slide
+    if df_topic_data is not None and not df_topic_data.empty:
+        # Ensure data frame is in the format expected by create_topic_word_bubbles
+        df_topic_data_pptx = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
+        bubble_figure = create_topic_word_bubbles(df_topic_data_pptx)
+        bubble_image = fig_to_image_buffer(bubble_figure)
+        if bubble_image:
+            slide = prs.slides.add_slide(chart_layout)
+            slide.shapes.title.text = "Topic Word Weights (Bubble Chart)"
+            slide.shapes.add_picture(bubble_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
+    else:
+        # Placeholder slide if topic modeling is not available
+        slide = prs.slides.add_slide(chart_layout)
+        slide.shapes.title.text = "Topic Modeling Results"
+        slide.placeholders[1].text = "Topic Modeling requires more unique input (at least two unique entities)."
+    # Save the presentation to an in-memory buffer
+    pptx_buffer = BytesIO()
+    prs.save(pptx_buffer)
+    pptx_buffer.seek(0)
+    return pptx_buffer
 # --- NEW CSV GENERATION FUNCTION ---
 def generate_entity_csv(df):
     """
 # -----------------------------------
 # --- Existing App Functionality (HTML) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
+    Generates a full HTML report containing all analysis results and visualizations.
+    (Content omitted for brevity but assumed to be here).
     """
     # 1. Generate Visualizations (Plotly HTML)
     # 1a. Treemap
     fig_treemap = px.treemap(
         df,
     word_counts.columns = ['Entity', 'Count']
     repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
     bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
     if not repeating_entities.empty:
         fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
         fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
         else:
             topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
     else:
+        topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #FF69B4;">'
         topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
         topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
         topic_charts_html += '</div>'
     highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
     # 3. Entity Tables (Pandas to HTML)
+    entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
+        classes='table table-striped',
+        index=False
     )
     # 4. Construct the Final HTML
     <style>
         body {{ font-family: 'Inter', sans-serif; margin: 0; padding: 20px; background-color: #f4f4f9; color: #333; }}
         .container {{ max-width: 1200px; margin: 0 auto; background-color: #ffffff; padding: 30px; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }}
+        h1 {{ color: #FF69B4; border-bottom: 3px solid #FF69B4; padding-bottom: 10px; margin-top: 0; }}
         h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
         h3 {{ color: #555; margin-top: 20px; }}
+        .metadata {{ background-color: #FFE4E1; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
         .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
         table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
         table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
         table th {{ background-color: #f0f0f0; }}
+        .highlighted-text {{ border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
     </style></head><body>
     <div class="container">
         <h1>Entity and Topic Analysis Report</h1>
         <div class="highlighted-text-container">
             {highlighted_text_html}
         </div>
+        <h2>2. Full Extracted Entities Table</h2>
+        {entity_table_html}
         <h2>3. Data Visualizations</h2>
         <h3>3.1 Entity Distribution Treemap</h3>
         <div class="chart-box">{treemap_html}</div>
+        <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
         <div class="chart-box">{pie_html}</div>
         <div class="chart-box">{bar_category_html}</div>
         <div class="chart-box">{bar_freq_html}</div>
+        <h3>3.3 Entity Co-occurrence Network (Edges = Same Sentence)</h3>
         <div class="chart-box">{network_html}</div>
+        <h2>4. Topic Modeling (LDA on Entities)</h2>
         {topic_charts_html}
     </div></body></html>
     """
     return html_content
+# --- Page Configuration and Styling (No Sidebar) ---
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
 st.markdown(
     """
     <style>
     /* Overall app container - NO SIDEBAR */
     .main {
+        background-color: #FFF0F5; /* Blanched Almond/Light Pink */
         color: #333333; /* Dark grey text for contrast */
     }
     .stApp {
+        background-color: #FFF0F5;
     }
     /* Text Area background and text color (input fields) */
     .stTextArea textarea {
+        background-color: #FFFAF0; /* Floral White/Near white for input fields */
         color: #000000; /* Black text for input */
+        border: 1px solid #FF69B4; /* Deep Pink border */
     }
     /* Button styling */
     .stButton > button {
+        background-color: #FF69B4; /* Deep Pink for the button */
         color: #FFFFFF; /* White text for contrast */
         border: none;
         padding: 10px 20px;
     }
     /* Expander header and content background */
     .streamlit-expanderHeader, .streamlit-expanderContent {
+        background-color: #FFE4E1; /* Misty Rose/Lighter Pink */
         color: #333333;
     }
     </style>
     """,
     unsafe_allow_html=True)
+st.subheader("NER and Topic Analysis Report Generator", divider="rainbow")
+st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 expander = st.expander("**Important notes**")
+expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
+**Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and `kaleido`.
+**Results:** Results are compiled into a single, comprehensive **HTML report**, a **PowerPoint (.pptx) file**, and a **CSV file** for easy download and sharing.
+**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
 # --- Comet ML Setup (Placeholder/Conditional) ---
     "end of the year. The platform is designed to be compatible with both Windows and Linux operating systems. "
     "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
     "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
+    "general public by October 1st. The goal is to deploy the Astra v2 platform before the next solar eclipse event in 2026."
+)
 # -----------------------------------
 # --- Session State Initialization (CRITICAL FIX) ---
 if 'show_results' not in st.session_state:
     st.session_state.show_results = False
     height=250,
     key='my_text_area',
     value=st.session_state.my_text_area)
 word_count = len(text.split())
 st.markdown(f"**Word count:** {word_count}/{word_limit}")
 st.button("Clear text", on_click=clear_text)
             if text != st.session_state.last_text:
                 st.session_state.last_text = text
                 start_time = time.time()
                 # --- Model Prediction & Dataframe Creation ---
                 entities = model.predict_entities(text, labels)
                 df = pd.DataFrame(entities)
                 if not df.empty:
                     df['text'] = df['text'].apply(remove_trailing_punctuation)
                     df['category'] = df['label'].map(reverse_category_mapping)
                     st.session_state.results_df = df
                     unique_entity_count = len(df['text'].unique())
                     N_TOP_WORDS_TO_USE = min(10, unique_entity_count)
                     st.session_state.topic_results = perform_topic_modeling(
                         df,
                         num_topics=2,
                         num_top_words=N_TOP_WORDS_TO_USE
                     )
                     if comet_initialized:
                         experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
                         experiment.log_parameter("input_text", text)
                 else:
                     st.session_state.results_df = pd.DataFrame()
                     st.session_state.topic_results = None
                 end_time = time.time()
                 st.session_state.elapsed_time = end_time - start_time
+                st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
+            st.session_state.show_results = True
+# --- Display Download Link and Results ---
+if st.session_state.show_results:
     df = st.session_state.results_df
     df_topic_data = st.session_state.topic_results
+    if df.empty:
+        st.warning("No entities were found in the provided text.")
+    else:
+        st.subheader("Analysis Results", divider="blue")
+        # 1. Highlighted Text
+        st.markdown("### 1. Analyzed Text with Highlighted Entities")
+        st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
+        # 2. Entity Summary Table
+        st.markdown("### 2. Entity Summary Table (Count by Label)")
+        grouped_entity_table = df['label'].value_counts().reset_index()
+        grouped_entity_table.columns = ['Entity Label', 'Count']
+        grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(reverse_category_mapping)
+        st.dataframe(grouped_entity_table[['Category', 'Entity Label', 'Count']], use_container_width=True)
         st.markdown("---")
+        # 3. Detailed Entity Analysis Tabs
+        st.markdown("### 3. Detailed Entity Analysis")
+        tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
+        with tab_category_details:
+            st.markdown("#### Detailed Entities Table (Grouped by Category)")
+            unique_categories = list(category_mapping.keys())
+            tabs_category = st.tabs(unique_categories)
+            for category, tab in zip(unique_categories, tabs_category):
+                df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
+                with tab:
+                    st.markdown(f"##### {category} Entities ({len(df_category)} total)")
+                    if not df_category.empty:
+                        st.dataframe(
+                            df_category,
+                            use_container_width=True,
+                            column_config={'score': st.column_config.NumberColumn(format="%.4f")}
+                        )
+                    else:
+                        st.info(f"No entities of category **{category}** were found in the text.")
+        with tab_treemap_viz:
+            st.markdown("#### Treemap: Entity Distribution")
+            fig_treemap = px.treemap(
+                df,
+                path=[px.Constant("All Entities"), 'category', 'label', 'text'],
+                values='score',
+                color='category',
+                title="Entity Distribution by Category and Label",
+                color_discrete_sequence=px.colors.qualitative.Dark24
+            )
+            fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
+            st.plotly_chart(fig_treemap, use_container_width=True)
+        # 4. Comparative Charts
         st.markdown("---")
+        st.markdown("### 4. Comparative Charts")
+        col1, col2, col3 = st.columns(3)
         grouped_counts = df['category'].value_counts().reset_index()
         grouped_counts.columns = ['Category', 'Count']
+        with col1: # Pie Chart
+            fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
+            fig_pie.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350)
             st.plotly_chart(fig_pie, use_container_width=True)
+        with col2: # Bar Chart (Category Count)
+            fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
+            fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
             st.plotly_chart(fig_bar_category, use_container_width=True)
+        with col3: # Bar Chart (Most Frequent Entities)
+            word_counts = df['text'].value_counts().reset_index()
+            word_counts.columns = ['Entity', 'Count']
+            repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
+            if not repeating_entities.empty:
+                fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
+                fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
+                st.plotly_chart(fig_bar_freq, use_container_width=True)
+            else:
+                st.info("No entities repeat for frequency chart.")
         st.markdown("---")
+        st.markdown("### 5. Entity Co-occurrence Network")
+        network_fig = generate_network_graph(df, st.session_state.last_text)
         st.plotly_chart(network_fig, use_container_width=True)
         st.markdown("---")
+        st.markdown("### 6. Topic Modeling Analysis")
         if df_topic_data is not None and not df_topic_data.empty:
             bubble_figure = create_topic_word_bubbles(df_topic_data)
+            if bubble_figure:
+                st.plotly_chart(bubble_figure, use_container_width=True)
+            else:
+                st.error("Error generating Topic Word Bubble Chart.")
         else:
+            st.info("Topic modeling requires more unique input (at least two unique entities).")
+        # --- Report Download ---
+        st.markdown("---")
+        st.markdown("### Download Full Report Artifacts")
+        # 1. HTML Report Download (Retained)
+        html_report = generate_html_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data)
+        st.download_button(
+            label="Download Comprehensive HTML Report",
+            data=html_report,
+            file_name="ner_topic_report.html",
+            mime="text/html",
+            type="primary"
+        )
+        # 2. PowerPoint PPTX Download (Retained)
+        pptx_buffer = generate_pptx_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data, reverse_category_mapping)
+        st.download_button(
+            label="Download Presentation Slides (.pptx)",
+            data=pptx_buffer,
+            file_name="ner_topic_report.pptx",
+            mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
+            type="primary"
+        )
+        # 3. CSV Data Download (NEW)
+        csv_buffer = generate_entity_csv(df)
+        st.download_button(
+            label="Download Extracted Entities (CSV)",
+            data=csv_buffer,
+            file_name="extracted_entities.csv",
+            mime="text/csv",
+            type="secondary"
+        )