Spaces:

AIEcosystem
/

relationship-map

Sleeping

App Files Community

AIEcosystem committed on Oct 8, 2025

Commit

4dbacfd

verified ·

1 Parent(s): 46fc5df

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +112 -255

src/streamlit_app.py CHANGED Viewed

@@ -16,7 +16,7 @@ from io import BytesIO
 from pptx import Presentation
 from pptx.util import Inches, Pt
 from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
-import plotly.io as pio # Required for image export (needs kaleido!)
 # ---------------------------
 # --- Stable Scikit-learn LDA Imports ---
 from sklearn.feature_extraction.text import TfidfVectorizer
@@ -66,7 +66,8 @@ category_mapping = {
     "Temporal & Events": ["event", "date"],
     "Digital & Products": ["platform", "product", "media_type", "url"],
 }
-reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 # --- Utility Functions for Analysis and Plotly ---
@@ -178,7 +179,8 @@ def create_topic_word_bubbles(df_topic_data):
         height=600,
         margin=dict(t=50, b=100, l=50, r=10),
     )
-    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>', marker=dict(line=dict(width=1, color='DarkSlateGrey')))
     return fig
 def generate_network_graph(df, raw_text):
@@ -191,7 +193,6 @@ def generate_network_graph(df, raw_text):
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
-        # Return a blank figure if not enough entities
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
@@ -293,7 +294,7 @@ def generate_network_graph(df, raw_text):
     return fig
-# --- PPTX HELPER FUNCTIONS (Integrated from generate_report.py) ---
 def fig_to_image_buffer(fig):
     """
@@ -307,12 +308,11 @@ def fig_to_image_buffer(fig):
         img_buffer = BytesIO(img_bytes)
         return img_buffer
     except Exception as e:
-        # Print the error for debugging purposes in the Streamlit console
-        # This message is CRITICAL for the user to understand why plots are missing
-        print(f"ERROR: Failed to convert Plotly figure to image for PPTX. This usually means 'kaleido' is missing. Error: {e}")
         return None
-# --- PPTX GENERATION FUNCTION (Integrated and Adapted) ---
 def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_category_mapping):
     """
@@ -323,7 +323,7 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
     # Layout 5: Title and Content (often good for charts)
     chart_layout = prs.slide_layouts[5]
-    # --- 1. Title Slide ---
     title_slide_layout = prs.slide_layouts[0]
     slide = prs.slides.add_slide(title_slide_layout)
     title = slide.shapes.title
@@ -331,9 +331,9 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
     title.text = "NER & Topic Analysis Report"
     subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"
-    # --- 2. Source Text Slide ---
     slide = prs.slides.add_slide(chart_layout)
-    slide.shapes.title.text = "Analyzed Source Text (Raw)"
     # Add the raw text to a text box
     left = Inches(0.5)
@@ -350,83 +350,44 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
     p.font.size = Pt(14)
     p.font.name = 'Arial'
-    # --- 3. Highlighted Text Slide ---
     slide = prs.slides.add_slide(chart_layout)
-    slide.shapes.title.text = "Analyzed Source Text with Entity Highlights"
-    # Generate the HTML for highlighting (we need to strip the HTML formatting for PPTX text box)
-    highlighted_html = highlight_entities(text_input, df)
-    # Simple regex to remove the HTML tags, keeping only the text content
-    highlighted_clean_text = re.sub(r'<[^>]*>', '', highlighted_html)
-    highlighted_clean_text = highlighted_clean_text.replace("div style", "").strip()
-    # Add the text to a text box
-    left = Inches(0.5)
-    top = Inches(1.5)
-    width = Inches(9.0)
-    height = Inches(5.0)
-    txBox = slide.shapes.add_textbox(left, top, width, height)
-    tf = txBox.text_frame
-    tf.margin_top = Inches(0.1)
-    tf.margin_bottom = Inches(0.1)
-    tf.word_wrap = True
-    p = tf.add_paragraph()
-    p.text = highlighted_clean_text
-    p.font.size = Pt(12)
-    p.font.name = 'Arial'
-    p.font.color.rgb = prs.theme.theme_color_scheme.get_color(0) # Default text color
-    # --- 4. Extracted Entities Table Slide ---
-    slide = prs.slides.add_slide(chart_layout)
-    slide.shapes.title.text = "Extracted Entities Table"
-    # Prepare the dataframe for the table
-    table_df = df[['category', 'label', 'text', 'score']].sort_values(by=['category', 'label', 'score'], ascending=[True, True, False])
     # Simple way to insert a table:
-    rows, cols = table_df.shape
-    # Cap the table size for the slide, otherwise it gets too cramped
-    max_rows = 15
-    table_to_display = table_df.head(max_rows)
-    rows_display = len(table_to_display)
-    x, y, cx, cy = Inches(0.2), Inches(1.2), Inches(9.6), Inches(6.0)
     # Add 1 row for the header
-    table = slide.shapes.add_table(rows_display + 1, cols, x, y, cx, cy).table
     # Set column widths
-    table.columns[0].width = Inches(2.0) # Category
-    table.columns[1].width = Inches(2.0) # Label
-    table.columns[2].width = Inches(4.0) # Text
-    table.columns[3].width = Inches(1.6) # Score
     # Set column headers
-    header_cols = ['Category', 'Label', 'Text', 'Score']
-    for i, col in enumerate(header_cols):
         cell = table.cell(0, i)
         cell.text = col
         # Optional: Add simple styling to header
     # Fill in the data
-    for i in range(rows_display):
         for j in range(cols):
             cell = table.cell(i+1, j)
-            if table_df.columns[j] == 'score':
-                cell.text = f"{table_to_display.iloc[i, j]:.4f}"
-            else:
-                cell.text = str(table_to_display.iloc[i, j])
             # Optional: Style data cells
-    if rows > max_rows:
-        slide.placeholders[1].text = f"... Table truncated for slide readability. Full data contains {rows} entries. See CSV file for all data."
-        slide.placeholders[1].top = Inches(6.5)
-        slide.placeholders[1].left = Inches(0.5)
-        slide.placeholders[1].width = Inches(9.0)
-        slide.placeholders[1].height = Inches(0.5)
-    # --- 5. Treemap Slide (Visualization) ---
     fig_treemap = px.treemap(
         df,
         path=[px.Constant("All Entities"), 'category', 'label', 'text'],
@@ -438,31 +399,20 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
     fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
     treemap_image = fig_to_image_buffer(fig_treemap)
-    slide = prs.slides.add_slide(chart_layout)
-    slide.shapes.title.text = "Entity Distribution Treemap"
     if treemap_image:
         slide.shapes.add_picture(treemap_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
     else:
-        slide.placeholders[1].text = "Chart generation failed. Ensure the 'kaleido' library is installed for Plotly image export."
-    # --- 6. Pie Chart Slide (Visualization) ---
     grouped_counts = df['category'].value_counts().reset_index()
     grouped_counts.columns = ['Category', 'Count']
-    fig_pie = px.pie(grouped_counts, values='Count', names='Category', title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
-    fig_pie.update_layout(margin=dict(t=50, b=10))
-    pie_image = fig_to_image_buffer(fig_pie)
-    slide = prs.slides.add_slide(chart_layout)
-    slide.shapes.title.text = "Entity Distribution Pie Chart"
-    if pie_image:
-        # Pie charts often look better centered on the slide
-        slide.shapes.add_picture(pie_image, Inches(1.5), Inches(1.5), width=Inches(7.0))
-    else:
-        slide.placeholders[1].text = "Chart generation failed. Ensure the 'kaleido' library is installed for Plotly image export."
-    # --- 7. Category Count Bar Chart Slide (Visualization) ---
     fig_bar_category = px.bar(
         grouped_counts,
         x='Category',
@@ -474,47 +424,17 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
     fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
     bar_category_image = fig_to_image_buffer(fig_bar_category)
-    slide = prs.slides.add_slide(chart_layout)
-    slide.shapes.title.text = "Total Entities per Category Bar Chart"
     if bar_category_image:
-        slide.shapes.add_picture(bar_category_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
-    else:
-        slide.placeholders[1].text = "Chart generation failed. Ensure the 'kaleido' library is installed for Plotly image export."
-    # --- 8. Most Frequent Entities Bar Chart Slide (Visualization) ---
-    word_counts = df['text'].value_counts().reset_index()
-    word_counts.columns = ['Entity', 'Count']
-    repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
-    if not repeating_entities.empty:
-        fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
-        fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
-        bar_freq_image = fig_to_image_buffer(fig_bar_freq)
         slide = prs.slides.add_slide(chart_layout)
-        slide.shapes.title.text = "Top 10 Most Frequent Entities Bar Chart"
-        if bar_freq_image:
-            slide.shapes.add_picture(bar_freq_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
-        else:
-            slide.placeholders[1].text = "Chart generation failed. Ensure the 'kaleido' library is installed for Plotly image export."
     else:
         slide = prs.slides.add_slide(chart_layout)
-        slide.shapes.title.text = "Top 10 Most Frequent Entities Bar Chart"
-        slide.placeholders[1].text = "No entities repeat in the text, so a frequency chart was not generated."
-    # --- 9. Network Graph Slide (Visualization) ---
-    network_fig = generate_network_graph(df, text_input)
-    network_image = fig_to_image_buffer(network_fig)
-    slide = prs.slides.add_slide(chart_layout)
-    slide.shapes.title.text = "Entity Co-occurrence Network"
-    if network_image:
-        slide.shapes.add_picture(network_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
-    else:
-        slide.placeholders[1].text = "Chart generation failed. Ensure the 'kaleido' library is installed for Plotly image export."
-    # --- 10. Topic Modeling Bubble Chart Slide ---
     if df_topic_data is not None and not df_topic_data.empty:
         # Ensure data frame is in the format expected by create_topic_word_bubbles
         df_topic_data_pptx = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
@@ -526,9 +446,11 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
             slide.shapes.add_picture(bubble_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
         else:
             slide = prs.slides.add_slide(chart_layout)
-            slide.shapes.title.text = "Topic Word Weights (Bubble Chart)"
-            slide.placeholders[1].text = "Chart generation failed. Ensure the 'kaleido' library is installed for Plotly image export."
     else:
         slide = prs.slides.add_slide(chart_layout)
         slide.shapes.title.text = "Topic Modeling Results"
         slide.placeholders[1].text = "Topic Modeling requires more unique input (at least two unique entities)."
@@ -539,7 +461,7 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
     pptx_buffer.seek(0)
     return pptx_buffer
-# --- NEW CSV GENERATION FUNCTION (Retained) ---
 def generate_entity_csv(df):
     """
     Generates a CSV file of the extracted entities in an in-memory buffer,
@@ -553,12 +475,10 @@ def generate_entity_csv(df):
     return csv_buffer
 # -----------------------------------
-# --- Existing App Functionality (HTML) (Retained) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
     Generates a full HTML report containing all analysis results and visualizations.
-    (Content omitted for brevity but assumed to be here).
     """
     # 1. Generate Visualizations (Plotly HTML)
@@ -645,7 +565,6 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     </style></head><body>
     <div class="container">
         <h1>Entity and Topic Analysis Report</h1>
         <div class="metadata">
             <p><strong>Generated At:</strong> {time.strftime('%Y-%m-%d %H:%M:%S')}</p>
             <p><strong>Processing Time:</strong> {elapsed_time:.2f} seconds</p>
@@ -655,25 +574,19 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
         <div class="highlighted-text-container">
             {highlighted_text_html}
         </div>
         <h2>2. Full Extracted Entities Table</h2>
         {entity_table_html}
         <h2>3. Data Visualizations</h2>
         <h3>3.1 Entity Distribution Treemap</h3>
         <div class="chart-box">{treemap_html}</div>
         <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
         <div class="chart-box">{pie_html}</div>
         <div class="chart-box">{bar_category_html}</div>
         <div class="chart-box">{bar_freq_html}</div>
         <h3>3.3 Entity Co-occurrence Network (Edges = Same Sentence)</h3>
         <div class="chart-box">{network_html}</div>
         <h2>4. Topic Modeling (LDA on Entities)</h2>
         {topic_charts_html}
     </div></body></html>
     """
     return html_content
@@ -705,6 +618,10 @@ st.markdown(
         border: none;
         padding: 10px 20px;
         border-radius: 5px;
     }
     /* Expander header and content background */
     .streamlit-expanderHeader, .streamlit-expanderContent {
@@ -715,10 +632,10 @@ st.markdown(
     """,
     unsafe_allow_html=True)
 st.subheader("NER and Topic Analysis Report Generator", divider="rainbow")
-st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 expander = st.expander("**Important notes**")
 expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
-**Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and crucially, **`kaleido`** (for converting Plotly charts into static images).
 **Results:** Results are compiled into a single, comprehensive **HTML report**, a **PowerPoint (.pptx) file**, and a **CSV file** for easy download and sharing.
 **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
@@ -841,139 +758,79 @@ if st.button("Results"):
                 st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
             st.session_state.show_results = True
-# --- Display Download Link and Results ---
 if st.session_state.show_results:
     df = st.session_state.results_df
-    df_topic_data = st.session_state.topic_results
     if df.empty:
-        st.warning("No entities were found in the provided text.")
     else:
-        st.subheader("Analysis Results", divider="blue")
-        # 1. Highlighted Text
-        st.markdown("### 1. Analyzed Text with Highlighted Entities")
-        st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
-        # 2. Entity Summary Table
-        st.markdown("### 2. Entity Summary Table (Count by Label)")
-        grouped_entity_table = df['label'].value_counts().reset_index()
-        grouped_entity_table.columns = ['Entity Label', 'Count']
-        grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(reverse_category_mapping)
-        st.dataframe(grouped_entity_table[['Category', 'Entity Label', 'Count']], use_container_width=True)
-        st.markdown("---")
-        # 3. Detailed Entity Analysis Tabs
-        st.markdown("### 3. Detailed Entity Analysis")
-        tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
-        with tab_category_details:
-            st.markdown("#### Detailed Entities Table (Grouped by Category)")
-            unique_categories = list(category_mapping.keys())
-            tabs_category = st.tabs(unique_categories)
-            for category, tab in zip(unique_categories, tabs_category):
-                df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
-                with tab:
-                    st.markdown(f"##### {category} Entities ({len(df_category)} total)")
-                    if not df_category.empty:
-                        st.dataframe(
-                            df_category,
-                            use_container_width=True,
-                            column_config={'score': st.column_config.NumberColumn(format="%.4f")}
-                        )
-                    else:
-                        st.info(f"No entities of category **{category}** were found in the text.")
-        with tab_treemap_viz:
-            st.markdown("#### Treemap: Entity Distribution")
-            fig_treemap = px.treemap(
                 df,
-                path=[px.Constant("All Entities"), 'category', 'label', 'text'],
-                values='score',
-                color='category',
-                title="Entity Distribution by Category and Label",
-                color_discrete_sequence=px.colors.qualitative.Dark24
             )
-            fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
-            st.plotly_chart(fig_treemap, use_container_width=True)
-        # 4. Comparative Charts
         st.markdown("---")
-        st.markdown("### 4. Comparative Charts")
         col1, col2, col3 = st.columns(3)
-        grouped_counts = df['category'].value_counts().reset_index()
-        grouped_counts.columns = ['Category', 'Count']
-        with col1: # Pie Chart
-            fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
-            fig_pie.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350)
-            st.plotly_chart(fig_pie, use_container_width=True)
-        with col2: # Bar Chart (Category Count)
-            fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
-            fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
-            st.plotly_chart(fig_bar_category, use_container_width=True)
-        with col3: # Bar Chart (Most Frequent Entities)
-            word_counts = df['text'].value_counts().reset_index()
-            word_counts.columns = ['Entity', 'Count']
-            repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
-            if not repeating_entities.empty:
-                fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
-                fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
-                st.plotly_chart(fig_bar_freq, use_container_width=True)
-            else:
-                st.info("No entities repeat for frequency chart.")
         st.markdown("---")
-        st.markdown("### 5. Entity Co-occurrence Network")
-        network_fig = generate_network_graph(df, st.session_state.last_text)
-        st.plotly_chart(network_fig, use_container_width=True)
-        st.markdown("---")
-        st.markdown("### 6. Topic Modeling Analysis")
-        if df_topic_data is not None and not df_topic_data.empty:
-            bubble_figure = create_topic_word_bubbles(df_topic_data)
-            if bubble_figure:
-                st.plotly_chart(bubble_figure, use_container_width=True)
-            else:
-                st.error("Error generating Topic Word Bubble Chart.")
-        else:
-            st.info("Topic modeling requires more unique input (at least two unique entities).")
-        # --- Report Download ---
-        st.markdown("---")
-        st.markdown("### Download Full Report Artifacts")
-        # 1. HTML Report Download (Retained)
-        html_report = generate_html_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data)
-        st.download_button(
-            label="Download Comprehensive HTML Report",
-            data=html_report,
-            file_name="ner_topic_report.html",
-            mime="text/html",
-            type="primary"
         )
-        # 2. PowerPoint PPTX Download (Retained)
-        pptx_buffer = generate_pptx_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data, reverse_category_mapping)
-        st.download_button(
-            label="Download Presentation Slides (.pptx)",
-            data=pptx_buffer,
-            file_name="ner_topic_report.pptx",
-            mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
-            type="primary"
-        )
-        # 3. CSV Data Download (NEW)
-        csv_buffer = generate_entity_csv(df)
-        st.download_button(
-            label="Download Extracted Entities (CSV)",
-            data=csv_buffer,
-            file_name="extracted_entities.csv",
-            mime="text/csv",
-            type="secondary"
-        )

 from pptx import Presentation
 from pptx.util import Inches, Pt
 from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
+import plotly.io as pio # Required for image export (needs kaleido installed)
 # ---------------------------
 # --- Stable Scikit-learn LDA Imports ---
 from sklearn.feature_extraction.text import TfidfVectorizer
     "Temporal & Events": ["event", "date"],
     "Digital & Products": ["platform", "product", "media_type", "url"],
 }
+# Invert category_mapping (category -> list of labels) into label -> category.
+reverse_category_mapping = {
+    label: category
+    for category, label_list in category_mapping.items()
+    for label in label_list
+}
 # --- Utility Functions for Analysis and Plotly ---
         height=600,
         margin=dict(t=50, b=100, l=50, r=10),
     )
+    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',
+marker=dict(line=dict(width=1, color='DarkSlateGrey')))
     return fig
 def generate_network_graph(df, raw_text):
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
     return fig
+# --- PPTX HELPER FUNCTIONS ---
 def fig_to_image_buffer(fig):
     """
         img_buffer = BytesIO(img_bytes)
         return img_buffer
     except Exception as e:
+        # Print error to console/logs, as Streamlit elements cannot be used here
+        print(f"Error converting Plotly figure to image (Check Kaleido installation/permissions): {e}")
         return None
+# --- PPTX GENERATION FUNCTION ---
 def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_category_mapping):
     """
     # Layout 5: Title and Content (often good for charts)
     chart_layout = prs.slide_layouts[5]
+    # 1. Title Slide
     title_slide_layout = prs.slide_layouts[0]
     slide = prs.slides.add_slide(title_slide_layout)
     title = slide.shapes.title
     title.text = "NER & Topic Analysis Report"
     subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"
+    # 2. Source Text Slide
     slide = prs.slides.add_slide(chart_layout)
+    slide.shapes.title.text = "Analyzed Source Text"
     # Add the raw text to a text box
     left = Inches(0.5)
     p.font.size = Pt(14)
     p.font.name = 'Arial'
+    # 3. Entity Summary Slide (Table)
     slide = prs.slides.add_slide(chart_layout)
+    slide.shapes.title.text = "Entity Summary (Count by Category and Label)"
+    # Create the summary table using the app's established logic
+    grouped_entity_table = df['label'].value_counts().reset_index()
+    grouped_entity_table.columns = ['Entity Label', 'Count']
+    grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(
+        lambda x: reverse_category_mapping.get(x, 'Other')
+    )
+    grouped_entity_table = grouped_entity_table[['Category', 'Entity Label', 'Count']]
     # Simple way to insert a table:
+    rows, cols = grouped_entity_table.shape
+    x, y, cx, cy = Inches(1), Inches(1.5), Inches(8), Inches(4.5)
     # Add 1 row for the header
+    table = slide.shapes.add_table(rows + 1, cols, x, y, cx, cy).table
     # Set column widths
+    table.columns[0].width = Inches(2.7)
+    table.columns[1].width = Inches(2.8)
+    table.columns[2].width = Inches(2.5)
     # Set column headers
+    for i, col in enumerate(grouped_entity_table.columns):
         cell = table.cell(0, i)
         cell.text = col
+        cell.fill.solid()
         # Optional: Add simple styling to header
     # Fill in the data
+    for i in range(rows):
         for j in range(cols):
             cell = table.cell(i+1, j)
+            cell.text = str(grouped_entity_table.iloc[i, j])
             # Optional: Style data cells
+    # 4. Treemap Slide (Visualization)
     fig_treemap = px.treemap(
         df,
         path=[px.Constant("All Entities"), 'category', 'label', 'text'],
     fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
     treemap_image = fig_to_image_buffer(fig_treemap)
     if treemap_image:
+        slide = prs.slides.add_slide(chart_layout)
+        slide.shapes.title.text = "Entity Distribution Treemap"
         slide.shapes.add_picture(treemap_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
     else:
+        # Placeholder if image conversion failed (e.g., Kaleido issue)
+        slide = prs.slides.add_slide(chart_layout)
+        slide.shapes.title.text = "Entity Distribution Treemap (Chart Failed)"
+        slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
+    # 5. Entity Count Bar Chart Slide (Visualization)
     grouped_counts = df['category'].value_counts().reset_index()
     grouped_counts.columns = ['Category', 'Count']
     fig_bar_category = px.bar(
         grouped_counts,
         x='Category',
     fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
     bar_category_image = fig_to_image_buffer(fig_bar_category)
     if bar_category_image:
         slide = prs.slides.add_slide(chart_layout)
+        slide.shapes.title.text = "Total Entities per Category"
+        slide.shapes.add_picture(bar_category_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
     else:
         slide = prs.slides.add_slide(chart_layout)
+        slide.shapes.title.text = "Total Entities per Category (Chart Failed)"
+        slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
+    # 6. Topic Modeling Bubble Chart Slide
     if df_topic_data is not None and not df_topic_data.empty:
         # Ensure data frame is in the format expected by create_topic_word_bubbles
         df_topic_data_pptx = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
             slide.shapes.add_picture(bubble_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
         else:
             slide = prs.slides.add_slide(chart_layout)
+            slide.shapes.title.text = "Topic Word Weights (Chart Failed)"
+            slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
     else:
+        # Placeholder slide if topic modeling is not available
         slide = prs.slides.add_slide(chart_layout)
         slide.shapes.title.text = "Topic Modeling Results"
         slide.placeholders[1].text = "Topic Modeling requires more unique input (at least two unique entities)."
     pptx_buffer.seek(0)
     return pptx_buffer
+# --- NEW CSV GENERATION FUNCTION ---
 def generate_entity_csv(df):
     """
     Generates a CSV file of the extracted entities in an in-memory buffer,
     return csv_buffer
 # -----------------------------------
+# --- Existing App Functionality (HTML) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
     Generates a full HTML report containing all analysis results and visualizations.
     """
     # 1. Generate Visualizations (Plotly HTML)
     </style></head><body>
     <div class="container">
         <h1>Entity and Topic Analysis Report</h1>
         <div class="metadata">
             <p><strong>Generated At:</strong> {time.strftime('%Y-%m-%d %H:%M:%S')}</p>
             <p><strong>Processing Time:</strong> {elapsed_time:.2f} seconds</p>
         <div class="highlighted-text-container">
             {highlighted_text_html}
         </div>
         <h2>2. Full Extracted Entities Table</h2>
         {entity_table_html}
         <h2>3. Data Visualizations</h2>
         <h3>3.1 Entity Distribution Treemap</h3>
         <div class="chart-box">{treemap_html}</div>
         <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
         <div class="chart-box">{pie_html}</div>
         <div class="chart-box">{bar_category_html}</div>
         <div class="chart-box">{bar_freq_html}</div>
         <h3>3.3 Entity Co-occurrence Network (Edges = Same Sentence)</h3>
         <div class="chart-box">{network_html}</div>
         <h2>4. Topic Modeling (LDA on Entities)</h2>
         {topic_charts_html}
     </div></body></html>
     """
     return html_content
         border: none;
         padding: 10px 20px;
         border-radius: 5px;
+        transition: background-color 0.3s;
+    }
+    .stButton > button:hover {
+        background-color: #E05C9E; /* Slightly darker pink on hover */
     }
     /* Expander header and content background */
     .streamlit-expanderHeader, .streamlit-expanderContent {
     """,
     unsafe_allow_html=True)
 st.subheader("NER and Topic Analysis Report Generator", divider="rainbow")
+st.link_button("by nlpblogs", "https://nlpblogs.com", type="secondary")
 expander = st.expander("**Important notes**")
 expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
+**Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and `kaleido`. If charts in the PPTX are blank, please check your environment's $\text{kaleido}$ installation/permissions.
 **Results:** Results are compiled into a single, comprehensive **HTML report**, a **PowerPoint (.pptx) file**, and a **CSV file** for easy download and sharing.
 **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
                 st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
             st.session_state.show_results = True
+# --- Generate report files, then display download buttons and an HTML preview ---
 if st.session_state.show_results:
     df = st.session_state.results_df
     if df.empty:
+        st.error("No entities were extracted from the text. The report cannot be generated.")
     else:
+        # --- Generate All Report Files/Buffers ---
+        with st.spinner("Generating Report Files (HTML, PPTX, CSV)..."):
+            # 1. HTML Report Generation
+            html_report_content = generate_html_report(
+                df,
+                st.session_state.last_text,
+                st.session_state.elapsed_time,
+                st.session_state.topic_results
+            )
+            # 2. PPTX Report Generation
+            pptx_buffer = generate_pptx_report(
                 df,
+                st.session_state.last_text,
+                st.session_state.elapsed_time,
+                st.session_state.topic_results,
+                reverse_category_mapping
             )
+            # 3. CSV Report Generation
+            csv_buffer = generate_entity_csv(df)
+        # --- Display Downloads and Preview ---
+        st.markdown("## Download Analysis Reports", anchor=False)
         st.markdown("---")
         col1, col2, col3 = st.columns(3)
+        with col1:
+            st.download_button(
+                label="Download HTML Report 🌐",
+                data=html_report_content,
+                file_name="entity_topic_report.html",
+                mime="text/html",
+                help="A full, interactive report with all charts."
+            )
+        with col2:
+            st.download_button(
+                label="Download PowerPoint (.pptx) 📊",
+                data=pptx_buffer,
+                file_name="entity_topic_slides.pptx",
+                mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
+                help="A summary presentation with static charts."
+            )
+        with col3:
+            st.download_button(
+                label="Download Raw Entities (.csv) 📋",
+                data=csv_buffer,
+                file_name="extracted_entities.csv",
+                mime="text/csv",
+                help="Raw data table of all extracted entities."
+            )
         st.markdown("---")
+        # --- Display Interactive Preview ---
+        st.markdown("## Interactive HTML Report Preview", anchor=False)
+        st.info("Scroll within the box below to see the complete report and interactive charts.")
+        # Display the HTML report using the Streamlit component
+        components.html(
+            html_report_content,
+            height=800,
+            scrolling=True
         )