AIEcosystem commited on
Commit
adf3d87
·
verified ·
1 Parent(s): f91c3e9

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +307 -139
src/streamlit_app.py CHANGED
@@ -10,6 +10,14 @@ import plotly.graph_objects as go
10
  import numpy as np
11
  import re
12
  import string
 
 
 
 
 
 
 
 
13
  # --- Stable Scikit-learn LDA Imports ---
14
  from sklearn.feature_extraction.text import TfidfVectorizer
15
  from sklearn.decomposition import LatentDirichletAllocation
@@ -50,7 +58,18 @@ entity_color_map = {
50
  "nationality_religion": "#fb7185"
51
  }
52
 
53
- # --- Utility Functions ---
 
 
 
 
 
 
 
 
 
 
 
54
  def extract_label(node_name):
55
  """Extracts the label from a node string like 'Text (Label)'."""
56
  match = re.search(r'\(([^)]+)\)$', node_name)
@@ -88,22 +107,17 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
88
  """
89
  Performs basic Topic Modeling using LDA on the extracted entities
90
  and returns structured data for visualization.
91
-
92
- Includes updated TF-IDF parameters (stop_words='english', max_df=0.95, min_df=1).
93
  """
94
- # Aggregate all unique entity text into a single document list
95
  documents = df_entities['text'].unique().tolist()
96
  if len(documents) < 2:
97
  return None
98
 
99
  N = min(num_top_words, len(documents))
100
  try:
101
- # UPDATED: Added stop_words='english' to filter common words tokenized
102
- # from multi-word entities (e.g., "The" from "The White House").
103
  tfidf_vectorizer = TfidfVectorizer(
104
  max_df=0.95,
105
- min_df=1, # Retained at 1 to keep all unique entities
106
- stop_words='english' # <-- THIS IS THE KEY ADDITION
107
  )
108
  tfidf = tfidf_vectorizer.fit_transform(documents)
109
  tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
@@ -130,113 +144,102 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
130
 
131
  def create_topic_word_bubbles(df_topic_data):
132
  """Generates a Plotly Bubble Chart for top words across all topics."""
 
 
 
133
 
134
  if df_topic_data.empty:
135
  return None
136
  fig = px.scatter(
137
  df_topic_data,
138
- x='Word',
139
- y='Topic_ID',
140
- size='Weight',
141
- color='Topic_ID',
 
142
  size_max=80,
143
  title='Topic Word Weights (Bubble Chart)',
144
  color_discrete_sequence=px.colors.qualitative.Bold,
145
- hover_data={'Word': True, 'Weight': ':.3f', 'Topic_ID': False}
 
 
 
 
 
146
  )
147
  fig.update_layout(
148
  xaxis_title="Entity/Word (Bubble size = Word Weight)",
149
- yaxis_title="Topic ID",
150
  xaxis={'tickangle': -45, 'showgrid': False},
151
- yaxis={'showgrid': True, 'autorange': 'reversed'},
152
  showlegend=True,
153
  plot_bgcolor='#FFF0F5',
154
  paper_bgcolor='#FFF0F5',
155
  height=600,
156
  margin=dict(t=50, b=100, l=50, r=10),
157
  )
158
-
159
- fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
160
-
161
  return fig
162
 
163
  def generate_network_graph(df, raw_text):
164
  """
165
  Generates a network graph visualization (Node Plot) with edges
166
- based on entity co-occurrence in sentences.
167
  """
 
168
  entity_counts = df['text'].value_counts().reset_index()
169
  entity_counts.columns = ['text', 'frequency']
170
 
171
- # Merge counts with unique entities (text + label)
172
  unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
173
  if unique_entities.shape[0] < 2:
174
- # Return a simple figure with a message if not enough data
175
  return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
176
 
177
  num_nodes = len(unique_entities)
178
  thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
179
 
180
  radius = 10
181
- # Assign circular positions + a little randomness
182
  unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
183
  unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
184
 
185
- # Map entity text to its coordinates for easy lookup
186
  pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
187
- # ----------------------------------------------------------------------
188
- # 1. Identify Edges (Co-occurrence in sentences)
189
- # ----------------------------------------------------------------------
190
  edges = set()
191
 
192
- # Simple sentence segmentation (handles standard punctuation followed by space)
193
  sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
194
  for sentence in sentences:
195
- # Find unique entities that are substrings of this sentence
196
  entities_in_sentence = []
197
  for entity_text in unique_entities['text'].unique():
198
  if entity_text.lower() in sentence.lower():
199
  entities_in_sentence.append(entity_text)
200
- # Create edges (pairs) based on co-occurrence
201
  unique_entities_in_sentence = list(set(entities_in_sentence))
202
 
203
- # Create all unique pairs (edges)
204
  for i in range(len(unique_entities_in_sentence)):
205
  for j in range(i + 1, len(unique_entities_in_sentence)):
206
  node1 = unique_entities_in_sentence[i]
207
  node2 = unique_entities_in_sentence[j]
208
-
209
- # Ensure consistent order for the set to avoid duplicates like (A, B) and (B, A)
210
  edge_tuple = tuple(sorted((node1, node2)))
211
  edges.add(edge_tuple)
212
- # ----------------------------------------------------------------------
213
- # 2. Create Plotly Trace Data for Edges
214
- # ----------------------------------------------------------------------
215
  edge_x = []
216
  edge_y = []
217
 
218
  for edge in edges:
219
  n1, n2 = edge
220
  if n1 in pos_map and n2 in pos_map:
221
- # Append coordinates for line segment: [x1, x2, None] for separation
222
  edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
223
  edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])
224
 
225
  fig = go.Figure()
226
 
227
- # Add Edge Trace (Lines)
228
  edge_trace = go.Scatter(
229
  x=edge_x, y=edge_y,
230
  line=dict(width=0.5, color='#888'),
231
  hoverinfo='none',
232
  mode='lines',
233
  name='Co-occurrence Edges',
234
- showlegend=False # Edges don't need a legend entry
235
  )
236
  fig.add_trace(edge_trace)
237
- # ----------------------------------------------------------------------
238
- # 3. Add Node Trace (Markers)
239
- # ----------------------------------------------------------------------
240
  fig.add_trace(go.Scatter(
241
  x=unique_entities['x'],
242
  y=unique_entities['y'],
@@ -244,8 +247,6 @@ def generate_network_graph(df, raw_text):
244
  name='Entities',
245
  text=unique_entities['text'],
246
  textposition="top center",
247
- # FIX: Explicitly set showlegend=False for the main node trace
248
- # as we are creating separate traces for the legend colors below.
249
  showlegend=False,
250
  marker=dict(
251
  size=unique_entities['frequency'] * 5 + 10,
@@ -264,7 +265,6 @@ def generate_network_graph(df, raw_text):
264
  )
265
  ))
266
 
267
- # Adding discrete traces for the legend based on unique labels
268
  legend_traces = []
269
  seen_labels = set()
270
  for index, row in unique_entities.iterrows():
@@ -273,12 +273,7 @@ def generate_network_graph(df, raw_text):
273
  seen_labels.add(label)
274
  color = entity_color_map.get(label, '#cccccc')
275
  legend_traces.append(go.Scatter(
276
- x=[None],
277
- y=[None],
278
- mode='markers',
279
- marker=dict(size=10, color=color),
280
- name=f"{label.capitalize()}",
281
- showlegend=True # Ensure legend traces are explicitly visible
282
  ))
283
  for trace in legend_traces:
284
  fig.add_trace(trace)
@@ -287,7 +282,6 @@ def generate_network_graph(df, raw_text):
287
  title='Entity Co-occurrence Network (Edges = Same Sentence)',
288
  showlegend=True,
289
  hovermode='closest',
290
- # Set explicit range to ensure padding for text labels on the edge
291
  xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
292
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
293
  plot_bgcolor='#f9f9f9',
@@ -298,27 +292,175 @@ def generate_network_graph(df, raw_text):
298
 
299
  return fig
300
 
301
- def generate_html_report(df, text_input, elapsed_time, df_topic_data):
 
 
 
302
  """
303
- Generates a full HTML report containing all analysis results and visualizations.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
 
305
- FIX APPLIED: Removed the CSS Grid layout for the three comparative charts
306
- (Pie, Category Count, Frequency) and stacked them vertically to prevent
307
- overlapping and ensure reliable rendering across devices.
308
- FIX 2 APPLIED: Increased the bottom margin (b) for both bar charts to prevent X-axis labels from being cut off.
309
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
 
 
 
 
 
 
 
311
  # 1. Generate Visualizations (Plotly HTML)
312
 
313
  # 1a. Treemap
314
- # FIX 1: Explicitly set a color_discrete_sequence to prevent the Treemap from being black
315
  fig_treemap = px.treemap(
316
  df,
317
  path=[px.Constant("All Entities"), 'category', 'label', 'text'],
318
  values='score',
319
  color='category',
320
  title="Entity Distribution by Category and Label",
321
- color_discrete_sequence=px.colors.qualitative.Dark24 # Use a robust color sequence
322
  )
323
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
324
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
@@ -332,28 +474,25 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
332
 
333
  # 1c. Bar Chart (Category Count)
334
  fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
335
- # FIX 2: Increased bottom margin from b=10 to b=100
336
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
337
  bar_category_html = fig_bar_category.to_html(full_html=False,include_plotlyjs='cdn')
338
 
339
  # 1d. Bar Chart (Most Frequent Entities)
340
  word_counts = df['text'].value_counts().reset_index()
341
  word_counts.columns = ['Entity', 'Count']
342
- # Top 10 repeating entities
343
  repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
344
  bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
345
 
346
  if not repeating_entities.empty:
347
  fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
348
- # FIX 2: Increased bottom margin from b=10 to b=100
349
  fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
350
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
351
 
352
- # 1e. Network Graph HTML - UPDATED to pass text_input
353
  network_fig = generate_network_graph(df, text_input)
354
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
355
 
356
- # 1f. Topic Charts HTML (Now a single Bubble Chart with Placeholder logic)
357
  topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
358
  if df_topic_data is not None and not df_topic_data.empty:
359
  bubble_figure = create_topic_word_bubbles(df_topic_data)
@@ -362,7 +501,6 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
362
  else:
363
  topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
364
  else:
365
- # Placeholder for low data
366
  topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #FF69B4;">'
367
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
368
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
@@ -390,32 +528,11 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
390
  h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
391
  h3 {{ color: #555; margin-top: 20px; }}
392
  .metadata {{ background-color: #FFE4E1; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
393
- /* The 'grid' class is kept for potential future use or the network graph, but not used for 3.2 */
394
- .grid {{
395
- display: grid;
396
- grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
397
- gap: 20px;
398
- margin-top: 20px;
399
- }}
400
- .chart-box {{
401
- background-color: #f9f9f9;
402
- padding: 15px;
403
- border-radius: 8px;
404
- box-shadow: 0 2px 4px rgba(0,0,0,0.05);
405
- /* Important: Set a minimum width for the chart box, and margin for stacking */
406
- min-width: 0;
407
- margin-bottom: 20px; /* NEW: Added margin for separation when stacked */
408
- }}
409
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
410
  table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
411
  table th {{ background-color: #f0f0f0; }}
412
- /* Specific styling for highlighted text element */
413
  .highlighted-text {{ border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
414
- @media (max-width: 1050px) {{ /* Increased breakpoint to help prevent overlap */
415
- .grid {{
416
- grid-template-columns: 1fr; /* Stack charts vertically on smaller screens */
417
- }}
418
- }}
419
  </style></head><body>
420
  <div class="container">
421
  <h1>Entity and Topic Analysis Report</h1>
@@ -438,7 +555,6 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
438
  <div class="chart-box">{treemap_html}</div>
439
  <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
440
 
441
- <!-- FIX: Charts are now in separate chart-box divs (not a 'grid') for guaranteed vertical stacking -->
442
  <div class="chart-box">{pie_html}</div>
443
  <div class="chart-box">{bar_category_html}</div>
444
  <div class="chart-box">{bar_freq_html}</div>
@@ -453,6 +569,66 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
453
  """
454
  return html_content
455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
456
  # --- Page Configuration and Styling (No Sidebar) ---
457
  st.set_page_config(layout="wide", page_title="NER & Topic Report App")
458
  st.markdown(
@@ -492,7 +668,8 @@ st.subheader("NER and Topic Analysis Report Generator", divider="rainbow")
492
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
493
  expander = st.expander("**Important notes**")
494
  expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
495
- **Results:** Results are compiled into a single, comprehensive **HTML report** for easy download and sharing.
 
496
  **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
497
  st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
498
 
@@ -502,22 +679,11 @@ COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
502
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
503
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
504
 
505
- # --- Label Definitions and Category Mapping ---
506
- labels = list(entity_color_map.keys())
507
- category_mapping = {
508
- "People & Groups": ["person", "username", "hashtag", "mention", "community", "position", "nationality_religion"],
509
- "Location & Organization": ["location", "organization"],
510
- "Temporal & Events": ["event", "date"],
511
- "Digital & Products": ["platform", "product", "media_type", "url"],
512
- }
513
- reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
514
-
515
  # --- Model Loading ---
516
- @st.cache_resource
517
  def load_ner_model():
518
  """Loads the GLiNER model and caches it."""
519
  try:
520
- # Use nested_ner=True and num_gen_sequences=2 for potentially higher recall
521
  return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
522
  except Exception as e:
523
  st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
@@ -553,14 +719,12 @@ if 'elapsed_time' not in st.session_state:
553
  st.session_state.elapsed_time = 0.0
554
  if 'topic_results' not in st.session_state:
555
  st.session_state.topic_results = None
556
- # FIX: Initialize the text area key with default text before st.text_area is called
557
  if 'my_text_area' not in st.session_state:
558
  st.session_state.my_text_area = DEFAULT_TEXT
559
 
560
  # --- Clear Button Function (MODIFIED) ---
561
  def clear_text():
562
  """Clears the text area (sets it to an empty string) and hides results."""
563
- # MODIFIED: Set to empty string for true clearing
564
  st.session_state['my_text_area'] = ""
565
  st.session_state.show_results = False
566
  st.session_state.last_text = ""
@@ -570,7 +734,6 @@ def clear_text():
570
 
571
  # --- Text Input and Clear Button ---
572
  word_limit = 1000
573
- # The text area now safely uses the pre-initialized session state value
574
  text = st.text_area(
575
  f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
576
  height=250,
@@ -628,7 +791,7 @@ if st.button("Results"):
628
  st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
629
  st.session_state.show_results = True
630
 
631
- # --- Display Download Link and Results (FIXED INDENTATION AND NEW LAYOUT) ---
632
  if st.session_state.show_results:
633
  df = st.session_state.results_df
634
  df_topic_data = st.session_state.topic_results
@@ -642,7 +805,7 @@ if st.session_state.show_results:
642
  st.markdown("### 1. Analyzed Text with Highlighted Entities")
643
  st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
644
 
645
- # 2. Entity Summary Table (Count by Label - kept outside tabs)
646
  st.markdown("### 2. Entity Summary Table (Count by Label)")
647
  grouped_entity_table = df['label'].value_counts().reset_index()
648
  grouped_entity_table.columns = ['Entity Label', 'Count']
@@ -650,80 +813,63 @@ if st.session_state.show_results:
650
  st.dataframe(grouped_entity_table[['Category', 'Entity Label', 'Count']], use_container_width=True)
651
  st.markdown("---")
652
 
 
653
  st.markdown("### 3. Detailed Entity Analysis")
654
- # 3. New Tabs: Tab 1: Category Details Table | Tab 2: Treemap
655
  tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
656
 
657
- # TAB 1: Detailed Entities Table Grouped by Category
658
  with tab_category_details:
659
  st.markdown("#### Detailed Entities Table (Grouped by Category)")
660
- # Get the unique categories for creating inner tabs
661
  unique_categories = list(category_mapping.keys())
662
-
663
- # Create inner tabs dynamically based on the available categories
664
  tabs_category = st.tabs(unique_categories)
665
- # We iterate over the categories and tabs simultaneously
666
  for category, tab in zip(unique_categories, tabs_category):
667
- # Filter the main DataFrame for the current category
668
  df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
669
-
670
  with tab:
671
  st.markdown(f"##### {category} Entities ({len(df_category)} total)")
672
  if not df_category.empty:
673
- # Display the DataFrame for the current category
674
  st.dataframe(
675
  df_category,
676
  use_container_width=True,
677
- # Format the score for better readability
678
  column_config={'score': st.column_config.NumberColumn(format="%.4f")}
679
  )
680
  else:
681
  st.info(f"No entities of category **{category}** were found in the text.")
682
- # TAB 2: Treemap
683
  with tab_treemap_viz:
684
  st.markdown("#### Treemap: Entity Distribution")
685
- # Treemap
686
- # FIX 1 (Streamlit): Added a robust color sequence here too for consistency in the Streamlit plot
687
  fig_treemap = px.treemap(
688
  df,
689
  path=[px.Constant("All Entities"), 'category', 'label', 'text'],
690
  values='score',
691
  color='category',
692
  title="Entity Distribution by Category and Label",
693
- color_discrete_sequence=px.colors.qualitative.Dark24 # Applied fix here
694
  )
695
  fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
696
  st.plotly_chart(fig_treemap, use_container_width=True)
697
 
698
- # 4. Comparative Charts (Keep outside the new tabs, as in original code structure)
699
  st.markdown("---")
700
  st.markdown("### 4. Comparative Charts")
701
 
702
- # FIX: The three comparative charts are generated here and will be stacked vertically
703
- # in the HTML report output.
704
- col1, col2, col3 = st.columns(3) # Use Streamlit columns for the *Streamlit* preview
705
 
706
  grouped_counts = df['category'].value_counts().reset_index()
707
  grouped_counts.columns = ['Category', 'Count']
708
 
709
- # Pie Chart
710
- with col1:
711
  fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
712
  fig_pie.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350)
713
  st.plotly_chart(fig_pie, use_container_width=True)
714
 
715
- # Bar Chart (Category Count)
716
- with col2:
717
  fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
718
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
719
  st.plotly_chart(fig_bar_category, use_container_width=True)
720
 
721
- # Bar Chart (Most Frequent Entities)
722
- word_counts = df['text'].value_counts().reset_index()
723
- word_counts.columns = ['Entity', 'Count']
724
- repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
725
-
726
- with col3:
727
  if not repeating_entities.empty:
728
  fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
729
  fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
@@ -733,15 +879,12 @@ if st.session_state.show_results:
733
 
734
  st.markdown("---")
735
  st.markdown("### 5. Entity Co-occurrence Network")
736
-
737
- # 5. Network Graph
738
  network_fig = generate_network_graph(df, st.session_state.last_text)
739
  st.plotly_chart(network_fig, use_container_width=True)
740
 
741
  st.markdown("---")
742
  st.markdown("### 6. Topic Modeling Analysis")
743
 
744
- # 6. Topic Modeling Bubble Chart
745
  if df_topic_data is not None and not df_topic_data.empty:
746
  bubble_figure = create_topic_word_bubbles(df_topic_data)
747
  if bubble_figure:
@@ -753,14 +896,39 @@ if st.session_state.show_results:
753
 
754
  # --- Report Download ---
755
  st.markdown("---")
756
- st.markdown("### Download Full HTML Report")
757
 
 
758
  html_report = generate_html_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data)
759
  st.download_button(
760
- label="Download HTML Report",
761
  data=html_report,
762
  file_name="ner_topic_report.html",
763
  mime="text/html",
764
  type="primary"
765
  )
766
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  import numpy as np
11
  import re
12
  import string
13
+ import json
14
+ # --- PPTX Imports (NEW) ---
15
+ from io import BytesIO
16
+ from pptx import Presentation
17
+ from pptx.util import Inches, Pt
18
+ from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
19
+ import plotly.io as pio # Required for image export
20
+ # ---------------------------
21
  # --- Stable Scikit-learn LDA Imports ---
22
  from sklearn.feature_extraction.text import TfidfVectorizer
23
  from sklearn.decomposition import LatentDirichletAllocation
 
58
  "nationality_religion": "#fb7185"
59
  }
60
 
61
+ # --- Label Definitions and Category Mapping (Used by the App and PPTX) ---
62
+ labels = list(entity_color_map.keys())
63
+ category_mapping = {
64
+ "People & Groups": ["person", "username", "hashtag", "mention", "community", "position", "nationality_religion"],
65
+ "Location & Organization": ["location", "organization"],
66
+ "Temporal & Events": ["event", "date"],
67
+ "Digital & Products": ["platform", "product", "media_type", "url"],
68
+ }
69
+ reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
70
+
71
+
72
+ # --- Utility Functions for Analysis and Plotly ---
73
  def extract_label(node_name):
74
  """Extracts the label from a node string like 'Text (Label)'."""
75
  match = re.search(r'\(([^)]+)\)$', node_name)
 
107
  """
108
  Performs basic Topic Modeling using LDA on the extracted entities
109
  and returns structured data for visualization.
 
 
110
  """
 
111
  documents = df_entities['text'].unique().tolist()
112
  if len(documents) < 2:
113
  return None
114
 
115
  N = min(num_top_words, len(documents))
116
  try:
 
 
117
  tfidf_vectorizer = TfidfVectorizer(
118
  max_df=0.95,
119
+ min_df=1,
120
+ stop_words='english'
121
  )
122
  tfidf = tfidf_vectorizer.fit_transform(documents)
123
  tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
 
144
 
145
  def create_topic_word_bubbles(df_topic_data):
146
  """Generates a Plotly Bubble Chart for top words across all topics."""
147
+ # Renaming columns to match the output of perform_topic_modeling
148
+ df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
149
+ df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position in the app
150
 
151
  if df_topic_data.empty:
152
  return None
153
  fig = px.scatter(
154
  df_topic_data,
155
+ x='x_pos',
156
+ y='weight',
157
+ size='weight',
158
+ color='topic',
159
+ hover_name='word',
160
  size_max=80,
161
  title='Topic Word Weights (Bubble Chart)',
162
  color_discrete_sequence=px.colors.qualitative.Bold,
163
+ labels={
164
+ 'x_pos': 'Entity/Word Index',
165
+ 'weight': 'Word Weight',
166
+ 'topic': 'Topic ID'
167
+ },
168
+ custom_data=['word', 'weight', 'topic']
169
  )
170
  fig.update_layout(
171
  xaxis_title="Entity/Word (Bubble size = Word Weight)",
172
+ yaxis_title="Word Weight",
173
  xaxis={'tickangle': -45, 'showgrid': False},
174
+ yaxis={'showgrid': True},
175
  showlegend=True,
176
  plot_bgcolor='#FFF0F5',
177
  paper_bgcolor='#FFF0F5',
178
  height=600,
179
  margin=dict(t=50, b=100, l=50, r=10),
180
  )
181
+ fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>', marker=dict(line=dict(width=1, color='DarkSlateGrey')))
 
 
182
  return fig
183
 
184
  def generate_network_graph(df, raw_text):
185
  """
186
  Generates a network graph visualization (Node Plot) with edges
187
+ based on entity co-occurrence in sentences. (Content omitted for brevity but assumed to be here).
188
  """
189
+ # Using the existing generate_network_graph logic from previous context...
190
  entity_counts = df['text'].value_counts().reset_index()
191
  entity_counts.columns = ['text', 'frequency']
192
 
 
193
  unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
194
  if unique_entities.shape[0] < 2:
 
195
  return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
196
 
197
  num_nodes = len(unique_entities)
198
  thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
199
 
200
  radius = 10
 
201
  unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
202
  unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
203
 
 
204
  pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
 
 
 
205
  edges = set()
206
 
 
207
  sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
208
  for sentence in sentences:
 
209
  entities_in_sentence = []
210
  for entity_text in unique_entities['text'].unique():
211
  if entity_text.lower() in sentence.lower():
212
  entities_in_sentence.append(entity_text)
 
213
  unique_entities_in_sentence = list(set(entities_in_sentence))
214
 
 
215
  for i in range(len(unique_entities_in_sentence)):
216
  for j in range(i + 1, len(unique_entities_in_sentence)):
217
  node1 = unique_entities_in_sentence[i]
218
  node2 = unique_entities_in_sentence[j]
 
 
219
  edge_tuple = tuple(sorted((node1, node2)))
220
  edges.add(edge_tuple)
221
+
 
 
222
  edge_x = []
223
  edge_y = []
224
 
225
  for edge in edges:
226
  n1, n2 = edge
227
  if n1 in pos_map and n2 in pos_map:
 
228
  edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
229
  edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])
230
 
231
  fig = go.Figure()
232
 
 
233
  edge_trace = go.Scatter(
234
  x=edge_x, y=edge_y,
235
  line=dict(width=0.5, color='#888'),
236
  hoverinfo='none',
237
  mode='lines',
238
  name='Co-occurrence Edges',
239
+ showlegend=False
240
  )
241
  fig.add_trace(edge_trace)
242
+
 
 
243
  fig.add_trace(go.Scatter(
244
  x=unique_entities['x'],
245
  y=unique_entities['y'],
 
247
  name='Entities',
248
  text=unique_entities['text'],
249
  textposition="top center",
 
 
250
  showlegend=False,
251
  marker=dict(
252
  size=unique_entities['frequency'] * 5 + 10,
 
265
  )
266
  ))
267
 
 
268
  legend_traces = []
269
  seen_labels = set()
270
  for index, row in unique_entities.iterrows():
 
273
  seen_labels.add(label)
274
  color = entity_color_map.get(label, '#cccccc')
275
  legend_traces.append(go.Scatter(
276
+ x=[None], y=[None], mode='markers', marker=dict(size=10, color=color), name=f"{label.capitalize()}", showlegend=True
 
 
 
 
 
277
  ))
278
  for trace in legend_traces:
279
  fig.add_trace(trace)
 
282
  title='Entity Co-occurrence Network (Edges = Same Sentence)',
283
  showlegend=True,
284
  hovermode='closest',
 
285
  xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
286
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
287
  plot_bgcolor='#f9f9f9',
 
292
 
293
  return fig
294
 
295
+
296
+ # --- PPTX HELPER FUNCTIONS (Integrated from generate_report.py) ---
297
+
298
+ def fig_to_image_buffer(fig):
299
  """
300
+ Converts a Plotly figure object into a BytesIO buffer containing PNG data.
301
+ Requires 'kaleido' to be installed for image export.
302
+ Returns None if export fails.
303
+ """
304
+ try:
305
+ # Use pio.to_image to convert the figure to a PNG byte array
306
+ img_bytes = pio.to_image(fig, format="png", width=900, height=500, scale=2)
307
+ img_buffer = BytesIO(img_bytes)
308
+ return img_buffer
309
+ except Exception as e:
310
+ # In a Streamlit environment, we can't show this error directly in the app execution flow
311
+ print(f"Error converting Plotly figure to image: {e}")
312
+ return None
313
+
314
# --- PPTX GENERATION FUNCTION (Integrated and Adapted) ---

def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_category_mapping):
    """
    Generates a PowerPoint presentation (.pptx) file containing key analysis results.

    Args:
        df: DataFrame of extracted entities with at least the columns
            'text', 'label', 'category', and 'score'.
        text_input: The raw source text that was analyzed.
        elapsed_time: Processing time in seconds, shown on the title slide.
        df_topic_data: DataFrame of LDA topic results (columns Topic_ID /
            Word / Weight), or None when topic modeling was unavailable.
        reverse_category_mapping: dict mapping an entity label to its
            category name; labels not present map to 'Other'.

    Returns:
        BytesIO buffer containing the saved .pptx file, seeked to 0.
    """
    prs = Presentation()
    # NOTE: layout 5 in the default python-pptx template is "Title Only".
    # It has NO body placeholder, so every piece of content below must be
    # added as an explicit shape (text box / table / picture).
    chart_layout = prs.slide_layouts[5]

    # 1. Title Slide (layout 0 does have a subtitle placeholder at index 1)
    title_slide_layout = prs.slide_layouts[0]
    slide = prs.slides.add_slide(title_slide_layout)
    title = slide.shapes.title
    subtitle = slide.placeholders[1]
    title.text = "NER & Topic Analysis Report"
    subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"

    # 2. Source Text Slide
    slide = prs.slides.add_slide(chart_layout)
    slide.shapes.title.text = "Analyzed Source Text"

    # Add the raw text to an explicit text box (layout has no body frame).
    left = Inches(0.5)
    top = Inches(1.5)
    width = Inches(9.0)
    height = Inches(5.0)
    txBox = slide.shapes.add_textbox(left, top, width, height)
    tf = txBox.text_frame
    tf.margin_top = Inches(0.1)
    tf.margin_bottom = Inches(0.1)
    tf.word_wrap = True
    p = tf.add_paragraph()
    p.text = text_input
    p.font.size = Pt(14)
    p.font.name = 'Arial'

    # 3. Entity Summary Slide (Table)
    slide = prs.slides.add_slide(chart_layout)
    slide.shapes.title.text = "Entity Summary (Count by Category and Label)"

    # Build the summary table using the app's established grouping logic.
    grouped_entity_table = df['label'].value_counts().reset_index()
    grouped_entity_table.columns = ['Entity Label', 'Count']
    grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(
        lambda x: reverse_category_mapping.get(x, 'Other')
    )
    grouped_entity_table = grouped_entity_table[['Category', 'Entity Label', 'Count']]

    rows, cols = grouped_entity_table.shape
    x, y, cx, cy = Inches(1), Inches(1.5), Inches(8), Inches(4.5)
    # Add 1 extra row for the header.
    table = slide.shapes.add_table(rows + 1, cols, x, y, cx, cy).table

    # Set column widths.
    table.columns[0].width = Inches(2.7)
    table.columns[1].width = Inches(2.8)
    table.columns[2].width = Inches(2.5)

    # Set column headers.
    for i, col in enumerate(grouped_entity_table.columns):
        cell = table.cell(0, i)
        cell.text = col
        cell.fill.solid()

    # Fill in the data rows (offset by 1 to skip the header row).
    for i in range(rows):
        for j in range(cols):
            cell = table.cell(i + 1, j)
            cell.text = str(grouped_entity_table.iloc[i, j])

    # 4. Treemap Slide (Visualization)
    fig_treemap = px.treemap(
        df,
        path=[px.Constant("All Entities"), 'category', 'label', 'text'],
        values='score',
        color='category',
        title="Entity Distribution by Category and Label",
        color_discrete_sequence=px.colors.qualitative.Dark24
    )
    fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    treemap_image = fig_to_image_buffer(fig_treemap)

    # Image export can fail (e.g. kaleido missing); skip the slide then.
    if treemap_image:
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Entity Distribution Treemap"
        slide.shapes.add_picture(treemap_image, Inches(0.75), Inches(1.5), width=Inches(8.5))

    # 5. Entity Count Bar Chart Slide (Visualization)
    grouped_counts = df['category'].value_counts().reset_index()
    grouped_counts.columns = ['Category', 'Count']
    fig_bar_category = px.bar(
        grouped_counts,
        x='Category',
        y='Count',
        color='Category',
        title='Total Entities per Category',
        color_discrete_sequence=px.colors.qualitative.Pastel
    )
    fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
    bar_category_image = fig_to_image_buffer(fig_bar_category)

    if bar_category_image:
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Total Entities per Category"
        slide.shapes.add_picture(bar_category_image, Inches(0.75), Inches(1.5), width=Inches(8.5))

    # 6. Topic Modeling Bubble Chart Slide
    if df_topic_data is not None and not df_topic_data.empty:
        # Rename columns into the format expected by create_topic_word_bubbles.
        df_topic_data_pptx = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
        bubble_figure = create_topic_word_bubbles(df_topic_data_pptx)
        bubble_image = fig_to_image_buffer(bubble_figure)
        if bubble_image:
            slide = prs.slides.add_slide(chart_layout)
            slide.shapes.title.text = "Topic Word Weights (Bubble Chart)"
            slide.shapes.add_picture(bubble_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
    else:
        # Placeholder slide if topic modeling is not available.
        # BUGFIX: layout 5 ("Title Only") has no placeholder at index 1, so
        # the previous slide.placeholders[1] access raised KeyError. The
        # message is now added as an explicit text box instead.
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Topic Modeling Results"
        msg_box = slide.shapes.add_textbox(Inches(0.75), Inches(1.5), Inches(8.5), Inches(1.0))
        msg_box.text_frame.word_wrap = True
        msg_box.text_frame.text = "Topic Modeling requires more unique input (at least two unique entities)."

    # Save the presentation to an in-memory buffer for st.download_button.
    pptx_buffer = BytesIO()
    prs.save(pptx_buffer)
    pptx_buffer.seek(0)
    return pptx_buffer
446
 
447
+ # --- Existing App Functionality (HTML and JSON) ---
448
+
449
+ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
450
+ """
451
+ Generates a full HTML report containing all analysis results and visualizations.
452
+ (Content omitted for brevity but assumed to be here).
453
+ """
454
  # 1. Generate Visualizations (Plotly HTML)
455
 
456
  # 1a. Treemap
 
457
  fig_treemap = px.treemap(
458
  df,
459
  path=[px.Constant("All Entities"), 'category', 'label', 'text'],
460
  values='score',
461
  color='category',
462
  title="Entity Distribution by Category and Label",
463
+ color_discrete_sequence=px.colors.qualitative.Dark24
464
  )
465
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
466
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
 
474
 
475
  # 1c. Bar Chart (Category Count)
476
  fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
 
477
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
478
  bar_category_html = fig_bar_category.to_html(full_html=False,include_plotlyjs='cdn')
479
 
480
  # 1d. Bar Chart (Most Frequent Entities)
481
  word_counts = df['text'].value_counts().reset_index()
482
  word_counts.columns = ['Entity', 'Count']
 
483
  repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
484
  bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
485
 
486
  if not repeating_entities.empty:
487
  fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
 
488
  fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
489
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
490
 
491
+ # 1e. Network Graph HTML
492
  network_fig = generate_network_graph(df, text_input)
493
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
494
 
495
+ # 1f. Topic Charts HTML
496
  topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
497
  if df_topic_data is not None and not df_topic_data.empty:
498
  bubble_figure = create_topic_word_bubbles(df_topic_data)
 
501
  else:
502
  topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
503
  else:
 
504
  topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #FF69B4;">'
505
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
506
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
 
528
  h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
529
  h3 {{ color: #555; margin-top: 20px; }}
530
  .metadata {{ background-color: #FFE4E1; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
531
+ .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
533
  table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
534
  table th {{ background-color: #f0f0f0; }}
 
535
  .highlighted-text {{ border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
 
 
 
 
 
536
  </style></head><body>
537
  <div class="container">
538
  <h1>Entity and Topic Analysis Report</h1>
 
555
  <div class="chart-box">{treemap_html}</div>
556
  <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
557
 
 
558
  <div class="chart-box">{pie_html}</div>
559
  <div class="chart-box">{bar_category_html}</div>
560
  <div class="chart-box">{bar_freq_html}</div>
 
569
  """
570
  return html_content
571
 
572
def generate_presentation_json(df, elapsed_time, df_topic_data):
    """
    Build a JSON-serializable dict of the analysis results, structured as
    slide-by-slide data for import into a presentation tool.

    Args:
        df: DataFrame of extracted entities (needs 'text' and 'category').
        elapsed_time: Processing time in seconds.
        df_topic_data: DataFrame of topic-model output, or None.

    Returns:
        dict ready for json.dumps, or an {"error": ...} dict when df is empty.
    """
    if df.empty:
        return {"error": "No entities found for presentation export."}

    cat_counts = df['category'].value_counts()

    # Headline metrics for the introductory slide.
    metrics = {
        "Total Entities Found": len(df),
        "Unique Entities Found": len(df['text'].unique()),
        "Top_3_Entity_Categories": cat_counts.head(3).to_dict(),
    }

    # Per-category counts feeding the pie / bar charts.
    by_category = cat_counts.reset_index()
    by_category.columns = ['Category', 'Count']

    # Top 10 entities that appear more than once.
    freq = df['text'].value_counts().reset_index()
    freq.columns = ['Entity', 'Count']
    top_repeats = freq[freq['Count'] > 1].head(10)

    if df_topic_data is not None and not df_topic_data.empty:
        topic_payload = df_topic_data.to_dict('records')
    else:
        topic_payload = "Not enough unique data for topic modeling."

    return {
        "ReportTitle": "NER and Topic Analysis Presentation Data",
        "GeneratedAt": time.strftime('%Y-%m-%d %H:%M:%S'),
        "ProcessingTimeSeconds": f"{elapsed_time:.2f}",
        "Slides": [
            {
                "SlideTitle": "1. Analysis Overview and Key Metrics",
                "Metrics": metrics,
                "Note": "This data can be used for the introductory slide."
            },
            {
                "SlideTitle": "2. Entity Category Distribution (Chart Data)",
                "Data": by_category.to_dict('records'),
                "Note": "Data for Pie Chart and Category Count Bar Chart."
            },
            {
                "SlideTitle": "3. Most Frequent Entities (Top 10)",
                "Data": top_repeats.to_dict('records'),
                "Note": "Data for the Top 10 Frequent Entities Bar Chart."
            },
            {
                "SlideTitle": "4. Topic Modeling Results (Key Words)",
                "Data": topic_payload,
                "Note": "Key entities and their weights per topic from LDA."
            }
        ]
    }
630
+
631
+
632
  # --- Page Configuration and Styling (No Sidebar) ---
633
  st.set_page_config(layout="wide", page_title="NER & Topic Report App")
634
  st.markdown(
 
668
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
669
  expander = st.expander("**Important notes**")
670
  expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
671
+ **Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and `kaleido`.
672
+ **Results:** Results are compiled into a single, comprehensive **HTML report** and a **PowerPoint (.pptx) file** for easy download and sharing.
673
  **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
674
  st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
675
 
 
679
  COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
680
  comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
681
 
 
 
 
 
 
 
 
 
 
 
682
# --- Model Loading ---
@st.cache_resource
def load_ner_model():
    """Loads the GLiNER model and caches it across Streamlit reruns.

    BUGFIX: the decorator was previously @st.cache_resourced, which does
    not exist in the Streamlit API and raised AttributeError at import
    time; the correct name is st.cache_resource.

    Returns:
        The loaded GLiNER model, or None if loading failed (an error is
        shown in the app via st.error).
    """
    try:
        return GLiNER.from_pretrained(
            "knowledgator/gliner-multitask-large-v0.5",
            nested_ner=True,
            num_gen_sequences=2,
            gen_constraints=labels,
        )
    except Exception as e:
        st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
        return None
719
  st.session_state.elapsed_time = 0.0
720
  if 'topic_results' not in st.session_state:
721
  st.session_state.topic_results = None
 
722
  if 'my_text_area' not in st.session_state:
723
  st.session_state.my_text_area = DEFAULT_TEXT
724
 
725
  # --- Clear Button Function (MODIFIED) ---
726
  def clear_text():
727
  """Clears the text area (sets it to an empty string) and hides results."""
 
728
  st.session_state['my_text_area'] = ""
729
  st.session_state.show_results = False
730
  st.session_state.last_text = ""
 
734
 
735
  # --- Text Input and Clear Button ---
736
  word_limit = 1000
 
737
  text = st.text_area(
738
  f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
739
  height=250,
 
791
  st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
792
  st.session_state.show_results = True
793
 
794
+ # --- Display Download Link and Results ---
795
  if st.session_state.show_results:
796
  df = st.session_state.results_df
797
  df_topic_data = st.session_state.topic_results
 
805
  st.markdown("### 1. Analyzed Text with Highlighted Entities")
806
  st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
807
 
808
+ # 2. Entity Summary Table
809
  st.markdown("### 2. Entity Summary Table (Count by Label)")
810
  grouped_entity_table = df['label'].value_counts().reset_index()
811
  grouped_entity_table.columns = ['Entity Label', 'Count']
 
813
  st.dataframe(grouped_entity_table[['Category', 'Entity Label', 'Count']], use_container_width=True)
814
  st.markdown("---")
815
 
816
+ # 3. Detailed Entity Analysis Tabs
817
  st.markdown("### 3. Detailed Entity Analysis")
 
818
  tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
819
 
 
820
  with tab_category_details:
821
  st.markdown("#### Detailed Entities Table (Grouped by Category)")
 
822
  unique_categories = list(category_mapping.keys())
 
 
823
  tabs_category = st.tabs(unique_categories)
 
824
  for category, tab in zip(unique_categories, tabs_category):
 
825
  df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
 
826
  with tab:
827
  st.markdown(f"##### {category} Entities ({len(df_category)} total)")
828
  if not df_category.empty:
 
829
  st.dataframe(
830
  df_category,
831
  use_container_width=True,
 
832
  column_config={'score': st.column_config.NumberColumn(format="%.4f")}
833
  )
834
  else:
835
  st.info(f"No entities of category **{category}** were found in the text.")
836
+
837
  with tab_treemap_viz:
838
  st.markdown("#### Treemap: Entity Distribution")
 
 
839
  fig_treemap = px.treemap(
840
  df,
841
  path=[px.Constant("All Entities"), 'category', 'label', 'text'],
842
  values='score',
843
  color='category',
844
  title="Entity Distribution by Category and Label",
845
+ color_discrete_sequence=px.colors.qualitative.Dark24
846
  )
847
  fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
848
  st.plotly_chart(fig_treemap, use_container_width=True)
849
 
850
+ # 4. Comparative Charts
851
  st.markdown("---")
852
  st.markdown("### 4. Comparative Charts")
853
 
854
+ col1, col2, col3 = st.columns(3)
 
 
855
 
856
  grouped_counts = df['category'].value_counts().reset_index()
857
  grouped_counts.columns = ['Category', 'Count']
858
 
859
+ with col1: # Pie Chart
 
860
  fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
861
  fig_pie.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350)
862
  st.plotly_chart(fig_pie, use_container_width=True)
863
 
864
+ with col2: # Bar Chart (Category Count)
 
865
  fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
866
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
867
  st.plotly_chart(fig_bar_category, use_container_width=True)
868
 
869
+ with col3: # Bar Chart (Most Frequent Entities)
870
+ word_counts = df['text'].value_counts().reset_index()
871
+ word_counts.columns = ['Entity', 'Count']
872
+ repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
 
 
873
  if not repeating_entities.empty:
874
  fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
875
  fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=30, b=10, l=10, r=10), height=350)
 
879
 
880
  st.markdown("---")
881
  st.markdown("### 5. Entity Co-occurrence Network")
 
 
882
  network_fig = generate_network_graph(df, st.session_state.last_text)
883
  st.plotly_chart(network_fig, use_container_width=True)
884
 
885
  st.markdown("---")
886
  st.markdown("### 6. Topic Modeling Analysis")
887
 
 
888
  if df_topic_data is not None and not df_topic_data.empty:
889
  bubble_figure = create_topic_word_bubbles(df_topic_data)
890
  if bubble_figure:
 
896
 
897
  # --- Report Download ---
898
  st.markdown("---")
899
+ st.markdown("### Download Full Report Artifacts")
900
 
901
+ # 1. HTML Report Download
902
  html_report = generate_html_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data)
903
  st.download_button(
904
+ label="Download Comprehensive HTML Report",
905
  data=html_report,
906
  file_name="ner_topic_report.html",
907
  mime="text/html",
908
  type="primary"
909
  )
910
 
911
+ # 2. PowerPoint PPTX Download (NEW)
912
+ pptx_buffer = generate_pptx_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data, reverse_category_mapping)
913
+ st.download_button(
914
+ label="Download Presentation Slides (.pptx)",
915
+ data=pptx_buffer,
916
+ file_name="ner_topic_report.pptx",
917
+ mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
918
+ type="primary"
919
+ )
920
+
921
+ # 3. Presentation JSON Data Download
922
+ presentation_data = generate_presentation_json(df, st.session_state.elapsed_time, df_topic_data)
923
+ presentation_json_data = json.dumps(presentation_data, indent=4)
924
+
925
+ st.download_button(
926
+ label="Download Presentation Data (JSON)",
927
+ data=presentation_json_data,
928
+ file_name="ner_presentation_data.json",
929
+ mime="application/json",
930
+ type="secondary"
931
+ )
932
+
933
+
934
+