AIEcosystem committed on
Commit
e64256b
·
verified ·
1 Parent(s): f18fcc7

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +57 -832
src/streamlit_app.py CHANGED
@@ -1,841 +1,66 @@
1
- import os
2
- os.environ['HF_HOME'] = '/tmp'
3
- import time
4
  import streamlit as st
5
- import streamlit.components.v1 as components
6
- import pandas as pd
7
- import io
8
  import plotly.express as px
9
- import plotly.graph_objects as go
10
- import numpy as np
11
- import re
12
- import string
13
- import json
14
- # --- PPTX Imports ---
15
  from io import BytesIO
16
  from pptx import Presentation
17
- from pptx.util import Inches, Pt
18
- from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
19
- import plotly.io as pio # Required for image export (needs kaleido installed)
20
- # ---------------------------
21
- # --- Stable Scikit-learn LDA Imports ---
22
- from sklearn.feature_extraction.text import TfidfVectorizer
23
- from sklearn.decomposition import LatentDirichletAllocation
24
- # ------------------------------
25
- from gliner import GLiNER
26
- from streamlit_extras.stylable_container import stylable_container
27
-
28
# Optional Comet ML integration: fall back to a no-op stub when the package
# is not installed so the app still runs without experiment logging.
try:
    from comet_ml import Experiment
except ImportError:
    class Experiment:
        """No-op stand-in mirroring the small part of the Comet API used here.

        FIX: the stub methods now accept **kwargs as well as *args; the real
        Comet client is routinely called with keyword arguments (e.g.
        ``log_table(filename, tabular_data=...)``), and the previous
        ``*args``-only signatures would raise TypeError in the fallback path.
        """
        def __init__(self, *args, **kwargs): pass
        def log_parameter(self, *args, **kwargs): pass
        def log_table(self, *args, **kwargs): pass
        def end(self): pass
37
# --- Model Home Directory (Fix for deployment environments) ---
# Set HF_HOME environment variable to a writable path
os.environ['HF_HOME'] = '/tmp'

# --- Color Map for Highlighting and Network Graph Nodes (Monochrome Palette) ---
# Maps every supported entity label to a grayscale hex color; used both by
# highlight_entities (inline spans) and generate_network_graph (node colors).
entity_color_map = {
    "person": "#444444",               # Dark Gray
    "username": "#666666",             # Medium-Dark Gray
    "hashtag": "#888888",              # Medium Gray
    "mention" : "#aaaaaa",             # Medium-Light Gray
    "organization": "#333333",         # Very Dark Gray
    "community": "#bbbbbb",            # Light Gray
    "position": "#555555",             # Slightly Dark Gray
    "location": "#777777",             # Neutral Gray
    "event": "#999999",                # Silver
    "product": "#cccccc",              # Light Gray/Silver
    "platform": "#222222",             # Black-ish
    "date": "#dddddd",                 # Very Light Gray
    "media_type": "#333333",           # Very Dark Gray
    "url": "#666666",                  # Medium-Dark Gray
    "nationality_religion": "#aaaaaa"  # Medium-Light Gray
}

# --- Label Definitions and Category Mapping (Used by the App and PPTX) ---
# The NER model predicts exactly these 15 labels; category_mapping groups
# them into the four high-level categories used in the report visuals.
labels = list(entity_color_map.keys())
category_mapping = {
    "People & Groups": ["person", "username", "hashtag", "mention", "community", "position", "nationality_religion"],
    "Location & Organization": ["location", "organization"],
    "Temporal & Events": ["event", "date"],
    "Digital & Products": ["platform", "product", "media_type", "url"],
}

# FIX: Corrected the dictionary comprehension to avoid redundant iteration variable (preventing UnboundLocalError)
# Inverse lookup: entity label -> report category.
reverse_category_mapping = {label: category
                            for category, label_list in category_mapping.items() for label in label_list}
73
-
74
-
75
- # --- Utility Functions for Analysis and Plotly ---
76
def extract_label(node_name):
    """Return the parenthesised label at the end of a node string.

    Node names look like ``'Text (Label)'``; when no trailing ``(...)``
    group is present, ``'Unknown'`` is returned.
    """
    found = re.search(r'\(([^)]+)\)$', node_name)
    if found:
        return found.group(1)
    return "Unknown"
80
-
81
def remove_trailing_punctuation(text_string):
    """Strip any run of ASCII punctuation characters from the end of a string."""
    # Walk backwards from the end until a non-punctuation character is found.
    end = len(text_string)
    while end > 0 and text_string[end - 1] in string.punctuation:
        end -= 1
    return text_string[:end]
84
-
85
def highlight_entities(text, df_entities):
    """Render *text* as HTML with every detected entity wrapped in a colored span.

    Spans are inserted from the rightmost entity backwards so that earlier
    character offsets stay valid after each insertion. Returns the raw text
    unchanged when no entities were found.
    """
    if df_entities.empty:
        return text

    rendered = text
    # Iterate rows sorted by descending start offset (rightmost first).
    for row in df_entities.sort_values(by='start', ascending=False).itertuples(index=False):
        # Monochrome palette; black fallback for unknown labels.
        color = entity_color_map.get(row.label, '#000000')
        span = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{row.label}">{row.text}</span>'
        rendered = rendered[:row.start] + span + rendered[row.end:]

    # Wrap in a div that mimics the Streamlit input box style (monochrome).
    return f'<div style="border: 1px solid #AAAAAA; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{rendered}</div>'
109
-
110
def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
    """
    Performs basic Topic Modeling using LDA on the extracted entities
    and returns structured data for visualization.

    Each unique entity text is treated as one "document". Returns a
    DataFrame with columns Topic_ID / Word / Weight, or None when fewer
    than two unique entities exist or when fitting fails.
    """
    documents = df_entities['text'].unique().tolist()
    # LDA is meaningless on a single document; caller treats None as "skip".
    if len(documents) < 2:
        return None

    # Cap the number of reported words at the number of documents available.
    N = min(num_top_words, len(documents))
    try:
        tfidf_vectorizer = TfidfVectorizer(
            max_df=0.95,
            min_df=1,
            stop_words='english'
        )
        tfidf = tfidf_vectorizer.fit_transform(documents)
        tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()

        # random_state fixed so repeated runs on the same text are stable.
        lda = LatentDirichletAllocation(
            n_components=num_topics, max_iter=5, learning_method='online', random_state=42, n_jobs=-1
        )
        lda.fit(tfidf)
        topic_data_list = []
        for topic_idx, topic in enumerate(lda.components_):
            # argsort slice picks the N highest-weighted vocabulary indices.
            top_words_indices = topic.argsort()[:-N - 1:-1]
            top_words = [tfidf_feature_names[i] for i in top_words_indices]
            word_weights = [topic[i] for i in top_words_indices]
            for word, weight in zip(top_words, word_weights):
                topic_data_list.append({
                    'Topic_ID': f'Topic #{topic_idx + 1}',
                    'Word': word,
                    'Weight': weight,
                })
        return pd.DataFrame(topic_data_list)
    except Exception as e:
        # Surface the failure in the UI but keep the app running.
        st.error(f"Topic modeling failed: {e}")
        return None
148
-
149
def create_topic_word_bubbles(df_topic_data):
    """Generates a Plotly Bubble Chart for top words across all topics.

    Accepts either the raw output of perform_topic_modeling
    (Topic_ID/Word/Weight) or an already-renamed frame (topic/word/weight);
    the rename below is a no-op for missing columns. Returns None for an
    empty frame.
    """
    # Renaming columns to match the output of perform_topic_modeling
    df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
    df_topic_data['x_pos'] = df_topic_data.index  # Use index for x-position in the app

    if df_topic_data.empty:
        return None
    fig = px.scatter(
        df_topic_data,
        x='x_pos',
        y='weight',
        size='weight',       # bubble area encodes the word's topic weight
        color='topic',
        hover_name='word',
        size_max=80,
        title='Topic Word Weights (Bubble Chart)',
        color_discrete_sequence=px.colors.sequential.Greys,  # Using grayscale palette
        labels={
            'x_pos': 'Entity/Word Index',
            'weight': 'Word Weight',
            'topic': 'Topic ID'
        },
        custom_data=['word', 'weight', 'topic']
    )
    fig.update_layout(
        xaxis_title="Entity/Word (Bubble size = Word Weight)",
        yaxis_title="Word Weight",
        xaxis={'tickangle': -45, 'showgrid': False},
        yaxis={'showgrid': True},
        showlegend=True,
        plot_bgcolor='#f9f9f9',   # Neutral background
        paper_bgcolor='#f9f9f9',  # Neutral background
        height=600,
        margin=dict(t=50, b=100, l=50, r=10),
    )
    # Hover shows word + weight only; <extra></extra> suppresses the trace box.
    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',
                      marker=dict(line=dict(width=1, color='DarkSlateGrey')))
    return fig
188
-
189
def generate_network_graph(df, raw_text):
    """
    Generates a network graph visualization (Node Plot) with edges
    based on entity co-occurrence in sentences.

    Nodes are unique (text, label) entities laid out on a circle with random
    jitter; node size scales with entity frequency. Two nodes are connected
    when both entity strings appear (case-insensitively) in the same sentence
    of *raw_text*.

    NOTE(review): layout uses np.random.normal without a fixed seed, so node
    positions differ between runs — confirm whether determinism matters.
    """
    entity_counts = df['text'].value_counts().reset_index()
    entity_counts.columns = ['text', 'frequency']

    # One node per unique (text, label) pair, annotated with its frequency.
    unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
    if unique_entities.shape[0] < 2:
        return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")

    # Circular layout with Gaussian jitter to reduce label overlap.
    num_nodes = len(unique_entities)
    thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)

    radius = 10
    unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
    unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)

    pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
    edges = set()

    # Naive sentence splitter: break on ./?/! followed by whitespace, with
    # lookbehinds guarding common abbreviation patterns.
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
    for sentence in sentences:
        entities_in_sentence = []
        for entity_text in unique_entities['text'].unique():
            if entity_text.lower() in sentence.lower():
                entities_in_sentence.append(entity_text)
        unique_entities_in_sentence = list(set(entities_in_sentence))

        # Every unordered pair of co-occurring entities becomes one edge;
        # sorting the tuple deduplicates (a, b) vs (b, a).
        for i in range(len(unique_entities_in_sentence)):
            for j in range(i + 1, len(unique_entities_in_sentence)):
                node1 = unique_entities_in_sentence[i]
                node2 = unique_entities_in_sentence[j]
                edge_tuple = tuple(sorted((node1, node2)))
                edges.add(edge_tuple)

    # Build flattened coordinate lists; None separates line segments in Plotly.
    edge_x = []
    edge_y = []

    for edge in edges:
        n1, n2 = edge
        if n1 in pos_map and n2 in pos_map:
            edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
            edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])

    fig = go.Figure()

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines',
        name='Co-occurrence Edges',
        showlegend=False
    )
    fig.add_trace(edge_trace)

    # Node trace: marker size encodes frequency, color encodes entity label.
    fig.add_trace(go.Scatter(
        x=unique_entities['x'],
        y=unique_entities['y'],
        mode='markers+text',
        name='Entities',
        text=unique_entities['text'],
        textposition="top center",
        showlegend=False,
        marker=dict(
            size=unique_entities['frequency'] * 5 + 10,
            color=[entity_color_map.get(label, '#cccccc') for label in unique_entities['label']],  # Use monochrome map
            line_width=1,
            line_color='black',
            opacity=0.9
        ),
        textfont=dict(size=10),
        customdata=unique_entities[['label', 'score', 'frequency']],
        hovertemplate=(
            "<b>%{text}</b><br>" +
            "Label: %{customdata[0]}<br>" +
            "Score: %{customdata[1]:.2f}<br>" +
            "Frequency: %{customdata[2]}<extra></extra>"
        )
    ))

    # Invisible one-point traces solely to populate the legend (one per label).
    legend_traces = []
    seen_labels = set()
    for index, row in unique_entities.iterrows():
        label = row['label']  # 'label' is defined here
        if label not in seen_labels:
            seen_labels.add(label)
            color = entity_color_map.get(label, '#cccccc')
            legend_traces.append(go.Scatter(
                x=[None], y=[None], mode='markers', marker=dict(size=10, color=color), name=f"{label.capitalize()}", showlegend=True
            ))
    for trace in legend_traces:
        fig.add_trace(trace)

    fig.update_layout(
        title='Entity Co-occurrence Network (Edges = Same Sentence)',
        showlegend=True,
        hovermode='closest',
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
        plot_bgcolor='#f9f9f9',
        paper_bgcolor='#f9f9f9',
        margin=dict(t=50, b=10, l=10, r=10),
        height=600
    )

    return fig
298
-
299
-
300
- # --- PPTX HELPER FUNCTIONS ---
301
-
302
def fig_to_image_buffer(fig):
    """
    Converts a Plotly figure object into a BytesIO buffer containing PNG data.
    Requires 'kaleido' to be installed for image export.
    Returns None if export fails.
    """
    try:
        # Static export goes through kaleido; raises when it is missing/broken.
        png_bytes = pio.to_image(fig, format="png", width=900, height=500, scale=2)
    except Exception as e:
        # Changed the error message to be more explicit about the Kaleido dependency issue
        print(f"Plotly image export failed (Kaleido dependency error): {e}. This means the PPTX will contain placeholder slides where charts should be.")
        return None
    return BytesIO(png_bytes)
317
-
318
- # --- PPTX GENERATION FUNCTION ---
319
-
320
def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_category_mapping):
    """
    Generates a PowerPoint presentation (.pptx) file containing key analysis results.
    Returns the file content as a BytesIO buffer.

    Slide order: title, source text, entity summary table, treemap, category
    bar chart, topic bubble chart. Chart slides degrade to text placeholders
    when static image export (kaleido) is unavailable.
    """
    prs = Presentation()
    # Layout 5: Title and Content (often good for charts)
    chart_layout = prs.slide_layouts[5]

    # 1. Title Slide (Layout 0)
    title_slide_layout = prs.slide_layouts[0]
    slide = prs.slides.add_slide(title_slide_layout)
    slide.shapes.title.text = "NER & Topic Analysis Report"

    # FIX: Add safety check for placeholder index 1 (subtitle)
    if len(slide.placeholders) > 1:
        subtitle = slide.placeholders[1]
        subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"
    # End FIX

    # 2. Source Text Slide
    slide = prs.slides.add_slide(chart_layout)
    slide.shapes.title.text = "Analyzed Source Text"

    # Add the raw text to a text box
    left = Inches(0.5)
    top = Inches(1.5)
    width = Inches(9.0)
    height = Inches(5.0)
    txBox = slide.shapes.add_textbox(left, top, width, height)
    tf = txBox.text_frame
    tf.margin_top = Inches(0.1)
    tf.margin_bottom = Inches(0.1)
    tf.word_wrap = True
    p = tf.add_paragraph()
    p.text = text_input
    p.font.size = Pt(14)
    p.font.name = 'Arial'

    # 3. Entity Summary Slide (Table)
    slide = prs.slides.add_slide(chart_layout)
    slide.shapes.title.text = "Entity Summary (Count by Category and Label)"

    # Create the summary table using the app's established logic
    grouped_entity_table = df['label'].value_counts().reset_index()
    grouped_entity_table.columns = ['Entity Label', 'Count']
    grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(
        lambda x: reverse_category_mapping.get(x, 'Other')
    )
    grouped_entity_table = grouped_entity_table[['Category', 'Entity Label', 'Count']]

    # Simple way to insert a table:
    rows, cols = grouped_entity_table.shape
    x, y, cx, cy = Inches(1), Inches(1.5), Inches(8), Inches(4.5)
    # Add 1 row for the header
    table = slide.shapes.add_table(rows + 1, cols, x, y, cx, cy).table

    # Set column widths
    table.columns[0].width = Inches(2.7)
    table.columns[1].width = Inches(2.8)
    table.columns[2].width = Inches(2.5)

    # Set column headers
    # NOTE(review): only the header row is populated here — no loop writes the
    # DataFrame rows into table.cell(r + 1, c), so the body cells stay empty.
    # Confirm whether the data rows were intentionally dropped.
    for i, col in enumerate(grouped_entity_table.columns):
        cell = table.cell(0, i)
        cell.text = col
        cell.fill.solid()
        # Optional: Add simple styling to header

    # 4. Treemap Slide (Visualization)
    fig_treemap = px.treemap(
        df,
        path=[px.Constant("All Entities"), 'category', 'label', 'text'],
        values='score',
        color='category',
        title="Entity Distribution by Category and Label",
        color_discrete_sequence=px.colors.sequential.Greys  # Monochrome palette
    )
    fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    treemap_image = fig_to_image_buffer(fig_treemap)

    if treemap_image:
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Entity Distribution Treemap"
        slide.shapes.add_picture(treemap_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
    else:
        # Placeholder if image conversion failed (e.g., Kaleido issue)
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Entity Distribution Treemap (Chart Failed)"
        # FIX: Safety check for placeholder index 1
        if len(slide.placeholders) > 1:
            slide.placeholders[1].text = "Chart generation failed, likely due to a missing 'kaleido' dependency for static image export."


    # 5. Entity Count Bar Chart Slide (Visualization)
    grouped_counts = df['category'].value_counts().reset_index()
    grouped_counts.columns = ['Category', 'Count']
    fig_bar_category = px.bar(
        grouped_counts,
        x='Category',
        y='Count',
        color='Category',
        title='Total Entities per Category',
        color_discrete_sequence=px.colors.sequential.Greys  # Monochrome palette
    )
    fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
    bar_category_image = fig_to_image_buffer(fig_bar_category)

    if bar_category_image:
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Total Entities per Category"
        slide.shapes.add_picture(bar_category_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
    else:
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Total Entities per Category (Chart Failed)"
        # FIX: Safety check for placeholder index 1
        if len(slide.placeholders) > 1:
            slide.placeholders[1].text = "Chart generation failed, likely due to a missing 'kaleido' dependency for static image export."


    # 6. Topic Modeling Bubble Chart Slide
    if df_topic_data is not None and not df_topic_data.empty:
        # Ensure data frame is in the format expected by create_topic_word_bubbles
        df_topic_data_pptx = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
        bubble_figure = create_topic_word_bubbles(df_topic_data_pptx)
        bubble_image = fig_to_image_buffer(bubble_figure)
        if bubble_image:
            slide = prs.slides.add_slide(chart_layout)
            slide.shapes.title.text = "Topic Word Weights (Bubble Chart)"
            slide.shapes.add_picture(bubble_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
        else:
            slide = prs.slides.add_slide(chart_layout)
            slide.shapes.title.text = "Topic Word Weights (Chart Failed)"
            # FIX: Safety check for placeholder index 1
            if len(slide.placeholders) > 1:
                slide.placeholders[1].text = "Chart generation failed, likely due to a missing 'kaleido' dependency for static image export."

    else:
        # Placeholder slide if topic modeling is not available
        slide = prs.slides.add_slide(chart_layout)
        slide.shapes.title.text = "Topic Modeling Results"
        # FIX: Safety check for placeholder index 1
        if len(slide.placeholders) > 1:
            slide.placeholders[1].text = "Topic Modeling requires more unique input (at least two unique entities)."

    # Save the presentation to an in-memory buffer
    pptx_buffer = BytesIO()
    prs.save(pptx_buffer)
    pptx_buffer.seek(0)
    return pptx_buffer
470
-
471
- # --- NEW CSV GENERATION FUNCTION ---
472
def generate_entity_csv(df):
    """
    Serialize the extracted entities to CSV in an in-memory buffer.

    Only the text, label, category, score, start, and end columns are
    exported, in that order; the buffer is rewound before being returned.
    """
    export_columns = ['text', 'label', 'category', 'score', 'start', 'end']
    encoded = df[export_columns].to_csv(index=False).encode('utf-8')
    csv_buffer = BytesIO(encoded)
    csv_buffer.seek(0)
    return csv_buffer
483
- # -----------------------------------
484
-
485
- # --- Existing App Functionality (HTML) ---
486
def generate_html_report(df, text_input, elapsed_time, df_topic_data):
    """
    Generates a full HTML report containing all analysis results and visualizations.

    Returns the report as a single self-contained HTML string; Plotly figures
    are embedded as fragments that load plotly.js from the CDN.
    """
    # 1. Generate Visualizations (Plotly HTML)

    # 1a. Treemap
    fig_treemap = px.treemap(
        df,
        path=[px.Constant("All Entities"), 'category', 'label', 'text'],
        values='score',
        color='category',
        title="Entity Distribution by Category and Label",
        color_discrete_sequence=px.colors.sequential.Greys  # Monochrome palette
    )
    fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')

    # 1b. Pie Chart
    grouped_counts = df['category'].value_counts().reset_index()
    grouped_counts.columns = ['Category', 'Count']
    fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.Greys)  # Monochrome palette
    fig_pie.update_layout(margin=dict(t=50, b=10))
    pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')

    # 1c. Bar Chart (Category Count)
    fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.sequential.Greys)  # Monochrome palette
    fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
    bar_category_html = fig_bar_category.to_html(full_html=False,include_plotlyjs='cdn')

    # 1d. Bar Chart (Most Frequent Entities)
    word_counts = df['text'].value_counts().reset_index()
    word_counts.columns = ['Entity', 'Count']
    repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
    # Fallback message used when no entity occurs more than once.
    bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'

    if not repeating_entities.empty:
        fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Greys)  # Monochrome palette
        fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
        bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')

    # 1e. Network Graph HTML
    network_fig = generate_network_graph(df, text_input)
    network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')

    # 1f. Topic Charts HTML
    topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
    if df_topic_data is not None and not df_topic_data.empty:
        bubble_figure = create_topic_word_bubbles(df_topic_data)
        if bubble_figure:
            topic_charts_html += f'<div class="chart-box">{bubble_figure.to_html(full_html=False, include_plotlyjs="cdn")}</div>'
        else:
            topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
    else:
        topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #AAAAAA;">'
        topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
        topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
        topic_charts_html += '</div>'

    # 2. Get Highlighted Text
    # The div style is now monochrome/neutral (border: #AAAAAA, background: #FFFFFF)
    # The replace() retags the wrapper div so the report CSS class applies.
    highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")

    # 3. Entity Tables (Pandas to HTML)
    entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
        classes='table table-striped',
        index=False
    )

    # 4. Construct the Final HTML
    # Updated CSS to remove all color/pink references
    html_content = f"""<!DOCTYPE html><html lang="en"><head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Entity and Topic Analysis Report</title>
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <style>
    body {{ font-family: 'Inter', sans-serif; margin: 0; padding: 20px; background-color: #f4f4f4; color: #333; }}
    .container {{ max-width: 1200px; margin: 0 auto; background-color: #ffffff; padding: 30px; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }}
    h1 {{ color: #333333; border-bottom: 3px solid #666666; padding-bottom: 10px; margin-top: 0; }}
    h2 {{ color: #555555; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
    h3 {{ color: #555; margin-top: 20px; }}
    .metadata {{ background-color: #eeeeee; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
    .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
    table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
    table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
    table th {{ background-color: #f0f0f0; }}
    .highlighted-text {{ border: 1px solid #AAAAAA; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
    </style></head><body>
    <div class="container">
    <h1>Entity and Topic Analysis Report</h1>
    <div class="metadata">
    <p><strong>Generated At:</strong> {time.strftime('%Y-%m-%d %H:%M:%S')}</p>
    <p><strong>Processing Time:</strong> {elapsed_time:.2f} seconds</p>
    </div>
    <h2>1. Analyzed Text & Extracted Entities</h2>
    <h3>Original Text with Highlighted Entities</h3>
    <div class="highlighted-text-container">
    {highlighted_text_html}
    </div>
    <h2>2. Full Extracted Entities Table</h2>
    {entity_table_html}
    <h2>3. Data Visualizations</h2>
    <h3>3.1 Entity Distribution Treemap</h3>
    <div class="chart-box">{treemap_html}</div>
    <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
    <div class="chart-box">{pie_html}</div>
    <div class="chart-box">{bar_category_html}</div>
    <div class="chart-box">{bar_freq_html}</div>
    <h3>3.3 Entity Co-occurrence Network (Edges = Same Sentence)</h3>
    <div class="chart-box">{network_html}</div>
    <h2>4. Topic Modeling (LDA on Entities)</h2>
    {topic_charts_html}
    </div></body></html>
    """
    return html_content
602
-
603
-
604
# --- Page Configuration and Styling (No Sidebar) ---
st.set_page_config(layout="wide", page_title="NER & Topic Report App")
# Global monochrome CSS overrides for the Streamlit widgets used below.
st.markdown(
    """
    <style>
    /* Overall app container - NO SIDEBAR */
    .main {
        background-color: #F8F8F8; /* Near White/Lightest Gray */
        color: #333333; /* Dark grey text for contrast */
    }
    .stApp {
        background-color: #F8F8F8;
    }
    /* Text Area background and text color (input fields) */
    .stTextArea textarea {
        background-color: #FFFFFF; /* Pure White for input fields */
        color: #000000; /* Black text for input */
        border: 1px solid #AAAAAA; /* Gray border */
    }
    /* Button styling */
    .stButton > button {
        background-color: #666666; /* Medium Gray for the button */
        color: #FFFFFF; /* White text for contrast */
        border: none;
        padding: 10px 20px;
        border-radius: 5px;
        transition: background-color 0.3s;
    }
    .stButton > button:hover {
        background-color: #444444; /* Darker Gray on hover */
    }
    /* Expander header and content background */
    .streamlit-expanderHeader, .streamlit-expanderContent {
        background-color: #EEEEEE; /* Very Light Gray */
        color: #333333;
    }
    </style>
    """,
    unsafe_allow_html=True)
st.subheader("NER and Topic Analysis Report Generator", divider="gray")  # Divider is now gray
st.link_button("by nlpblogs", "https://nlpblogs.com", type="secondary")
expander = st.expander("**Important notes**")
expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
**Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and **`kaleido`**. If charts in the PPTX are blank, please check your environment's $\text{kaleido}$ installation/permissions.
**Results:** Results are compiled into a single, comprehensive **HTML report**, a **PowerPoint (.pptx) file**, and a **CSV file** for easy download and sharing.
**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")

# --- Comet ML Setup (Placeholder/Conditional) ---
# All three credentials must be present for experiment logging to activate.
COMET_API_KEY = os.environ.get("COMET_API_KEY")
COMET_WORKSPACE = os.environ.get("COMET_WORKSPACE")
COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
657
-
658
# --- Model Loading ---
# Cached across reruns so the (large) GLiNER checkpoint is downloaded and
# initialized only once per server process.
@st.cache_resource
def load_ner_model():
    """Loads the GLiNER model and caches it.

    On failure (no network, missing checkpoint) an error is shown and the
    Streamlit script is halted via st.stop().
    """
    try:
        return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=labels)
    except Exception as e:
        st.error(f"Failed to load NER model. Please check your internet connection or model availability: {e}")
        st.stop()

model = load_ner_model()
669
-
670
# --- LONG DEFAULT TEXT (178 Words) ---
# Demo input pre-filled into the text area; deliberately rich in people,
# organizations, dates, locations, and products so every label can fire.
DEFAULT_TEXT = (
    "In June 2024, the founder, Dr. Emily Carter, officially announced a new, expansive partnership between "
    "TechSolutions Inc. and the European Space Agency (ESA). This strategic alliance represents a significant "
    "leap forward for commercial space technology across the entire European Union. The agreement, finalized "
    "on Monday in Paris, France, focuses specifically on jointly developing the next generation of the 'Astra' "
    "software platform. This platform is critical for processing and managing the vast amounts of data being sent "
    "back from the recent Mars rover mission. The core team, including lead engineer Marcus Davies, will hold "
    "their first collaborative workshop in Berlin, Germany, on August 15th. The community response on social "
    "media platform X (under the username @TechSolutionsCEO) was overwhelmingly positive, with many major tech "
    "publications, including Wired Magazine, predicting a major impact on the space technology industry by the "
    "end of the year. The platform is designed to be compatible with both Windows and Linux operating systems. "
    "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
    "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
    "general public by October 1st. The goal is to deploy the Astra v2 platform before the next solar eclipse event in 2026."
)
# -----------------------------------
# --- Session State Initialization (CRITICAL FIX) ---
# Seed every key on first run so reruns can read them unconditionally.
if 'show_results' not in st.session_state:
    st.session_state.show_results = False
if 'last_text' not in st.session_state:
    st.session_state.last_text = ""
if 'results_df' not in st.session_state:
    st.session_state.results_df = pd.DataFrame()
if 'elapsed_time' not in st.session_state:
    st.session_state.elapsed_time = 0.0
if 'topic_results' not in st.session_state:
    st.session_state.topic_results = None
if 'my_text_area' not in st.session_state:
    st.session_state.my_text_area = DEFAULT_TEXT
700
-
701
# --- Clear Button Function (MODIFIED) ---
def clear_text():
    """Clears the text area (sets it to an empty string) and hides results."""
    # Reset every result-related session key in one pass; the text area key
    # is included so the widget empties on the next rerun.
    _reset_values = {
        'my_text_area': "",
        'show_results': False,
        'last_text': "",
        'results_df': pd.DataFrame(),
        'elapsed_time': 0.0,
        'topic_results': None,
    }
    for _key, _value in _reset_values.items():
        st.session_state[_key] = _value
710
-
711
# --- Text Input and Clear Button ---
# Maximum accepted input size; enforced below before running the model.
word_limit = 1000
# The widget is bound to st.session_state['my_text_area'] via `key`, so
# clear_text() can reset it from the button callback.
text = st.text_area(
    f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter",
    height=250,
    key='my_text_area',
    value=st.session_state.my_text_area)

# Whitespace-split word count shown live under the text area.
word_count = len(text.split())
st.markdown(f"**Word count:** {word_count}/{word_limit}")
# on_click runs before the rerun, so the cleared state is visible immediately.
st.button("Clear text", on_click=clear_text)
722
-
723
# --- Results Trigger and Processing (Updated Logic) ---
if st.button("Results"):
    if not text.strip():
        # Empty/whitespace-only input: warn and hide any stale results.
        st.warning("Please enter some text to extract entities.")
        st.session_state.show_results = False
    elif word_count > word_limit:
        # Over the limit: refuse to run the model on oversized input.
        st.warning(f"Your text exceeds the {word_limit} word limit. Please shorten it to continue.")
        st.session_state.show_results = False
    else:
        with st.spinner("Extracting entities and generating report data...", show_time=True):
            # Only re-run the (expensive) model when the text actually changed;
            # otherwise the cached session-state results are reused below.
            if text != st.session_state.last_text:
                st.session_state.last_text = text
                start_time = time.time()

                # --- Model Prediction & Dataframe Creation ---
                # `model` and `labels` are defined earlier in the file (GLiNER).
                entities = model.predict_entities(text, labels)
                df = pd.DataFrame(entities)

                if not df.empty:
                    # Normalize entity surface forms and map labels to categories.
                    df['text'] = df['text'].apply(remove_trailing_punctuation)
                    df['category'] = df['label'].map(reverse_category_mapping)
                    st.session_state.results_df = df

                    # Cap topic-model vocabulary at 10 (or fewer if the text
                    # produced fewer unique entities).
                    unique_entity_count = len(df['text'].unique())
                    N_TOP_WORDS_TO_USE = min(10, unique_entity_count)

                    st.session_state.topic_results = perform_topic_modeling(
                        df,
                        num_topics=2,
                        num_top_words=N_TOP_WORDS_TO_USE
                    )

                    # Best-effort experiment logging; skipped when Comet is
                    # not configured (comet_initialized is set earlier).
                    if comet_initialized:
                        experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
                        experiment.log_parameter("input_text", text)
                        experiment.log_table("predicted_entities", df)
                        experiment.end()
                else:
                    # No entities found: clear cached results so the display
                    # section shows the "no entities" error path.
                    st.session_state.results_df = pd.DataFrame()
                    st.session_state.topic_results = None

                end_time = time.time()
                st.session_state.elapsed_time = end_time - start_time

            # Shown on both fresh and cached runs; elapsed_time reflects the
            # last actual model run.
            st.info(f"Report data generated in **{st.session_state.elapsed_time:.2f} seconds**.")
            st.session_state.show_results = True
769
-
770
# --- Display Download Link and Results (The missing logic that was completed) ---
# Rendered on every rerun while show_results is set, so downloads survive
# widget interactions without re-running the model.
if st.session_state.show_results:
    df = st.session_state.results_df

    if df.empty:
        st.error("No entities were extracted from the text. The report cannot be generated.")
    else:
        # --- Generate All Report Files/Buffers ---
        # The three generators are defined earlier in the file.
        with st.spinner("Generating Report Files (HTML, PPTX, CSV)..."):
            # 1. HTML Report Generation
            html_report_content = generate_html_report(
                df,
                st.session_state.last_text,
                st.session_state.elapsed_time,
                st.session_state.topic_results
            )

            # 2. PPTX Report Generation
            pptx_buffer = generate_pptx_report(
                df,
                st.session_state.last_text,
                st.session_state.elapsed_time,
                st.session_state.topic_results,
                reverse_category_mapping
            )

            # 3. CSV Report Generation
            csv_buffer = generate_entity_csv(df)

        # --- Display Downloads and Preview ---
        st.markdown("## Download Analysis Reports", anchor=False)
        st.markdown("---")

        # One download button per report format, side by side.
        col1, col2, col3 = st.columns(3)

        with col1:
            st.download_button(
                label="Download HTML Report 🌐",
                data=html_report_content,
                file_name="entity_topic_report.html",
                mime="text/html",
                help="A full, interactive report with all charts."
            )
        with col2:
            st.download_button(
                label="Download PowerPoint (.pptx) 📊",
                data=pptx_buffer,
                file_name="entity_topic_slides.pptx",
                mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
                help="A summary presentation with static charts."
            )
        with col3:
            st.download_button(
                label="Download Raw Entities (.csv) 📋",
                data=csv_buffer,
                file_name="extracted_entities.csv",
                mime="text/csv",
                help="Raw data table of all extracted entities."
            )

        st.markdown("---")

        # --- Display Interactive Preview ---
        st.markdown("## Interactive HTML Report Preview", anchor=False)
        st.info("Scroll within the box below to see the complete report and interactive charts.")

        # Display the HTML report using the Streamlit component
        # (components.html sandboxes the report in an iframe).
        components.html(
            html_report_content,
            height=800,
            scrolling=True
        )
 
 
 
 
1
  import streamlit as st
 
 
 
2
  import plotly.express as px
3
+ import pandas as pd
 
 
 
 
 
4
  from io import BytesIO
5
  from pptx import Presentation
6
+ from pptx.util import Inches
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# Demo dataset and the Plotly bar chart built from it.
df = pd.DataFrame({'Category': ['A', 'B', 'C'], 'Value': [10, 20, 30]})
fig = px.bar(df, x='Category', y='Value', title='Sample Plotly Bar Chart')

# Render the figure once, up front, to PNG bytes for embedding in the PPTX
# (static export requires the kaleido package).
_png_buffer = BytesIO()
fig.write_image(_png_buffer, format='png', width=800, height=400)
_png_buffer.seek(0)
img_data = _png_buffer.getvalue()
 
 
17
 
18
# Function to create PPTX
def create_presentation():
    """Build a two-slide presentation and return it as raw ``bytes``.

    Slide 1 is a title slide; slide 2 embeds the pre-rendered Plotly PNG
    (module-level ``img_data``) and a table mirroring the module-level
    ``df``. The returned bytes are ready for ``st.download_button``.
    """
    prs = Presentation()

    # Title slide (layout 0 = title layout in the default template).
    slide = prs.slides.add_slide(prs.slide_layouts[0])
    title = slide.shapes.title
    title.text = "Streamlit Plotly Export"

    # Slide with Plotly image and table (layout 1 = title + content).
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    title = slide.shapes.title
    title.text = "Plotly Chart and Data"

    # Add Plotly image; height is derived from the image's aspect ratio.
    left = Inches(1)
    top = Inches(1.5)
    slide.shapes.add_picture(BytesIO(img_data), left, top, width=Inches(6))

    # Add table below the chart. Headers and cells are taken from the
    # DataFrame itself (instead of hardcoding 'Category'/'Value'), so the
    # slide stays correct if df's columns ever change, and str() coercion
    # handles non-string cell values.
    rows, cols = df.shape
    left = Inches(1)
    top = Inches(4)
    width = Inches(6)
    height = Inches(0.8)
    table = slide.shapes.add_table(rows + 1, cols, left, top, width, height).table
    for j, col_name in enumerate(df.columns):
        table.cell(0, j).text = str(col_name)
    for i in range(rows):
        for j in range(cols):
            table.cell(i + 1, j).text = str(df.iloc[i, j])

    # Save to an in-memory buffer and hand back the raw bytes.
    bio = BytesIO()
    prs.save(bio)
    bio.seek(0)
    return bio.getvalue()
55
+
56
# --- Streamlit UI ---
st.title("Export Plotly Graph to PPTX")
# Show the interactive chart inline in the app.
st.plotly_chart(fig)
# Build the deck on demand, then offer it for download.
if st.button("Generate and Download Slides"):
    deck_bytes = create_presentation()
    st.download_button(
        label="Download PPTX",
        data=deck_bytes,
        file_name="plotly_slides.pptx",
        mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
    )