Spaces:

AIEcosystem
/

relationship-map

Sleeping

App Files Files Community

AIEcosystem committed on Oct 8, 2025

Commit

471ac48

verified ·

1 Parent(s): f6ebbfc

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +28 -73

src/streamlit_app.py CHANGED Viewed

@@ -11,7 +11,7 @@ import numpy as np
 import re
 import string
 import json
-# --- PPTX Imports (NEW) ---
 from io import BytesIO
 from pptx import Presentation
 from pptx.util import Inches, Pt
@@ -444,7 +444,21 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
     pptx_buffer.seek(0)
     return pptx_buffer
-# --- Existing App Functionality (HTML and JSON) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
@@ -569,65 +583,6 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
     return html_content
-def generate_presentation_json(df, elapsed_time, df_topic_data):
-    """
-    Generates a structured dictionary of all analysis results suitable for
-    importing into a presentation tool, then serializes it to JSON.
-    """
-    if df.empty:
-        return {"error": "No entities found for presentation export."}
-    total_entities = len(df)
-    unique_entities = len(df['text'].unique())
-    category_counts = df['category'].value_counts()
-    top_categories = category_counts.head(3).to_dict()
-    summary_stats = {
-        "Total Entities Found": total_entities,
-        "Unique Entities Found": unique_entities,
-        "Top_3_Entity_Categories": top_categories
-    }
-    grouped_entity_table = category_counts.reset_index()
-    grouped_entity_table.columns = ['Category', 'Count']
-    word_counts = df['text'].value_counts().reset_index()
-    word_counts.columns = ['Entity', 'Count']
-    repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
-    topic_data = "Not enough unique data for topic modeling."
-    if df_topic_data is not None and not df_topic_data.empty:
-        topic_data = df_topic_data.to_dict('records')
-    presentation_data = {
-        "ReportTitle": "NER and Topic Analysis Presentation Data",
-        "GeneratedAt": time.strftime('%Y-%m-%d %H:%M:%S'),
-        "ProcessingTimeSeconds": f"{elapsed_time:.2f}",
-        "Slides": [
-            {
-                "SlideTitle": "1. Analysis Overview and Key Metrics",
-                "Metrics": summary_stats,
-                "Note": "This data can be used for the introductory slide."
-            },
-            {
-                "SlideTitle": "2. Entity Category Distribution (Chart Data)",
-                "Data": grouped_entity_table.to_dict('records'),
-                "Note": "Data for Pie Chart and Category Count Bar Chart."
-            },
-            {
-                "SlideTitle": "3. Most Frequent Entities (Top 10)",
-                "Data": repeating_entities.to_dict('records'),
-                "Note": "Data for the Top 10 Frequent Entities Bar Chart."
-            },
-            {
-                "SlideTitle": "4. Topic Modeling Results (Key Words)",
-                "Data": topic_data,
-                "Note": "Key entities and their weights per topic from LDA."
-            }
-        ]
-    }
-    return presentation_data
 # --- Page Configuration and Styling (No Sidebar) ---
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
@@ -669,7 +624,7 @@ st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
 expander = st.expander("**Important notes**")
 expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
 **Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and `kaleido`.
-**Results:** Results are compiled into a single, comprehensive **HTML report** and a **PowerPoint (.pptx) file** for easy download and sharing.
 **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
@@ -680,7 +635,7 @@ COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 # --- Model Loading ---
-@st.cache_resource
 def load_ner_model():
     """Loads the GLiNER model and caches it."""
     try:
@@ -898,7 +853,7 @@ if st.session_state.show_results:
         st.markdown("---")
         st.markdown("### Download Full Report Artifacts")
-        # 1. HTML Report Download
         html_report = generate_html_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data)
         st.download_button(
             label="Download Comprehensive HTML Report",
@@ -908,7 +863,7 @@ if st.session_state.show_results:
             type="primary"
         )
-        # 2. PowerPoint PPTX Download (NEW)
         pptx_buffer = generate_pptx_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data, reverse_category_mapping)
         st.download_button(
             label="Download Presentation Slides (.pptx)",
@@ -918,17 +873,17 @@ if st.session_state.show_results:
             type="primary"
         )
-        # 3. Presentation JSON Data Download
-        presentation_data = generate_presentation_json(df, st.session_state.elapsed_time, df_topic_data)
-        presentation_json_data = json.dumps(presentation_data, indent=4)
         st.download_button(
-            label="Download Presentation Data (JSON)",
-            data=presentation_json_data,
-            file_name="ner_presentation_data.json",
-            mime="application/json",
             type="secondary"
         )

 import re
 import string
 import json
+# --- PPTX Imports ---
 from io import BytesIO
 from pptx import Presentation
 from pptx.util import Inches, Pt
     pptx_buffer.seek(0)
     return pptx_buffer
+# --- NEW CSV GENERATION FUNCTION ---
+def generate_entity_csv(df):
+    """
+    Generates a CSV file of the extracted entities in an in-memory buffer,
+    including text, label, category, score, start, and end indices.
+    """
+    csv_buffer = BytesIO()
+    # Select desired columns and write to buffer
+    df_export = df[['text', 'label', 'category', 'score', 'start', 'end']]
+    csv_buffer.write(df_export.to_csv(index=False).encode('utf-8'))
+    csv_buffer.seek(0)
+    return csv_buffer
+# -----------------------------------
+# --- Existing App Functionality (HTML) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
     """
     return html_content
 # --- Page Configuration and Styling (No Sidebar) ---
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
 expander = st.expander("**Important notes**")
 expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
 **Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and `kaleido`.
+**Results:** Results are compiled into a single, comprehensive **HTML report**, a **PowerPoint (.pptx) file**, and a **CSV file** for easy download and sharing.
 **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 # --- Model Loading ---
+@st.cache_resource
 def load_ner_model():
     """Loads the GLiNER model and caches it."""
     try:
         st.markdown("---")
         st.markdown("### Download Full Report Artifacts")
+        # 1. HTML Report Download (Retained)
         html_report = generate_html_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data)
         st.download_button(
             label="Download Comprehensive HTML Report",
             type="primary"
         )
+        # 2. PowerPoint PPTX Download (Retained)
         pptx_buffer = generate_pptx_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data, reverse_category_mapping)
         st.download_button(
             label="Download Presentation Slides (.pptx)",
             type="primary"
         )
+        # 3. CSV Data Download (NEW)
+        csv_buffer = generate_entity_csv(df)
         st.download_button(
+            label="Download Extracted Entities (CSV)",
+            data=csv_buffer,
+            file_name="extracted_entities.csv",
+            mime="text/csv",
             type="secondary"
         )