Spaces:

AIEcosystem
/

relationship-map

Sleeping

App Files Community

AIEcosystem committed on Oct 8, 2025

Commit

f4ad236

verified ·

1 Parent(s): f89b757

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +64 -131

src/streamlit_app.py CHANGED Viewed

@@ -41,37 +41,26 @@ entity_color_map = {
     "person": "#10b981",
     "country": "#3b82f6",
     "city": "#4ade80",
     "organization": "#f59e0b",
     "date": "#8b5cf6",
     "time": "#ec4899",
     "cardinal": "#06b6d4",
     "money": "#f43f5e",
     "position": "#a855f7",
 }
 # --- Label Definitions and Category Mapping (Used by the App) ---
 labels = list(entity_color_map.keys())
 labels = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
 category_mapping = {
    "People": ["person", "organization", "position"],
    "Locations": ["country", "city"],
    "Time": ["date", "time"],
-   "Numbers": ["money", "cardinal"]
-}
-reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
 # --- Utility Functions for Analysis and Plotly ---
 def extract_label(node_name):
@@ -87,23 +76,19 @@ def highlight_entities(text, df_entities):
     """Generates HTML to display text with entities highlighted and colored."""
     if df_entities.empty:
         return text
     # Sort entities by start index descending to insert highlights without affecting subsequent indices
     entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
     highlighted_text = text
     for entity in entities:
         start = entity['start']
         end = entity['end']
         label = entity['label']
         entity_text = entity['text']
         color = entity_color_map.get(label, '#000000')
         # Create a span with background color and tooltip
         highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
         # Replace the original text segment with the highlighted HTML
         highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
     # Use a div to mimic the Streamlit input box style for the report
     return f'<div style="border: 1px solid #CCCCCC; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
@@ -115,7 +100,6 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
     documents = df_entities['text'].unique().tolist()
     if len(documents) < 2:
         return None
     N = min(num_top_words, len(documents))
     try:
         tfidf_vectorizer = TfidfVectorizer(
@@ -125,7 +109,6 @@ def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
         )
         tfidf = tfidf_vectorizer.fit_transform(documents)
         tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
         lda = LatentDirichletAllocation(
             n_components=num_topics, max_iter=5, learning_method='online',random_state=42, n_jobs=-1
         )
@@ -151,7 +134,6 @@ def create_topic_word_bubbles(df_topic_data):
     # Renaming columns to match the output of perform_topic_modeling
     df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
     df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position in the app
     if df_topic_data.empty:
         return None
     fig = px.scatter(
@@ -182,8 +164,7 @@ def create_topic_word_bubbles(df_topic_data):
         height=600,
         margin=dict(t=50, b=100, l=50, r=10),
     )
-    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',
-marker=dict(line=dict(width=1, color='DarkSlateGrey')))
     return fig
 def generate_network_graph(df, raw_text):
@@ -193,29 +174,26 @@ def generate_network_graph(df, raw_text):
     """
     entity_counts = df['text'].value_counts().reset_index()
     entity_counts.columns = ['text', 'frequency']
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
     thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
     radius = 10
     unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
     unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
     pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
-    edges = set()
     sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
     for sentence in sentences:
         entities_in_sentence = []
         for entity_text in unique_entities['text'].unique():
             if entity_text.lower() in sentence.lower():
                 entities_in_sentence.append(entity_text)
         unique_entities_in_sentence = list(set(entities_in_sentence))
         for i in range(len(unique_entities_in_sentence)):
             for j in range(i + 1, len(unique_entities_in_sentence)):
                 node1 = unique_entities_in_sentence[i]
@@ -225,7 +203,6 @@ def generate_network_graph(df, raw_text):
     edge_x = []
     edge_y = []
     for edge in edges:
         n1, n2 = edge
         if n1 in pos_map and n2 in pos_map:
@@ -277,8 +254,7 @@ def generate_network_graph(df, raw_text):
             seen_labels.add(label)
             color = entity_color_map.get(label, '#cccccc')
             legend_traces.append(go.Scatter(
-                x=[None], y=[None], mode='markers', marker=dict(size=10, color=color),
-name=f"{label.capitalize()}", showlegend=True
             ))
     for trace in legend_traces:
         fig.add_trace(trace)
@@ -294,10 +270,8 @@ name=f"{label.capitalize()}", showlegend=True
         margin=dict(t=50, b=10, l=10, r=10),
         height=600
     )
     return fig
 # --- NEW CSV GENERATION FUNCTION ---
 def generate_entity_csv(df):
     """
@@ -313,19 +287,12 @@ def generate_entity_csv(df):
 # -----------------------------------
 # --- Existing App Functionality (HTML) ---
-# NOTE: Removed the 'grouped_entity_table_html' generation that counted by label,
-# keeping only the grouped by category table generation if needed for the HTML report,
-# but prioritizing the Streamlit display of the grouped-by-category table.
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
     Generates a full HTML report containing all analysis results and
     visualizations. (Simplified HTML generation for brevity in code)
     """
-    # ... (Plotly chart HTML generation code remains largely the same)
     # 1. Generate Visualizations (Plotly HTML)
     # 1a. Treemap
     fig_treemap = px.treemap(
         df,
@@ -355,7 +322,6 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     word_counts.columns = ['Entity', 'Count']
     repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
     bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
     if not repeating_entities.empty:
         fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
         fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
@@ -390,7 +356,7 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
          classes='table table-striped',
          index=False
     )
     # 4. Construct the Final HTML
     html_content = f"""<!DOCTYPE html><html lang="en"><head>
     <meta charset="UTF-8">
@@ -438,7 +404,6 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
     return html_content
 # --- Page Configuration and Styling (No Sidebar, Removed Pink) ---
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
 st.markdown(
@@ -474,25 +439,13 @@ st.markdown(
     </style>
     """,
     unsafe_allow_html=True)
-st.subheader("NER and Topic Analysis Report Generator", divider="blue")
-st.link_button("by nlpblogs", "https://nlpblogs.com", type="secondary")
-expander = st.expander("**Important notes**")
-expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
-**Dependencies:** Note that **image export** requires the Python libraries `plotly` and `kaleido`.
-**How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
 expander = st.expander("**Important notes**")
-expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
-**Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
-**How to Use:** Type or paste your text (max. 1000 words) into the text area below, press Ctrl + Enter, and then click the 'Results' button.
-**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
@@ -503,7 +456,7 @@ COMET_PROJECT_NAME = os.environ.get("COMET_PROJECT_NAME")
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 # --- Model Loading ---
-@st.cache_resource
 def load_ner_model():
     """Loads the GLiNER model and caches it."""
     try:
@@ -528,9 +481,9 @@ DEFAULT_TEXT = (
     "end of the year. The platform is designed to be compatible with both Windows and Linux operating systems. "
     "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
     "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
-    "general public by October 1st. The goal is to deploy the Astra v2 platform before the next solar eclipse event in 2026."
-)
 # -----------------------------------
 # --- Session State Initialization (CRITICAL FIX) ---
 if 'show_results' not in st.session_state:
     st.session_state.show_results = False
@@ -562,7 +515,6 @@ text = st.text_area(
     height=250,
     key='my_text_area',
     value=st.session_state.my_text_area)
 word_count = len(text.split())
 st.markdown(f"**Word count:** {word_count}/{word_limit}")
 st.button("Clear text", on_click=clear_text)
@@ -580,25 +532,20 @@ if st.button("Results"):
             if text != st.session_state.last_text:
                 st.session_state.last_text = text
                 start_time = time.time()
                 # --- Model Prediction & Dataframe Creation ---
                 entities = model.predict_entities(text, labels)
                 df = pd.DataFrame(entities)
                 if not df.empty:
                     df['text'] = df['text'].apply(remove_trailing_punctuation)
                     df['category'] = df['label'].map(reverse_category_mapping)
                     st.session_state.results_df = df
                     unique_entity_count = len(df['text'].unique())
                     N_TOP_WORDS_TO_USE = min(10, unique_entity_count)
                     st.session_state.topic_results = perform_topic_modeling(
                         df,
                         num_topics=2,
                         num_top_words=N_TOP_WORDS_TO_USE
                     )
                     if comet_initialized:
                         experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
                         experiment.log_parameter("input_text", text)
@@ -607,31 +554,29 @@ if st.button("Results"):
                 else:
                     st.session_state.results_df = pd.DataFrame()
                     st.session_state.topic_results = None
                 end_time = time.time()
                 st.session_state.elapsed_time = end_time - start_time
-            st.session_state.show_results = True
 # --- Results Display ---
 if st.session_state.show_results and not st.session_state.results_df.empty:
     st.success(f"Processing complete in {st.session_state.elapsed_time:.2f} seconds! 🎉")
     df = st.session_state.results_df
     text_input = st.session_state.last_text
     elapsed_time = st.session_state.elapsed_time
     df_topic_data = st.session_state.topic_results
     # --- Highlighted Text and Download Buttons (Above Tabs) ---
     st.subheader("1. Analyzed Text & Extracted Entities", divider="blue")
     st.markdown(
         highlight_entities(text_input, df),
         unsafe_allow_html=True
     )
     st.subheader("Downloads", divider="blue")
     col1, col2, col3 = st.columns([1, 1, 3])
     # 1. Download CSV
     csv_buffer = generate_entity_csv(df)
     col1.download_button(
@@ -640,7 +585,6 @@ if st.session_state.show_results and not st.session_state.results_df.empty:
         file_name="ner_entities.csv",
         mime="text/csv"
     )
     # 2. Download HTML Report
     html_content = generate_html_report(df, text_input, elapsed_time, df_topic_data)
     col2.download_button(
@@ -649,32 +593,45 @@ if st.session_state.show_results and not st.session_state.results_df.empty:
         file_name="ner_analysis_report.html",
         mime="text/html"
     )
     st.markdown("---")
-    # --- Tabs Implementation ---
-    tab1, tab2 = st.tabs(["📊 Entity Data (Table)", "📈 Visualizations & Topics"])
     with tab1:
         # Create the summary table with the requested column name changes
         grouped_entity_table = df.groupby(['category', 'label']).size().reset_index(name='Count')
         grouped_entity_table.columns = ['Category', 'Entity', 'Count']
         st.markdown("## Entity Counts by Category and Entity")
         st.dataframe(grouped_entity_table.sort_values(by=['Category', 'Count'], ascending=[True, False]), use_container_width=True)
-        with st.expander("See Glossary of tags"):
-            st.write('''
-           - **start**: ['index of the start of the corresponding entity']
-           - **end**: ['index of the end of the corresponding entity']
-           - **text**: ['entity extracted from your text data']
-           - **label**: ['label (tag) assigned to a given extracted entity']
-           - **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
-           ''')
     with tab2:
         st.markdown("## Visualizations")
         # 3a. Treemap (As requested in Tab 2)
         fig_treemap = px.treemap(
             df,
@@ -687,12 +644,11 @@ if st.session_state.show_results and not st.session_state.results_df.empty:
         fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
         st.markdown("### Entity Distribution Treemap")
         st.plotly_chart(fig_treemap, use_container_width=True)
-        st.markdown("---")
         # 3b. Pie Chart and Category Bar Chart side-by-side
         col_pie, col_bar_cat = st.columns(2)
         # Pie Chart
         grouped_counts = df['category'].value_counts().reset_index()
         grouped_counts.columns = ['Category', 'Count']
@@ -703,71 +659,48 @@ if st.session_state.show_results and not st.session_state.results_df.empty:
         with col_pie:
             st.markdown("### Distribution of Entities by Category")
             st.plotly_chart(fig_pie, use_container_width=True)
         # Category Bar Chart
         fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',
-                                 color='Category', title='Total Entities per Category',
-                                 color_discrete_sequence=px.colors.qualitative.Pastel)
         fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10))
         with col_bar_cat:
             st.markdown("### Total Entities per Category")
             st.plotly_chart(fig_bar_category, use_container_width=True)
-        st.markdown("---")
         # 3c. Most Frequent Entities Bar Chart
         word_counts = df['text'].value_counts().reset_index()
         word_counts.columns = ['Entity', 'Count']
         repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
         st.markdown("### Top 10 Most Frequent Entities")
         if not repeating_entities.empty:
             fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',
-                                 color='Entity', title='Top 10 Most Frequent Entities',
-                                 color_discrete_sequence=px.colors.sequential.Plasma)
             fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=100))
             st.plotly_chart(fig_bar_freq, use_container_width=True)
         else:
             st.info("No entities appear more than once in the text for visualization.")
-        st.markdown("---")
         # 3d. Network Graph
         st.markdown("### Entity Relationship Map")
         network_fig = generate_network_graph(df, text_input)
         st.plotly_chart(network_fig, use_container_width=True)
         st.markdown("---")
         # 4. Topic Modeling
         st.markdown("## Topic Modeling")
         if df_topic_data is not None and not df_topic_data.empty:
             st.markdown("### Bubble size = word weight")
             bubble_figure = create_topic_word_bubbles(df_topic_data)
             st.plotly_chart(bubble_figure, use_container_width=True)
             st.markdown("### Top Words by Topic")
-            # Simple table display for topic data
-            st.dataframe(df_topic_data, use_container_width=True)
         else:
-            st.info("Topic Modeling requires more unique input (at least two unique entities) to be performed.")
-elif st.session_state.show_results and st.session_state.results_df.empty:
-    st.warning("No entities were extracted from the provided text.")
-st.write("Use the following code to embed the DataHarvest web app on your website. Feel free to adjust the width and height values to fit your page.")
-code = '''
-<iframe
-src="https://aiecosystem-dataharvest.hf.space"
-frameborder="0"
-width="850"
-height="450"
-></iframe>
-'''
-st.code(code, language="html")

     "person": "#10b981",
     "country": "#3b82f6",
     "city": "#4ade80",
     "organization": "#f59e0b",
     "date": "#8b5cf6",
     "time": "#ec4899",
     "cardinal": "#06b6d4",
     "money": "#f43f5e",
     "position": "#a855f7",
 }
 # --- Label Definitions and Category Mapping (Used by the App) ---
 labels = list(entity_color_map.keys())
 labels = ["person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"]
 category_mapping = {
    "People": ["person", "organization", "position"],
    "Locations": ["country", "city"],
    "Time": ["date", "time"],
+   "Numbers": ["money", "cardinal"]}
+# CORRECTION 1: Reverse category mapping definition moved here for app-wide access
+reverse_category_mapping = {label: category
+for category, label_list in category_mapping.items() for label in label_list}
 # --- Utility Functions for Analysis and Plotly ---
 def extract_label(node_name):
     """Generates HTML to display text with entities highlighted and colored."""
     if df_entities.empty:
         return text
     # Sort entities by start index descending to insert highlights without affecting subsequent indices
     entities = df_entities.sort_values(by='start', ascending=False).to_dict('records')
     highlighted_text = text
     for entity in entities:
         start = entity['start']
         end = entity['end']
         label = entity['label']
         entity_text = entity['text']
         color = entity_color_map.get(label, '#000000')
         # Create a span with background color and tooltip
         highlight_html = f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" title="{label}">{entity_text}</span>'
         # Replace the original text segment with the highlighted HTML
         highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
     # Use a div to mimic the Streamlit input box style for the report
     return f'<div style="border: 1px solid #CCCCCC; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
     documents = df_entities['text'].unique().tolist()
     if len(documents) < 2:
         return None
     N = min(num_top_words, len(documents))
     try:
         tfidf_vectorizer = TfidfVectorizer(
         )
         tfidf = tfidf_vectorizer.fit_transform(documents)
         tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
         lda = LatentDirichletAllocation(
             n_components=num_topics, max_iter=5, learning_method='online',random_state=42, n_jobs=-1
         )
     # Renaming columns to match the output of perform_topic_modeling
     df_topic_data = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
     df_topic_data['x_pos'] = df_topic_data.index # Use index for x-position in the app
     if df_topic_data.empty:
         return None
     fig = px.scatter(
         height=600,
         margin=dict(t=50, b=100, l=50, r=10),
     )
+    fig.update_traces(hovertemplate='<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<extra></extra>',marker=dict(line=dict(width=1, color='DarkSlateGrey')))
     return fig
 def generate_network_graph(df, raw_text):
     """
     entity_counts = df['text'].value_counts().reset_index()
     entity_counts.columns = ['text', 'frequency']
     unique_entities = df.drop_duplicates(subset=['text', 'label']).merge(entity_counts, on='text')
     if unique_entities.shape[0] < 2:
         return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")
     num_nodes = len(unique_entities)
     thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
     radius = 10
     unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
     unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
     pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')
+    edges = set()
     sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
     for sentence in sentences:
         entities_in_sentence = []
         for entity_text in unique_entities['text'].unique():
             if entity_text.lower() in sentence.lower():
                 entities_in_sentence.append(entity_text)
         unique_entities_in_sentence = list(set(entities_in_sentence))
         for i in range(len(unique_entities_in_sentence)):
             for j in range(i + 1, len(unique_entities_in_sentence)):
                 node1 = unique_entities_in_sentence[i]
     edge_x = []
     edge_y = []
     for edge in edges:
         n1, n2 = edge
         if n1 in pos_map and n2 in pos_map:
             seen_labels.add(label)
             color = entity_color_map.get(label, '#cccccc')
             legend_traces.append(go.Scatter(
+                x=[None], y=[None], mode='markers', marker=dict(size=10, color=color),name=f"{label.capitalize()}", showlegend=True
             ))
     for trace in legend_traces:
         fig.add_trace(trace)
         margin=dict(t=50, b=10, l=10, r=10),
         height=600
     )
     return fig
 # --- NEW CSV GENERATION FUNCTION ---
 def generate_entity_csv(df):
     """
 # -----------------------------------
 # --- Existing App Functionality (HTML) ---
 def generate_html_report(df, text_input, elapsed_time, df_topic_data):
     """
     Generates a full HTML report containing all analysis results and
     visualizations. (Simplified HTML generation for brevity in code)
     """
     # 1. Generate Visualizations (Plotly HTML)
     # 1a. Treemap
     fig_treemap = px.treemap(
         df,
     word_counts.columns = ['Entity', 'Count']
     repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
     bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
     if not repeating_entities.empty:
         fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
         fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
          classes='table table-striped',
          index=False
     )
     # 4. Construct the Final HTML
     html_content = f"""<!DOCTYPE html><html lang="en"><head>
     <meta charset="UTF-8">
     """
     return html_content
 # --- Page Configuration and Styling (No Sidebar, Removed Pink) ---
 st.set_page_config(layout="wide", page_title="NER & Topic Report App")
 st.markdown(
     </style>
     """,
     unsafe_allow_html=True)
+st.subheader("Entity and Topic Analysis Report Generator", divider="blue")
+st.link_button("by nlpblogs", "https://nlpblogs.com", type="secondary")
+# CORRECTION 2: Removed duplicated expander. The following is the second, correct one.
 expander = st.expander("**Important notes**")
+expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"**Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.**How to Use:** Type or paste your text (max. 1000 words) into the text area below, press Ctrl + Enter, and then click the 'Results' button.**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.""")
 st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
 comet_initialized = bool(COMET_API_KEY and COMET_WORKSPACE and COMET_PROJECT_NAME)
 # --- Model Loading ---
+@st.cache_resourced
 def load_ner_model():
     """Loads the GLiNER model and caches it."""
     try:
     "end of the year. The platform is designed to be compatible with both Windows and Linux operating systems. "
     "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
     "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
+    "general public by October 1st. The goal is to deploy the Astra v2 platform before the next solar eclipse event in 2026.")
 # -----------------------------------
 # --- Session State Initialization (CRITICAL FIX) ---
 if 'show_results' not in st.session_state:
     st.session_state.show_results = False
     height=250,
     key='my_text_area',
     value=st.session_state.my_text_area)
 word_count = len(text.split())
 st.markdown(f"**Word count:** {word_count}/{word_limit}")
 st.button("Clear text", on_click=clear_text)
             if text != st.session_state.last_text:
                 st.session_state.last_text = text
                 start_time = time.time()
                 # --- Model Prediction & Dataframe Creation ---
                 entities = model.predict_entities(text, labels)
                 df = pd.DataFrame(entities)
                 if not df.empty:
                     df['text'] = df['text'].apply(remove_trailing_punctuation)
                     df['category'] = df['label'].map(reverse_category_mapping)
                     st.session_state.results_df = df
                     unique_entity_count = len(df['text'].unique())
                     N_TOP_WORDS_TO_USE = min(10, unique_entity_count)
                     st.session_state.topic_results = perform_topic_modeling(
                         df,
                         num_topics=2,
                         num_top_words=N_TOP_WORDS_TO_USE
                     )
                     if comet_initialized:
                         experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
                         experiment.log_parameter("input_text", text)
                 else:
                     st.session_state.results_df = pd.DataFrame()
                     st.session_state.topic_results = None
                 end_time = time.time()
                 st.session_state.elapsed_time = end_time - start_time
+                        st.session_state.show_results = True
 # --- Results Display ---
 if st.session_state.show_results and not st.session_state.results_df.empty:
     st.success(f"Processing complete in {st.session_state.elapsed_time:.2f} seconds! 🎉")
     df = st.session_state.results_df
     text_input = st.session_state.last_text
     elapsed_time = st.session_state.elapsed_time
     df_topic_data = st.session_state.topic_results
     # --- Highlighted Text and Download Buttons (Above Tabs) ---
     st.subheader("1. Analyzed Text & Extracted Entities", divider="blue")
     st.markdown(
         highlight_entities(text_input, df),
         unsafe_allow_html=True
     )
     st.subheader("Downloads", divider="blue")
     col1, col2, col3 = st.columns([1, 1, 3])
     # 1. Download CSV
     csv_buffer = generate_entity_csv(df)
     col1.download_button(
         file_name="ner_entities.csv",
         mime="text/csv"
     )
     # 2. Download HTML Report
     html_content = generate_html_report(df, text_input, elapsed_time, df_topic_data)
     col2.download_button(
         file_name="ner_analysis_report.html",
         mime="text/html"
     )
     st.markdown("---")
+    # CORRECTION 1: Tabs Implementation
+    tab1, tab2 = st.tabs(["📊 Entity Data (Table) & Glossary", "📈 Visualizations & Topics"])
     with tab1:
         # Create the summary table with the requested column name changes
         grouped_entity_table = df.groupby(['category', 'label']).size().reset_index(name='Count')
         grouped_entity_table.columns = ['Category', 'Entity', 'Count']
         st.markdown("## Entity Counts by Category and Entity")
         st.dataframe(grouped_entity_table.sort_values(by=['Category', 'Count'], ascending=[True, False]), use_container_width=True)
+        st.markdown("---")
+        st.markdown("## Glossary of Tags and Category Mapping")
+        # Display Category Mapping (forward and reverse)
+        st.markdown("### Category to Entity Label Mapping (`category_mapping`)")
+        st.json(category_mapping)
+        # Display the requested reverse mapping below the table
+        st.markdown("### Entity Label to Category Mapping (Reverse Glossary) (`reverse_category_mapping`)")
+        st.json(reverse_category_mapping) # Display the reverse mapping which was moved to the top
+        # Display general glossary
+        st.markdown("### General Glossary for Extracted Entities")
+        st.write("""
+        - **start**: Index of the start of the corresponding entity.
+        - **end**: Index of the end of the corresponding entity.
+        - **text**: Entity extracted from your text data.
+        - **label**: The entity tag assigned to the extracted entity.
+        - **category**: The broad category (e.g., 'People') derived from the 'label'.
+        - **score**: Accuracy score; how accurately a tag has been assigned to a given entity.
+        """)
     with tab2:
         st.markdown("## Visualizations")
         # 3a. Treemap (As requested in Tab 2)
         fig_treemap = px.treemap(
             df,
         fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
         st.markdown("### Entity Distribution Treemap")
         st.plotly_chart(fig_treemap, use_container_width=True)
+        st.markdown("---")
         # 3b. Pie Chart and Category Bar Chart side-by-side
         col_pie, col_bar_cat = st.columns(2)
         # Pie Chart
         grouped_counts = df['category'].value_counts().reset_index()
         grouped_counts.columns = ['Category', 'Count']
         with col_pie:
             st.markdown("### Distribution of Entities by Category")
             st.plotly_chart(fig_pie, use_container_width=True)
         # Category Bar Chart
         fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',
+                                  color='Category', title='Total Entities per Category',
+                                  color_discrete_sequence=px.colors.qualitative.Pastel)
         fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10))
         with col_bar_cat:
             st.markdown("### Total Entities per Category")
             st.plotly_chart(fig_bar_category, use_container_width=True)
+        st.markdown("---")
         # 3c. Most Frequent Entities Bar Chart
         word_counts = df['text'].value_counts().reset_index()
         word_counts.columns = ['Entity', 'Count']
         repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
         st.markdown("### Top 10 Most Frequent Entities")
         if not repeating_entities.empty:
             fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',
+                                  color='Entity', title='Top 10 Most Frequent Entities',
+                                  color_discrete_sequence=px.colors.sequential.Plasma)
             fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=100))
             st.plotly_chart(fig_bar_freq, use_container_width=True)
         else:
             st.info("No entities appear more than once in the text for visualization.")
+        st.markdown("---")
         # 3d. Network Graph
         st.markdown("### Entity Relationship Map")
         network_fig = generate_network_graph(df, text_input)
         st.plotly_chart(network_fig, use_container_width=True)
         st.markdown("---")
         # 4. Topic Modeling
         st.markdown("## Topic Modeling")
         if df_topic_data is not None and not df_topic_data.empty:
             st.markdown("### Bubble size = word weight")
             bubble_figure = create_topic_word_bubbles(df_topic_data)
             st.plotly_chart(bubble_figure, use_container_width=True)
             st.markdown("### Top Words by Topic")
+            # Simple table display of topic words
+            st.dataframe(df_topic_data.rename(columns={'Topic_ID': 'Topic ID', 'Word': 'Top Word', 'Weight': 'Weight'}), use_container_width=True, hide_index=True)
         else:
+            st.info("Topic Modeling requires text containing at least two unique entities.")