AIEcosystem committed on
Commit
2783c81
·
verified ·
1 Parent(s): 0c28caf

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +22 -20
src/streamlit_app.py CHANGED
@@ -263,7 +263,8 @@ def generate_entity_csv(df):
263
  # --- HTML REPORT GENERATION FUNCTION ---
264
  def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_color_map, report_title="Entity and Topic Analysis Report", branding_html=""):
265
  """
266
- Generates a full HTML report containing all analysis results and visualizations.
 
267
  """
268
  # 1. Generate Visualizations (Plotly HTML)
269
 
@@ -324,10 +325,17 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
324
  highlighted_text_html = highlight_entities(text_input, df, entity_color_map).replace("div style", "div class='highlighted-text' style")
325
 
326
  # 3. Entity Tables (Pandas to HTML)
327
- entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
328
- classes='table table-striped',
329
- index=False
 
 
 
 
 
 
330
  )
 
331
 
332
  # 4. Construct the Final HTML
333
  html_content = f"""<!DOCTYPE html><html lang="en"><head>
@@ -344,8 +352,9 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
344
  .metadata {{ background-color: #e6f0ff; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
345
  .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
346
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
347
- table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
348
- table th {{ background-color: #f0f0f0; }}
 
349
  .highlighted-text {{ border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
350
  </style>
351
  </head>
@@ -518,7 +527,7 @@ DEFAULT_TEXT = (
518
  "space capabilities within the **European Union**. The core team, including lead engineer Marcus Davies, will hold "
519
  "their first collaborative workshop in Berlin, Germany, on August 15th. The community response on social "
520
  "media platform X (under the username @TechCEO) was overwhelmingly positive, with many major tech "
521
- "publications, including Wired Magazine, predicting a major impact on the space technology industry by the "
522
  "end of the year, further strengthening the technological standing of the **European Union**. The platform is designed to be compatible with both Windows and Linux operating systems. "
523
  "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
524
  "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
@@ -565,7 +574,9 @@ custom_labels_text = st.text_area(
565
  "**Optional:** Enter your own comma-separated entity labels here (e.g., `product, symptom, client_id`). Leave blank for default labels.",
566
  height=60,
567
  key='custom_labels_input',
568
- placeholder="e.g., product, symptom, client_id"
 
 
569
  )
570
 
571
  col_results, col_clear = st.columns([1, 1])
@@ -681,14 +692,7 @@ if st.session_state.show_results:
681
  st.markdown("### 2. Detailed Entity Analysis")
682
  tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
683
 
684
- if st.session_state.is_custom_mode:
685
- unique_categories = ["User Defined Entities"]
686
- tabs_to_show = df['label'].unique().tolist()
687
- st.markdown(f"**Custom Labels Detected: {', '.join(tabs_to_show)}**")
688
- else:
689
- unique_categories = list(FIXED_CATEGORY_MAPPING.keys())
690
-
691
- # --- Function to Apply Conditional Coloring to Scores ---
692
  def color_score_gradient(df_input):
693
  """Applies a color gradient to the 'score' column using Pandas Styler."""
694
  return df_input.style.background_gradient(
@@ -712,6 +716,7 @@ if st.session_state.show_results:
712
  st.markdown(f"##### {label.capitalize()} Entities ({len(df_label)} total)")
713
  st.dataframe(styled_df_label, use_container_width=True)
714
  else:
 
715
  tabs_category = st.tabs(unique_categories)
716
 
717
  for category, tab in zip(unique_categories, tabs_category):
@@ -841,11 +846,9 @@ if st.session_state.show_results:
841
 
842
  df_topic_data = st.session_state.topic_results
843
 
844
- # --- CRITICAL: This is the conditional block that must have correct indentation ---
845
  if df_topic_data is not None and not df_topic_data.empty:
846
  st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
847
  st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
848
- # END CRITICAL BLOCK
849
  else:
850
  st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
851
 
@@ -900,5 +903,4 @@ if st.session_state.show_results:
900
  file_name="ner_topic_full_report.html",
901
  mime="text/html",
902
  use_container_width=True
903
- )
904
-
 
263
  # --- HTML REPORT GENERATION FUNCTION ---
264
  def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_color_map, report_title="Entity and Topic Analysis Report", branding_html=""):
265
  """
266
+ Generates a full HTML report containing all analysis results and visualizations,
267
+ including color gradient styling for the score column in the main table.
268
  """
269
  # 1. Generate Visualizations (Plotly HTML)
270
 
 
325
  highlighted_text_html = highlight_entities(text_input, df, entity_color_map).replace("div style", "div class='highlighted-text' style")
326
 
327
  # 3. Entity Tables (Pandas to HTML)
328
+ # --- FIX: Apply color gradient styling to the DataFrame BEFORE converting to HTML ---
329
+ styled_df = df[['text', 'label', 'score', 'start', 'end', 'category']].style.background_gradient(
330
+ cmap='YlGnBu',
331
+ subset=['score']
332
+ ).format({'score': '{:.4f}'})
333
+
334
+ entity_table_html = styled_df.to_html(
335
+ classes='table table-striped',
336
+ index=False,
337
  )
338
+ # --- END FIX ---
339
 
340
  # 4. Construct the Final HTML
341
  html_content = f"""<!DOCTYPE html><html lang="en"><head>
 
352
  .metadata {{ background-color: #e6f0ff; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
353
  .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
354
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
355
+ /* Target the cells generated by pandas styling */
356
+ table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
357
+ table th {{ border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f0f0f0; }}
358
  .highlighted-text {{ border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
359
  </style>
360
  </head>
 
527
  "space capabilities within the **European Union**. The core team, including lead engineer Marcus Davies, will hold "
528
  "their first collaborative workshop in Berlin, Germany, on August 15th. The community response on social "
529
  "media platform X (under the username @TechCEO) was overwhelmingly positive, with many major tech "
530
+ "publications, including Wired Magazine, predicting a major impact on the space technology industry by the "
531
  "end of the year, further strengthening the technological standing of the **European Union**. The platform is designed to be compatible with both Windows and Linux operating systems. "
532
  "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
533
  "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
 
574
  "**Optional:** Enter your own comma-separated entity labels here (e.g., `product, symptom, client_id`). Leave blank for default labels.",
575
  height=60,
576
  key='custom_labels_input',
577
+ # --- UPDATED PLACEHOLDER ENTITIES ---
578
+ placeholder="e.g., product_name, medical_symptom, customer_id, location_code, internal_project"
579
+ # --- END UPDATE ---
580
  )
581
 
582
  col_results, col_clear = st.columns([1, 1])
 
692
  st.markdown("### 2. Detailed Entity Analysis")
693
  tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
694
 
695
+ # --- Function to Apply Conditional Coloring to Scores (For Streamlit UI only) ---
 
 
 
 
 
 
 
696
  def color_score_gradient(df_input):
697
  """Applies a color gradient to the 'score' column using Pandas Styler."""
698
  return df_input.style.background_gradient(
 
716
  st.markdown(f"##### {label.capitalize()} Entities ({len(df_label)} total)")
717
  st.dataframe(styled_df_label, use_container_width=True)
718
  else:
719
+ unique_categories = list(FIXED_CATEGORY_MAPPING.keys())
720
  tabs_category = st.tabs(unique_categories)
721
 
722
  for category, tab in zip(unique_categories, tabs_category):
 
846
 
847
  df_topic_data = st.session_state.topic_results
848
 
 
849
  if df_topic_data is not None and not df_topic_data.empty:
850
  st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
851
  st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
 
852
  else:
853
  st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
854
 
 
903
  file_name="ner_topic_full_report.html",
904
  mime="text/html",
905
  use_container_width=True
906
+ )