AIEcosystem committed on
Commit
5b7dba1
·
verified ·
1 Parent(s): 6ae5b23

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +73 -64
src/streamlit_app.py CHANGED
@@ -39,23 +39,23 @@ except ImportError:
39
  # Set HF_HOME environment variable to a writable path
40
  os.environ['HF_HOME'] = '/tmp'
41
 
42
- # --- Color Map for Highlighting and Network Graph Nodes ---
43
  entity_color_map = {
44
- "person": "#10b981",
45
- "username": "#3b82f6",
46
- "hashtag": "#4ade80",
47
- "mention" : "#f97316",
48
- "organization": "#f59e0b",
49
- "community": "#8b5cf6",
50
- "position": "#ec4899",
51
- "location": "#06b6d4",
52
- "event": "#f43f5e",
53
- "product": "#a855f7",
54
- "platform": "#eab308",
55
- "date": "#6366f1",
56
- "media_type": "#14b8a6",
57
- "url": "#60a5fa",
58
- "nationality_religion": "#fb7185"
59
  }
60
 
61
  # --- Label Definitions and Category Mapping (Used by the App and PPTX) ---
@@ -96,6 +96,7 @@ def highlight_entities(text, df_entities):
96
  end = entity['end']
97
  label = entity['label']
98
  entity_text = entity['text']
 
99
  color = entity_color_map.get(label, '#000000')
100
 
101
  # Create a span with background color and tooltip
@@ -103,8 +104,8 @@ def highlight_entities(text, df_entities):
103
  # Replace the original text segment with the highlighted HTML
104
  highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
105
 
106
- # Use a div to mimic the Streamlit input box style for the report
107
- return f'<div style="border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
108
 
109
  def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
110
  """
@@ -162,7 +163,7 @@ def create_topic_word_bubbles(df_topic_data):
162
  hover_name='word',
163
  size_max=80,
164
  title='Topic Word Weights (Bubble Chart)',
165
- color_discrete_sequence=px.colors.qualitative.Bold,
166
  labels={
167
  'x_pos': 'Entity/Word Index',
168
  'weight': 'Word Weight',
@@ -176,8 +177,8 @@ def create_topic_word_bubbles(df_topic_data):
176
  xaxis={'tickangle': -45, 'showgrid': False},
177
  yaxis={'showgrid': True},
178
  showlegend=True,
179
- plot_bgcolor='#FFF0F5',
180
- paper_bgcolor='#FFF0F5',
181
  height=600,
182
  margin=dict(t=50, b=100, l=50, r=10),
183
  )
@@ -253,7 +254,7 @@ def generate_network_graph(df, raw_text):
253
  showlegend=False,
254
  marker=dict(
255
  size=unique_entities['frequency'] * 5 + 10,
256
- color=[entity_color_map.get(label, '#cccccc') for label in unique_entities['label']], # 'label' is safe here (local to list comprehension)
257
  line_width=1,
258
  line_color='black',
259
  opacity=0.9
@@ -325,13 +326,16 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
325
  # Layout 5: Title and Content (often good for charts)
326
  chart_layout = prs.slide_layouts[5]
327
 
328
- # 1. Title Slide
329
  title_slide_layout = prs.slide_layouts[0]
330
  slide = prs.slides.add_slide(title_slide_layout)
331
- title = slide.shapes.title
332
- subtitle = slide.placeholders[1]
333
- title.text = "NER & Topic Analysis Report"
334
- subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"
 
 
 
335
 
336
  # 2. Source Text Slide
337
  slide = prs.slides.add_slide(chart_layout)
@@ -382,13 +386,6 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
382
  cell.fill.solid()
383
  # Optional: Add simple styling to header
384
 
385
- # Fill in the data
386
- for i in range(rows):
387
- for j in range(cols):
388
- cell = table.cell(i+1, j)
389
- cell.text = str(grouped_entity_table.iloc[i, j])
390
- # Optional: Style data cells
391
-
392
  # 4. Treemap Slide (Visualization)
393
  fig_treemap = px.treemap(
394
  df,
@@ -396,7 +393,7 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
396
  values='score',
397
  color='category',
398
  title="Entity Distribution by Category and Label",
399
- color_discrete_sequence=px.colors.qualitative.Dark24
400
  )
401
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
402
  treemap_image = fig_to_image_buffer(fig_treemap)
@@ -409,7 +406,9 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
409
  # Placeholder if image conversion failed (e.g., Kaleido issue)
410
  slide = prs.slides.add_slide(chart_layout)
411
  slide.shapes.title.text = "Entity Distribution Treemap (Chart Failed)"
412
- slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
 
 
413
 
414
 
415
  # 5. Entity Count Bar Chart Slide (Visualization)
@@ -421,7 +420,7 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
421
  y='Count',
422
  color='Category',
423
  title='Total Entities per Category',
424
- color_discrete_sequence=px.colors.qualitative.Pastel
425
  )
426
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
427
  bar_category_image = fig_to_image_buffer(fig_bar_category)
@@ -433,7 +432,9 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
433
  else:
434
  slide = prs.slides.add_slide(chart_layout)
435
  slide.shapes.title.text = "Total Entities per Category (Chart Failed)"
436
- slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
 
 
437
 
438
 
439
  # 6. Topic Modeling Bubble Chart Slide
@@ -449,13 +450,17 @@ def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_ca
449
  else:
450
  slide = prs.slides.add_slide(chart_layout)
451
  slide.shapes.title.text = "Topic Word Weights (Chart Failed)"
452
- slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
 
 
453
 
454
  else:
455
  # Placeholder slide if topic modeling is not available
456
  slide = prs.slides.add_slide(chart_layout)
457
  slide.shapes.title.text = "Topic Modeling Results"
458
- slide.placeholders[1].text = "Topic Modeling requires more unique input (at least two unique entities)."
 
 
459
 
460
  # Save the presentation to an in-memory buffer
461
  pptx_buffer = BytesIO()
@@ -490,8 +495,8 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
490
  path=[px.Constant("All Entities"), 'category', 'label', 'text'],
491
  values='score',
492
  color='category',
493
- title="Entity Distribution by Category and Label",
494
- color_discrete_sequence=px.colors.qualitative.Dark24
495
  )
496
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
497
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
@@ -499,12 +504,12 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
499
  # 1b. Pie Chart
500
  grouped_counts = df['category'].value_counts().reset_index()
501
  grouped_counts.columns = ['Category', 'Count']
502
- fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.RdBu)
503
  fig_pie.update_layout(margin=dict(t=50, b=10))
504
  pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
505
 
506
  # 1c. Bar Chart (Category Count)
507
- fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.qualitative.Pastel)
508
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
509
  bar_category_html = fig_bar_category.to_html(full_html=False,include_plotlyjs='cdn')
510
 
@@ -515,7 +520,7 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
515
  bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
516
 
517
  if not repeating_entities.empty:
518
- fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Plasma)
519
  fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
520
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
521
 
@@ -524,7 +529,7 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
524
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
525
 
526
  # 1f. Topic Charts HTML
527
- topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
528
  if df_topic_data is not None and not df_topic_data.empty:
529
  bubble_figure = create_topic_word_bubbles(df_topic_data)
530
  if bubble_figure:
@@ -532,12 +537,13 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
532
  else:
533
  topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
534
  else:
535
- topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #FF69B4;">'
536
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
537
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
538
  topic_charts_html += '</div>'
539
 
540
  # 2. Get Highlighted Text
 
541
  highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
542
 
543
  # 3. Entity Tables (Pandas to HTML)
@@ -547,23 +553,24 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
547
  )
548
 
549
  # 4. Construct the Final HTML
 
550
  html_content = f"""<!DOCTYPE html><html lang="en"><head>
551
  <meta charset="UTF-8">
552
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
553
  <title>Entity and Topic Analysis Report</title>
554
  <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
555
  <style>
556
- body {{ font-family: 'Inter', sans-serif; margin: 0; padding: 20px; background-color: #f4f4f9; color: #333; }}
557
  .container {{ max-width: 1200px; margin: 0 auto; background-color: #ffffff; padding: 30px; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }}
558
- h1 {{ color: #FF69B4; border-bottom: 3px solid #FF69B4; padding-bottom: 10px; margin-top: 0; }}
559
- h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
560
  h3 {{ color: #555; margin-top: 20px; }}
561
- .metadata {{ background-color: #FFE4E1; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
562
  .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
563
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
564
  table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
565
  table th {{ background-color: #f0f0f0; }}
566
- .highlighted-text {{ border: 1px solid #FF69B4; padding: 15px; border-radius: 5px; background-color: #FFFAF0; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
567
  </style></head><body>
568
  <div class="container">
569
  <h1>Entity and Topic Analysis Report</h1>
@@ -581,13 +588,13 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
581
  <h2>3. Data Visualizations</h2>
582
  <h3>3.1 Entity Distribution Treemap</h3>
583
  <div class="chart-box">{treemap_html}</div>
584
- <h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
585
  <div class="chart-box">{pie_html}</div>
586
  <div class="chart-box">{bar_category_html}</div>
587
  <div class="chart-box">{bar_freq_html}</div>
588
- <h3>3.3 Entity Co-occurrence Network (Edges = Same Sentence)</h3>
589
  <div class="chart-box">{network_html}</div>
590
- <h2>4. Topic Modeling (LDA on Entities)</h2>
591
  {topic_charts_html}
592
  </div></body></html>
593
  """
@@ -601,21 +608,21 @@ st.markdown(
601
  <style>
602
  /* Overall app container - NO SIDEBAR */
603
  .main {
604
- background-color: #FFF0F5; /* Blanched Almond/Light Pink */
605
  color: #333333; /* Dark grey text for contrast */
606
  }
607
  .stApp {
608
- background-color: #FFF0F5;
609
  }
610
  /* Text Area background and text color (input fields) */
611
  .stTextArea textarea {
612
- background-color: #FFFAF0; /* Floral White/Near white for input fields */
613
  color: #000000; /* Black text for input */
614
- border: 1px solid #FF69B4; /* Deep Pink border */
615
  }
616
  /* Button styling */
617
  .stButton > button {
618
- background-color: #FF69B4; /* Deep Pink for the button */
619
  color: #FFFFFF; /* White text for contrast */
620
  border: none;
621
  padding: 10px 20px;
@@ -623,21 +630,23 @@ st.markdown(
623
  transition: background-color 0.3s;
624
  }
625
  .stButton > button:hover {
626
- background-color: #E05C9E; /* Slightly darker pink on hover */
627
  }
628
  /* Expander header and content background */
629
  .streamlit-expanderHeader, .streamlit-expanderContent {
630
- background-color: #FFE4E1; /* Misty Rose/Lighter Pink */
631
  color: #333333;
632
  }
633
  </style>
634
  """,
635
  unsafe_allow_html=True)
636
- st.subheader("NER and Topic Analysis Report Generator", divider="rainbow")
637
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="secondary")
638
  expander = st.expander("**Important notes**")
639
  expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
640
- """)
 
 
641
  st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
642
 
643
  # --- Comet ML Setup (Placeholder/Conditional) ---
 
39
  # Set HF_HOME environment variable to a writable path
40
  os.environ['HF_HOME'] = '/tmp'
41
 
42
+ # --- Color Map for Highlighting and Network Graph Nodes (Monochrome Palette) ---
43
  entity_color_map = {
44
+ "person": "#444444", # Dark Gray
45
+ "username": "#666666", # Medium-Dark Gray
46
+ "hashtag": "#888888", # Medium Gray
47
+ "mention" : "#aaaaaa", # Medium-Light Gray
48
+ "organization": "#333333", # Very Dark Gray
49
+ "community": "#bbbbbb", # Light Gray
50
+ "position": "#555555", # Slightly Dark Gray
51
+ "location": "#777777", # Neutral Gray
52
+ "event": "#999999", # Silver
53
+ "product": "#cccccc", # Light Gray/Silver
54
+ "platform": "#222222", # Black-ish
55
+ "date": "#dddddd", # Very Light Gray
56
+ "media_type": "#333333", # Very Dark Gray
57
+ "url": "#666666", # Medium-Dark Gray
58
+ "nationality_religion": "#aaaaaa" # Medium-Light Gray
59
  }
60
 
61
  # --- Label Definitions and Category Mapping (Used by the App and PPTX) ---
 
96
  end = entity['end']
97
  label = entity['label']
98
  entity_text = entity['text']
99
+ # Use monochrome map
100
  color = entity_color_map.get(label, '#000000')
101
 
102
  # Create a span with background color and tooltip
 
104
  # Replace the original text segment with the highlighted HTML
105
  highlighted_text = highlighted_text[:start] + highlight_html + highlighted_text[end:]
106
 
107
+ # Use a div to mimic the Streamlit input box style for the report - now in monochrome
108
+ return f'<div style="border: 1px solid #AAAAAA; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
109
 
110
  def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
111
  """
 
163
  hover_name='word',
164
  size_max=80,
165
  title='Topic Word Weights (Bubble Chart)',
166
+ color_discrete_sequence=px.colors.sequential.Greys, # Using grayscale palette
167
  labels={
168
  'x_pos': 'Entity/Word Index',
169
  'weight': 'Word Weight',
 
177
  xaxis={'tickangle': -45, 'showgrid': False},
178
  yaxis={'showgrid': True},
179
  showlegend=True,
180
+ plot_bgcolor='#f9f9f9', # Neutral background
181
+ paper_bgcolor='#f9f9f9', # Neutral background
182
  height=600,
183
  margin=dict(t=50, b=100, l=50, r=10),
184
  )
 
254
  showlegend=False,
255
  marker=dict(
256
  size=unique_entities['frequency'] * 5 + 10,
257
+ color=[entity_color_map.get(label, '#cccccc') for label in unique_entities['label']], # Use monochrome map
258
  line_width=1,
259
  line_color='black',
260
  opacity=0.9
 
326
  # Layout 5: Title and Content (often good for charts)
327
  chart_layout = prs.slide_layouts[5]
328
 
329
+ # 1. Title Slide (Layout 0)
330
  title_slide_layout = prs.slide_layouts[0]
331
  slide = prs.slides.add_slide(title_slide_layout)
332
+ slide.shapes.title.text = "NER & Topic Analysis Report"
333
+
334
+ # FIX: Add safety check for placeholder index 1 (subtitle)
335
+ if len(slide.placeholders) > 1:
336
+ subtitle = slide.placeholders[1]
337
+ subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"
338
+ # End FIX
339
 
340
  # 2. Source Text Slide
341
  slide = prs.slides.add_slide(chart_layout)
 
386
  cell.fill.solid()
387
  # Optional: Add simple styling to header
388
 
 
 
 
 
 
 
 
389
  # 4. Treemap Slide (Visualization)
390
  fig_treemap = px.treemap(
391
  df,
 
393
  values='score',
394
  color='category',
395
  title="Entity Distribution by Category and Label",
396
+ color_discrete_sequence=px.colors.sequential.Greys # Monochrome palette
397
  )
398
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
399
  treemap_image = fig_to_image_buffer(fig_treemap)
 
406
  # Placeholder if image conversion failed (e.g., Kaleido issue)
407
  slide = prs.slides.add_slide(chart_layout)
408
  slide.shapes.title.text = "Entity Distribution Treemap (Chart Failed)"
409
+ # FIX: Safety check for placeholder index 1
410
+ if len(slide.placeholders) > 1:
411
+ slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
412
 
413
 
414
  # 5. Entity Count Bar Chart Slide (Visualization)
 
420
  y='Count',
421
  color='Category',
422
  title='Total Entities per Category',
423
+ color_discrete_sequence=px.colors.sequential.Greys # Monochrome palette
424
  )
425
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
426
  bar_category_image = fig_to_image_buffer(fig_bar_category)
 
432
  else:
433
  slide = prs.slides.add_slide(chart_layout)
434
  slide.shapes.title.text = "Total Entities per Category (Chart Failed)"
435
+ # FIX: Safety check for placeholder index 1
436
+ if len(slide.placeholders) > 1:
437
+ slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
438
 
439
 
440
  # 6. Topic Modeling Bubble Chart Slide
 
450
  else:
451
  slide = prs.slides.add_slide(chart_layout)
452
  slide.shapes.title.text = "Topic Word Weights (Chart Failed)"
453
+ # FIX: Safety check for placeholder index 1
454
+ if len(slide.placeholders) > 1:
455
+ slide.placeholders[1].text = "Chart generation failed. Check app logs for Kaleido errors."
456
 
457
  else:
458
  # Placeholder slide if topic modeling is not available
459
  slide = prs.slides.add_slide(chart_layout)
460
  slide.shapes.title.text = "Topic Modeling Results"
461
+ # FIX: Safety check for placeholder index 1
462
+ if len(slide.placeholders) > 1:
463
+ slide.placeholders[1].text = "Topic Modeling requires more unique input (at least two unique entities)."
464
 
465
  # Save the presentation to an in-memory buffer
466
  pptx_buffer = BytesIO()
 
495
  path=[px.Constant("All Entities"), 'category', 'label', 'text'],
496
  values='score',
497
  color='category',
498
+
499
+ color_discrete_sequence=px.colors.sequential.Greys # Monochrome palette
500
  )
501
  fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
502
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
 
504
  # 1b. Pie Chart
505
  grouped_counts = df['category'].value_counts().reset_index()
506
  grouped_counts.columns = ['Category', 'Count']
507
+ fig_pie = px.pie(grouped_counts, values='Count', names='Category',title='Distribution of Entities by Category',color_discrete_sequence=px.colors.sequential.Greys) # Monochrome palette
508
  fig_pie.update_layout(margin=dict(t=50, b=10))
509
  pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
510
 
511
  # 1c. Bar Chart (Category Count)
512
+ fig_bar_category = px.bar(grouped_counts, x='Category', y='Count',color='Category', title='Total Entities per Category',color_discrete_sequence=px.colors.sequential.Greys) # Monochrome palette
513
  fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
514
  bar_category_html = fig_bar_category.to_html(full_html=False,include_plotlyjs='cdn')
515
 
 
520
  bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
521
 
522
  if not repeating_entities.empty:
523
+ fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count',color='Entity', title='Top 10 Most Frequent Entities',color_discrete_sequence=px.colors.sequential.Greys) # Monochrome palette
524
  fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'},margin=dict(t=50, b=100))
525
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
526
 
 
529
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
530
 
531
  # 1f. Topic Charts HTML
532
+ topic_charts_html = '<h3>Bubble size = word weight</h3>'
533
  if df_topic_data is not None and not df_topic_data.empty:
534
  bubble_figure = create_topic_word_bubbles(df_topic_data)
535
  if bubble_figure:
 
537
  else:
538
  topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
539
  else:
540
+ topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #AAAAAA;">'
541
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
542
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
543
  topic_charts_html += '</div>'
544
 
545
  # 2. Get Highlighted Text
546
+ # The div style is now monochrome/neutral (border: #AAAAAA, background: #FFFFFF)
547
  highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
548
 
549
  # 3. Entity Tables (Pandas to HTML)
 
553
  )
554
 
555
  # 4. Construct the Final HTML
556
+ # Updated CSS to remove all color/pink references
557
  html_content = f"""<!DOCTYPE html><html lang="en"><head>
558
  <meta charset="UTF-8">
559
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
560
  <title>Entity and Topic Analysis Report</title>
561
  <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
562
  <style>
563
+ body {{ font-family: 'Inter', sans-serif; margin: 0; padding: 20px; background-color: #f4f4f4; color: #333; }}
564
  .container {{ max-width: 1200px; margin: 0 auto; background-color: #ffffff; padding: 30px; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }}
565
+ h1 {{ color: #333333; border-bottom: 3px solid #666666; padding-bottom: 10px; margin-top: 0; }}
566
+ h2 {{ color: #555555; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
567
  h3 {{ color: #555; margin-top: 20px; }}
568
+ .metadata {{ background-color: #eeeeee; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
569
  .chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
570
  table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
571
  table th, table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
572
  table th {{ background-color: #f0f0f0; }}
573
+ .highlighted-text {{ border: 1px solid #AAAAAA; padding: 15px; border-radius: 5px; background-color: #FFFFFF; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
574
  </style></head><body>
575
  <div class="container">
576
  <h1>Entity and Topic Analysis Report</h1>
 
588
  <h2>3. Data Visualizations</h2>
589
  <h3>3.1 Entity Distribution Treemap</h3>
590
  <div class="chart-box">{treemap_html}</div>
591
+ <h3>3.2 Comparative Charts</h3>
592
  <div class="chart-box">{pie_html}</div>
593
  <div class="chart-box">{bar_category_html}</div>
594
  <div class="chart-box">{bar_freq_html}</div>
595
+ <h3>3.3 Entity Relationship Map</h3>
596
  <div class="chart-box">{network_html}</div>
597
+ <h2>4. Topic Modelling</h2>
598
  {topic_charts_html}
599
  </div></body></html>
600
  """
 
608
  <style>
609
  /* Overall app container - NO SIDEBAR */
610
  .main {
611
+ background-color: #F8F8F8; /* Near White/Lightest Gray */
612
  color: #333333; /* Dark grey text for contrast */
613
  }
614
  .stApp {
615
+ background-color: #F8F8F8;
616
  }
617
  /* Text Area background and text color (input fields) */
618
  .stTextArea textarea {
619
+ background-color: #FFFFFF; /* Pure White for input fields */
620
  color: #000000; /* Black text for input */
621
+ border: 1px solid #AAAAAA; /* Gray border */
622
  }
623
  /* Button styling */
624
  .stButton > button {
625
+ background-color: #666666; /* Medium Gray for the button */
626
  color: #FFFFFF; /* White text for contrast */
627
  border: none;
628
  padding: 10px 20px;
 
630
  transition: background-color 0.3s;
631
  }
632
  .stButton > button:hover {
633
+ background-color: #444444; /* Darker Gray on hover */
634
  }
635
  /* Expander header and content background */
636
  .streamlit-expanderHeader, .streamlit-expanderContent {
637
+ background-color: #EEEEEE; /* Very Light Gray */
638
  color: #333333;
639
  }
640
  </style>
641
  """,
642
  unsafe_allow_html=True)
643
+ st.subheader("NER and Topic Analysis Report Generator", divider="gray") # Divider is now gray
644
  st.link_button("by nlpblogs", "https://nlpblogs.com", type="secondary")
645
  expander = st.expander("**Important notes**")
646
  expander.write(f"""**Named Entities:** This app predicts fifteen (15) labels: {', '.join(entity_color_map.keys())}.
647
+ **Dependencies:** Note that **PPTX** and **image export** require the Python libraries `python-pptx`, `plotly`, and `kaleido`. If charts in the PPTX are blank, please check your environment's $\text{kaleido}$ installation/permissions.
648
+ **Results:** Results are compiled into a single, comprehensive **HTML report**, a **PowerPoint (.pptx) file**, and a **CSV file** for easy download and sharing.
649
+ **How to Use:** Type or paste your text into the text area below, then press Ctrl + Enter. Click the 'Results' button to extract entities and generate the report.""")
650
  st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
651
 
652
  # --- Comet ML Setup (Placeholder/Conditional) ---