AIEcosystem committed on
Commit
b04eb29
·
verified ·
1 Parent(s): 72681a3

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +38 -13
src/streamlit_app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  os.environ['HF_HOME'] = '/tmp'
4
  import time
@@ -311,25 +310,36 @@ def generate_network_graph(df, raw_text):
311
  def generate_html_report(df, text_input, elapsed_time, df_topic_data):
312
  """
313
  Generates a full HTML report containing all analysis results and visualizations.
 
314
  """
315
 
316
  # 1. Generate Visualizations (Plotly HTML)
317
 
318
- # 1a. Treemap
319
- fig_treemap = px.treemap(df, path=[px.Constant("All Entities"), 'category', 'label', 'text'], values='score', color='category', title="Entity Distribution by Category and Label")
320
- fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
 
 
 
 
 
 
 
321
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
322
- # 1b. Pie Chart
 
323
  grouped_counts = df['category'].value_counts().reset_index()
324
  grouped_counts.columns = ['Category', 'Count']
325
  fig_pie = px.pie(grouped_counts, values='Count', names='Category', title='Distribution of Entities by Category', color_discrete_sequence=px.colors.sequential.RdBu)
326
- fig_pie.update_layout(margin=dict(t=50, b=10))
327
  pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
328
- # 1c. Bar Chart (Category Count)
 
329
  fig_bar_category = px.bar(grouped_counts, x='Category', y='Count', color='Category', title='Total Entities per Category', color_discrete_sequence=px.colors.qualitative.Pastel)
330
- fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10))
331
  bar_category_html = fig_bar_category.to_html(full_html=False, include_plotlyjs='cdn')
332
- # 1d. Bar Chart (Most Frequent Entities)
 
333
  word_counts = df['text'].value_counts().reset_index()
334
  word_counts.columns = ['Entity', 'Count']
335
 
@@ -339,8 +349,9 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
339
 
340
  if not repeating_entities.empty:
341
  fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count', color='Entity', title='Top 10 Most Frequent Entities', color_discrete_sequence=px.colors.sequential.Plasma)
342
- fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10))
343
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
 
344
  # 1e. Network Graph HTML - UPDATED to pass text_input
345
  network_fig = generate_network_graph(df, text_input)
346
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
@@ -359,8 +370,10 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
359
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
360
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
361
  topic_charts_html += '</div>'
 
362
  # 2. Get Highlighted Text
363
  highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
 
364
  # 3. Entity Tables (Pandas to HTML)
365
  entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
366
  classes='table table-striped',
@@ -586,7 +599,6 @@ if st.button("Results"):
586
  )
587
 
588
  if comet_initialized:
589
- # FIX APPLIED HERE: Corrected indentation for the following lines
590
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
591
  experiment.log_parameter("input_text", text)
592
  experiment.log_table("predicted_entities", df)
@@ -659,8 +671,15 @@ if st.session_state.show_results:
659
  # TAB 2: Treemap
660
  with tab_treemap_viz:
661
  st.markdown("#### Treemap: Entity Distribution")
662
- # Treemap
663
- fig_treemap = px.treemap(df, path=[px.Constant("All Entities"), 'category', 'label', 'text'], values='score', color='category', title="Entity Distribution by Category and Label")
 
 
 
 
 
 
 
664
  fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
665
  st.plotly_chart(fig_treemap, use_container_width=True)
666
 
@@ -746,3 +765,9 @@ if st.session_state.show_results:
746
  file_name=f"entity_topic_report_{time.strftime('%Y%m%d_%H%M%S')}.html",
747
  mime="text/html",
748
  )
 
 
 
 
 
 
 
 
1
  import os
2
  os.environ['HF_HOME'] = '/tmp'
3
  import time
 
310
  def generate_html_report(df, text_input, elapsed_time, df_topic_data):
311
  """
312
  Generates a full HTML report containing all analysis results and visualizations.
313
+ FIXED: Treemap color (added color_continuous_scale) and chart overlap (set explicit heights).
314
  """
315
 
316
  # 1. Generate Visualizations (Plotly HTML)
317
 
318
+ # 1a. Treemap - FIX: Added color_continuous_scale to ensure color renders in static HTML
319
+ fig_treemap = px.treemap(
320
+ df,
321
+ path=[px.Constant("All Entities"), 'category', 'label', 'text'],
322
+ values='score',
323
+ color='category',
324
+ title="Entity Distribution by Category and Label",
325
+ color_continuous_scale=px.colors.sequential.Agsunset # Force a color scale
326
+ )
327
+ fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), height=500) # Added height for treemap
328
  treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
329
+
330
+ # 1b. Pie Chart - FIX: Set explicit height to prevent overlap in the grid
331
  grouped_counts = df['category'].value_counts().reset_index()
332
  grouped_counts.columns = ['Category', 'Count']
333
  fig_pie = px.pie(grouped_counts, values='Count', names='Category', title='Distribution of Entities by Category', color_discrete_sequence=px.colors.sequential.RdBu)
334
+ fig_pie.update_layout(margin=dict(t=50, b=10), height=400)
335
  pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
336
+
337
+ # 1c. Bar Chart (Category Count) - FIX: Set explicit height
338
  fig_bar_category = px.bar(grouped_counts, x='Category', y='Count', color='Category', title='Total Entities per Category', color_discrete_sequence=px.colors.qualitative.Pastel)
339
+ fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10), height=400)
340
  bar_category_html = fig_bar_category.to_html(full_html=False, include_plotlyjs='cdn')
341
+
342
+ # 1d. Bar Chart (Most Frequent Entities) - FIX: Set explicit height
343
  word_counts = df['text'].value_counts().reset_index()
344
  word_counts.columns = ['Entity', 'Count']
345
 
 
349
 
350
  if not repeating_entities.empty:
351
  fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count', color='Entity', title='Top 10 Most Frequent Entities', color_discrete_sequence=px.colors.sequential.Plasma)
352
+ fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10), height=400)
353
  bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
354
+
355
  # 1e. Network Graph HTML - UPDATED to pass text_input
356
  network_fig = generate_network_graph(df, text_input)
357
  network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
 
370
  topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
371
  topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
372
  topic_charts_html += '</div>'
373
+
374
  # 2. Get Highlighted Text
375
  highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
376
+
377
  # 3. Entity Tables (Pandas to HTML)
378
  entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
379
  classes='table table-striped',
 
599
  )
600
 
601
  if comet_initialized:
 
602
  experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
603
  experiment.log_parameter("input_text", text)
604
  experiment.log_table("predicted_entities", df)
 
671
  # TAB 2: Treemap
672
  with tab_treemap_viz:
673
  st.markdown("#### Treemap: Entity Distribution")
674
+ # Treemap (Uses the corrected color in the report generation function)
675
+ fig_treemap = px.treemap(
676
+ df,
677
+ path=[px.Constant("All Entities"), 'category', 'label', 'text'],
678
+ values='score',
679
+ color='category',
680
+ title="Entity Distribution by Category and Label",
681
+ color_continuous_scale=px.colors.sequential.Agsunset # Added color scale here for Streamlit preview too
682
+ )
683
  fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
684
  st.plotly_chart(fig_treemap, use_container_width=True)
685
 
 
765
  file_name=f"entity_topic_report_{time.strftime('%Y%m%d_%H%M%S')}.html",
766
  mime="text/html",
767
  )
768
+
769
+
770
+
771
+
772
+
773
+