Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files
- src/streamlit_app.py +38 -13
src/streamlit_app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import os
|
| 3 |
os.environ['HF_HOME'] = '/tmp'
|
| 4 |
import time
|
|
@@ -311,25 +310,36 @@ def generate_network_graph(df, raw_text):
|
|
| 311 |
def generate_html_report(df, text_input, elapsed_time, df_topic_data):
|
| 312 |
"""
|
| 313 |
Generates a full HTML report containing all analysis results and visualizations.
|
|
|
|
| 314 |
"""
|
| 315 |
|
| 316 |
# 1. Generate Visualizations (Plotly HTML)
|
| 317 |
|
| 318 |
-
# 1a. Treemap
|
| 319 |
-
fig_treemap = px.treemap(
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
|
| 322 |
-
|
|
|
|
| 323 |
grouped_counts = df['category'].value_counts().reset_index()
|
| 324 |
grouped_counts.columns = ['Category', 'Count']
|
| 325 |
fig_pie = px.pie(grouped_counts, values='Count', names='Category', title='Distribution of Entities by Category', color_discrete_sequence=px.colors.sequential.RdBu)
|
| 326 |
-
fig_pie.update_layout(margin=dict(t=50, b=10))
|
| 327 |
pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
|
| 328 |
-
|
|
|
|
| 329 |
fig_bar_category = px.bar(grouped_counts, x='Category', y='Count', color='Category', title='Total Entities per Category', color_discrete_sequence=px.colors.qualitative.Pastel)
|
| 330 |
-
fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10))
|
| 331 |
bar_category_html = fig_bar_category.to_html(full_html=False, include_plotlyjs='cdn')
|
| 332 |
-
|
|
|
|
| 333 |
word_counts = df['text'].value_counts().reset_index()
|
| 334 |
word_counts.columns = ['Entity', 'Count']
|
| 335 |
|
|
@@ -339,8 +349,9 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
|
|
| 339 |
|
| 340 |
if not repeating_entities.empty:
|
| 341 |
fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count', color='Entity', title='Top 10 Most Frequent Entities', color_discrete_sequence=px.colors.sequential.Plasma)
|
| 342 |
-
fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10))
|
| 343 |
bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
|
|
|
|
| 344 |
# 1e. Network Graph HTML - UPDATED to pass text_input
|
| 345 |
network_fig = generate_network_graph(df, text_input)
|
| 346 |
network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
|
|
@@ -359,8 +370,10 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
|
|
| 359 |
topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
|
| 360 |
topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
|
| 361 |
topic_charts_html += '</div>'
|
|
|
|
| 362 |
# 2. Get Highlighted Text
|
| 363 |
highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
|
|
|
|
| 364 |
# 3. Entity Tables (Pandas to HTML)
|
| 365 |
entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
|
| 366 |
classes='table table-striped',
|
|
@@ -586,7 +599,6 @@ if st.button("Results"):
|
|
| 586 |
)
|
| 587 |
|
| 588 |
if comet_initialized:
|
| 589 |
-
# FIX APPLIED HERE: Corrected indentation for the following lines
|
| 590 |
experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
|
| 591 |
experiment.log_parameter("input_text", text)
|
| 592 |
experiment.log_table("predicted_entities", df)
|
|
@@ -659,8 +671,15 @@ if st.session_state.show_results:
|
|
| 659 |
# TAB 2: Treemap
|
| 660 |
with tab_treemap_viz:
|
| 661 |
st.markdown("#### Treemap: Entity Distribution")
|
| 662 |
-
# Treemap
|
| 663 |
-
fig_treemap = px.treemap(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 664 |
fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
|
| 665 |
st.plotly_chart(fig_treemap, use_container_width=True)
|
| 666 |
|
|
@@ -746,3 +765,9 @@ if st.session_state.show_results:
|
|
| 746 |
file_name=f"entity_topic_report_{time.strftime('%Y%m%d_%H%M%S')}.html",
|
| 747 |
mime="text/html",
|
| 748 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
os.environ['HF_HOME'] = '/tmp'
|
| 3 |
import time
|
|
|
|
| 310 |
def generate_html_report(df, text_input, elapsed_time, df_topic_data):
|
| 311 |
"""
|
| 312 |
Generates a full HTML report containing all analysis results and visualizations.
|
| 313 |
+
FIXED: Treemap color (added color_continuous_scale) and chart overlap (set explicit heights).
|
| 314 |
"""
|
| 315 |
|
| 316 |
# 1. Generate Visualizations (Plotly HTML)
|
| 317 |
|
| 318 |
+
# 1a. Treemap - FIX: Added color_continuous_scale to ensure color renders in static HTML
|
| 319 |
+
fig_treemap = px.treemap(
|
| 320 |
+
df,
|
| 321 |
+
path=[px.Constant("All Entities"), 'category', 'label', 'text'],
|
| 322 |
+
values='score',
|
| 323 |
+
color='category',
|
| 324 |
+
title="Entity Distribution by Category and Label",
|
| 325 |
+
color_continuous_scale=px.colors.sequential.Agsunset # Force a color scale
|
| 326 |
+
)
|
| 327 |
+
fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25), height=500) # Added height for treemap
|
| 328 |
treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')
|
| 329 |
+
|
| 330 |
+
# 1b. Pie Chart - FIX: Set explicit height to prevent overlap in the grid
|
| 331 |
grouped_counts = df['category'].value_counts().reset_index()
|
| 332 |
grouped_counts.columns = ['Category', 'Count']
|
| 333 |
fig_pie = px.pie(grouped_counts, values='Count', names='Category', title='Distribution of Entities by Category', color_discrete_sequence=px.colors.sequential.RdBu)
|
| 334 |
+
fig_pie.update_layout(margin=dict(t=50, b=10), height=400)
|
| 335 |
pie_html = fig_pie.to_html(full_html=False, include_plotlyjs='cdn')
|
| 336 |
+
|
| 337 |
+
# 1c. Bar Chart (Category Count) - FIX: Set explicit height
|
| 338 |
fig_bar_category = px.bar(grouped_counts, x='Category', y='Count', color='Category', title='Total Entities per Category', color_discrete_sequence=px.colors.qualitative.Pastel)
|
| 339 |
+
fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10), height=400)
|
| 340 |
bar_category_html = fig_bar_category.to_html(full_html=False, include_plotlyjs='cdn')
|
| 341 |
+
|
| 342 |
+
# 1d. Bar Chart (Most Frequent Entities) - FIX: Set explicit height
|
| 343 |
word_counts = df['text'].value_counts().reset_index()
|
| 344 |
word_counts.columns = ['Entity', 'Count']
|
| 345 |
|
|
|
|
| 349 |
|
| 350 |
if not repeating_entities.empty:
|
| 351 |
fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count', color='Entity', title='Top 10 Most Frequent Entities', color_discrete_sequence=px.colors.sequential.Plasma)
|
| 352 |
+
fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=10), height=400)
|
| 353 |
bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs='cdn')
|
| 354 |
+
|
| 355 |
# 1e. Network Graph HTML - UPDATED to pass text_input
|
| 356 |
network_fig = generate_network_graph(df, text_input)
|
| 357 |
network_html = network_fig.to_html(full_html=False, include_plotlyjs='cdn')
|
|
|
|
| 370 |
topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
|
| 371 |
topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
|
| 372 |
topic_charts_html += '</div>'
|
| 373 |
+
|
| 374 |
# 2. Get Highlighted Text
|
| 375 |
highlighted_text_html = highlight_entities(text_input, df).replace("div style", "div class='highlighted-text' style")
|
| 376 |
+
|
| 377 |
# 3. Entity Tables (Pandas to HTML)
|
| 378 |
entity_table_html = df[['text', 'label', 'score', 'start', 'end', 'category']].to_html(
|
| 379 |
classes='table table-striped',
|
|
|
|
| 599 |
)
|
| 600 |
|
| 601 |
if comet_initialized:
|
|
|
|
| 602 |
experiment = Experiment(api_key=COMET_API_KEY, workspace=COMET_WORKSPACE, project_name=COMET_PROJECT_NAME)
|
| 603 |
experiment.log_parameter("input_text", text)
|
| 604 |
experiment.log_table("predicted_entities", df)
|
|
|
|
| 671 |
# TAB 2: Treemap
|
| 672 |
with tab_treemap_viz:
|
| 673 |
st.markdown("#### Treemap: Entity Distribution")
|
| 674 |
+
# Treemap for the Streamlit preview (built with the same arguments as the treemap in generate_html_report)
|
| 675 |
+
fig_treemap = px.treemap(
|
| 676 |
+
df,
|
| 677 |
+
path=[px.Constant("All Entities"), 'category', 'label', 'text'],
|
| 678 |
+
values='score',
|
| 679 |
+
color='category',
|
| 680 |
+
title="Entity Distribution by Category and Label",
|
| 681 |
+
color_continuous_scale=px.colors.sequential.Agsunset # Added color scale here for Streamlit preview too
|
| 682 |
+
)
|
| 683 |
fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
|
| 684 |
st.plotly_chart(fig_treemap, use_container_width=True)
|
| 685 |
|
|
|
|
| 765 |
file_name=f"entity_topic_report_{time.strftime('%Y%m%d_%H%M%S')}.html",
|
| 766 |
mime="text/html",
|
| 767 |
)
|
| 768 |
+
|
| 769 |
+
|
| 770 |
+
|
| 771 |
+
|
| 772 |
+
|
| 773 |
+
|