Spaces:
Runtime error
Runtime error
Update src/streamlit_app.py
Browse files
- src/streamlit_app.py +22 -20
src/streamlit_app.py
CHANGED
|
@@ -263,7 +263,8 @@ def generate_entity_csv(df):
|
|
| 263 |
# --- HTML REPORT GENERATION FUNCTION ---
|
| 264 |
def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_color_map, report_title="Entity and Topic Analysis Report", branding_html=""):
|
| 265 |
"""
|
| 266 |
-
Generates a full HTML report containing all analysis results and visualizations
|
|
|
|
| 267 |
"""
|
| 268 |
# 1. Generate Visualizations (Plotly HTML)
|
| 269 |
|
|
@@ -324,10 +325,17 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
|
|
| 324 |
highlighted_text_html = highlight_entities(text_input, df, entity_color_map).replace("div style", "div class='highlighted-text' style")
|
| 325 |
|
| 326 |
# 3. Entity Tables (Pandas to HTML)
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
)
|
|
|
|
| 331 |
|
| 332 |
# 4. Construct the Final HTML
|
| 333 |
html_content = f"""<!DOCTYPE html><html lang="en"><head>
|
|
@@ -344,8 +352,9 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_col
|
|
| 344 |
.metadata {{ background-color: #e6f0ff; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
|
| 345 |
.chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
|
| 346 |
table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
|
| 347 |
-
|
| 348 |
-
table
|
|
|
|
| 349 |
.highlighted-text {{ border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
|
| 350 |
</style>
|
| 351 |
</head>
|
|
@@ -518,7 +527,7 @@ DEFAULT_TEXT = (
|
|
| 518 |
"space capabilities within the **European Union**. The core team, including lead engineer Marcus Davies, will hold "
|
| 519 |
"their first collaborative workshop in Berlin, Germany, on August 15th. The community response on social "
|
| 520 |
"media platform X (under the username @TechCEO) was overwhelmingly positive, with many major tech "
|
| 521 |
-
"publications, including Wired Magazine, predicting a major impact on the space technology industry by the "
|
| 522 |
"end of the year, further strengthening the technological standing of the **European Union**. The platform is designed to be compatible with both Windows and Linux operating systems. "
|
| 523 |
"The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
|
| 524 |
"are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
|
|
@@ -565,7 +574,9 @@ custom_labels_text = st.text_area(
|
|
| 565 |
"**Optional:** Enter your own comma-separated entity labels here (e.g., `product, symptom, client_id`). Leave blank for default labels.",
|
| 566 |
height=60,
|
| 567 |
key='custom_labels_input',
|
| 568 |
-
|
|
|
|
|
|
|
| 569 |
)
|
| 570 |
|
| 571 |
col_results, col_clear = st.columns([1, 1])
|
|
@@ -681,14 +692,7 @@ if st.session_state.show_results:
|
|
| 681 |
st.markdown("### 2. Detailed Entity Analysis")
|
| 682 |
tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
|
| 683 |
|
| 684 |
-
|
| 685 |
-
unique_categories = ["User Defined Entities"]
|
| 686 |
-
tabs_to_show = df['label'].unique().tolist()
|
| 687 |
-
st.markdown(f"**Custom Labels Detected: {', '.join(tabs_to_show)}**")
|
| 688 |
-
else:
|
| 689 |
-
unique_categories = list(FIXED_CATEGORY_MAPPING.keys())
|
| 690 |
-
|
| 691 |
-
# --- Function to Apply Conditional Coloring to Scores ---
|
| 692 |
def color_score_gradient(df_input):
|
| 693 |
"""Applies a color gradient to the 'score' column using Pandas Styler."""
|
| 694 |
return df_input.style.background_gradient(
|
|
@@ -712,6 +716,7 @@ if st.session_state.show_results:
|
|
| 712 |
st.markdown(f"##### {label.capitalize()} Entities ({len(df_label)} total)")
|
| 713 |
st.dataframe(styled_df_label, use_container_width=True)
|
| 714 |
else:
|
|
|
|
| 715 |
tabs_category = st.tabs(unique_categories)
|
| 716 |
|
| 717 |
for category, tab in zip(unique_categories, tabs_category):
|
|
@@ -841,11 +846,9 @@ if st.session_state.show_results:
|
|
| 841 |
|
| 842 |
df_topic_data = st.session_state.topic_results
|
| 843 |
|
| 844 |
-
# --- CRITICAL: This is the conditional block that must have correct indentation ---
|
| 845 |
if df_topic_data is not None and not df_topic_data.empty:
|
| 846 |
st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
|
| 847 |
st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
|
| 848 |
-
# END CRITICAL BLOCK
|
| 849 |
else:
|
| 850 |
st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
|
| 851 |
|
|
@@ -900,5 +903,4 @@ if st.session_state.show_results:
|
|
| 900 |
file_name="ner_topic_full_report.html",
|
| 901 |
mime="text/html",
|
| 902 |
use_container_width=True
|
| 903 |
-
)
|
| 904 |
-
|
|
|
|
| 263 |
# --- HTML REPORT GENERATION FUNCTION ---
|
| 264 |
def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_color_map, report_title="Entity and Topic Analysis Report", branding_html=""):
|
| 265 |
"""
|
| 266 |
+
Generates a full HTML report containing all analysis results and visualizations,
|
| 267 |
+
including color gradient styling for the score column in the main table.
|
| 268 |
"""
|
| 269 |
# 1. Generate Visualizations (Plotly HTML)
|
| 270 |
|
|
|
|
| 325 |
highlighted_text_html = highlight_entities(text_input, df, entity_color_map).replace("div style", "div class='highlighted-text' style")
|
| 326 |
|
| 327 |
# 3. Entity Tables (Pandas to HTML)
|
| 328 |
+
# --- FIX: Apply color gradient styling to the DataFrame BEFORE converting to HTML ---
|
| 329 |
+
styled_df = df[['text', 'label', 'score', 'start', 'end', 'category']].style.background_gradient(
|
| 330 |
+
cmap='YlGnBu',
|
| 331 |
+
subset=['score']
|
| 332 |
+
).format({'score': '{:.4f}'})
|
| 333 |
+
|
| 334 |
+
entity_table_html = styled_df.to_html(
|
| 335 |
+
classes='table table-striped',
|
| 336 |
+
index=False,
|
| 337 |
)
|
| 338 |
+
# --- END FIX ---
|
| 339 |
|
| 340 |
# 4. Construct the Final HTML
|
| 341 |
html_content = f"""<!DOCTYPE html><html lang="en"><head>
|
|
|
|
| 352 |
.metadata {{ background-color: #e6f0ff; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
|
| 353 |
.chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
|
| 354 |
table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
|
| 355 |
+
/* Target the cells generated by pandas styling */
|
| 356 |
+
table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
|
| 357 |
+
table th {{ border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f0f0f0; }}
|
| 358 |
.highlighted-text {{ border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
|
| 359 |
</style>
|
| 360 |
</head>
|
|
|
|
| 527 |
"space capabilities within the **European Union**. The core team, including lead engineer Marcus Davies, will hold "
|
| 528 |
"their first collaborative workshop in Berlin, Germany, on August 15th. The community response on social "
|
| 529 |
"media platform X (under the username @TechCEO) was overwhelmingly positive, with many major tech "
|
| 530 |
+
"publications, including Wired Magazine, predicting a major impact on the space technology industry by the "
|
| 531 |
"end of the year, further strengthening the technological standing of the **European Union**. The platform is designed to be compatible with both Windows and Linux operating systems. "
|
| 532 |
"The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
|
| 533 |
"are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
|
|
|
|
| 574 |
"**Optional:** Enter your own comma-separated entity labels here (e.g., `product, symptom, client_id`). Leave blank for default labels.",
|
| 575 |
height=60,
|
| 576 |
key='custom_labels_input',
|
| 577 |
+
# --- UPDATED PLACEHOLDER ENTITIES ---
|
| 578 |
+
placeholder="e.g., product_name, medical_symptom, customer_id, location_code, internal_project"
|
| 579 |
+
# --- END UPDATE ---
|
| 580 |
)
|
| 581 |
|
| 582 |
col_results, col_clear = st.columns([1, 1])
|
|
|
|
| 692 |
st.markdown("### 2. Detailed Entity Analysis")
|
| 693 |
tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
|
| 694 |
|
| 695 |
+
# --- Function to Apply Conditional Coloring to Scores (For Streamlit UI only) ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
def color_score_gradient(df_input):
|
| 697 |
"""Applies a color gradient to the 'score' column using Pandas Styler."""
|
| 698 |
return df_input.style.background_gradient(
|
|
|
|
| 716 |
st.markdown(f"##### {label.capitalize()} Entities ({len(df_label)} total)")
|
| 717 |
st.dataframe(styled_df_label, use_container_width=True)
|
| 718 |
else:
|
| 719 |
+
unique_categories = list(FIXED_CATEGORY_MAPPING.keys())
|
| 720 |
tabs_category = st.tabs(unique_categories)
|
| 721 |
|
| 722 |
for category, tab in zip(unique_categories, tabs_category):
|
|
|
|
| 846 |
|
| 847 |
df_topic_data = st.session_state.topic_results
|
| 848 |
|
|
|
|
| 849 |
if df_topic_data is not None and not df_topic_data.empty:
|
| 850 |
st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
|
| 851 |
st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
|
|
|
|
| 852 |
else:
|
| 853 |
st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")
|
| 854 |
|
|
|
|
| 903 |
file_name="ner_topic_full_report.html",
|
| 904 |
mime="text/html",
|
| 905 |
use_container_width=True
|
| 906 |
+
)
|
|
|