Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +73 -196
src/streamlit_app.py
CHANGED
|
@@ -42,30 +42,34 @@ os.environ['HF_HOME'] = '/tmp'
|
|
| 42 |
# --- Color Map for Highlighting and Network Graph Nodes ---
|
| 43 |
entity_color_map = {
|
| 44 |
"person": "#10b981",
|
| 45 |
-
"
|
| 46 |
-
"
|
| 47 |
-
|
| 48 |
"organization": "#f59e0b",
|
| 49 |
-
"
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
-
"
|
| 54 |
-
|
| 55 |
-
"date": "#6366f1",
|
| 56 |
-
"media_type": "#14b8a6",
|
| 57 |
-
"url": "#60a5fa",
|
| 58 |
-
"nationality_religion": "#fb7185"
|
| 59 |
}
|
| 60 |
|
| 61 |
# --- Label Definitions and Category Mapping (Used by the App and PPTX) ---
|
| 62 |
labels = list(entity_color_map.keys())
|
|
|
|
|
|
|
|
|
|
| 63 |
category_mapping = {
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
|
| 70 |
|
| 71 |
|
|
@@ -293,156 +297,11 @@ def generate_network_graph(df, raw_text):
|
|
| 293 |
return fig
|
| 294 |
|
| 295 |
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
def fig_to_image_buffer(fig):
|
| 299 |
-
"""
|
| 300 |
-
Converts a Plotly figure object into a BytesIO buffer containing PNG data.
|
| 301 |
-
Requires 'kaleido' to be installed for image export.
|
| 302 |
-
Returns None if export fails.
|
| 303 |
-
"""
|
| 304 |
-
try:
|
| 305 |
-
# Use pio.to_image to convert the figure to a PNG byte array
|
| 306 |
-
img_bytes = pio.to_image(fig, format="png", width=900, height=500, scale=2)
|
| 307 |
-
img_buffer = BytesIO(img_bytes)
|
| 308 |
-
return img_buffer
|
| 309 |
-
except Exception as e:
|
| 310 |
-
# In a Streamlit environment, we can't show this error directly in the app execution flow
|
| 311 |
-
print(f"Error converting Plotly figure to image: {e}")
|
| 312 |
-
return None
|
| 313 |
-
|
| 314 |
-
# --- PPTX GENERATION FUNCTION (Integrated and Adapted) ---
|
| 315 |
-
|
| 316 |
-
def generate_pptx_report(df, text_input, elapsed_time, df_topic_data, reverse_category_mapping):
|
| 317 |
-
"""
|
| 318 |
-
Generates a PowerPoint presentation (.pptx) file containing key analysis results.
|
| 319 |
-
Returns the file content as a BytesIO buffer.
|
| 320 |
-
"""
|
| 321 |
-
prs = Presentation()
|
| 322 |
-
# Layout 5: Title and Content (often good for charts)
|
| 323 |
-
chart_layout = prs.slide_layouts[5]
|
| 324 |
-
|
| 325 |
-
# 1. Title Slide
|
| 326 |
-
title_slide_layout = prs.slide_layouts[0]
|
| 327 |
-
slide = prs.slides.add_slide(title_slide_layout)
|
| 328 |
-
title = slide.shapes.title
|
| 329 |
-
subtitle = slide.placeholders[1]
|
| 330 |
-
title.text = "NER & Topic Analysis Report"
|
| 331 |
-
subtitle.text = f"Source Text Analysis\nGenerated: {time.strftime('%Y-%m-%d %H:%M:%S')}\nProcessing Time: {elapsed_time:.2f} seconds"
|
| 332 |
-
|
| 333 |
-
# 2. Source Text Slide
|
| 334 |
-
slide = prs.slides.add_slide(chart_layout)
|
| 335 |
-
slide.shapes.title.text = "Analyzed Source Text"
|
| 336 |
-
|
| 337 |
-
# Add the raw text to a text box
|
| 338 |
-
left = Inches(0.5)
|
| 339 |
-
top = Inches(1.5)
|
| 340 |
-
width = Inches(9.0)
|
| 341 |
-
height = Inches(5.0)
|
| 342 |
-
txBox = slide.shapes.add_textbox(left, top, width, height)
|
| 343 |
-
tf = txBox.text_frame
|
| 344 |
-
tf.margin_top = Inches(0.1)
|
| 345 |
-
tf.margin_bottom = Inches(0.1)
|
| 346 |
-
tf.word_wrap = True
|
| 347 |
-
p = tf.add_paragraph()
|
| 348 |
-
p.text = text_input
|
| 349 |
-
p.font.size = Pt(14)
|
| 350 |
-
p.font.name = 'Arial'
|
| 351 |
-
|
| 352 |
-
# 3. Entity Summary Slide (Table)
|
| 353 |
-
slide = prs.slides.add_slide(chart_layout)
|
| 354 |
-
slide.shapes.title.text = "Entity Summary (Count by Category and Label)"
|
| 355 |
-
|
| 356 |
-
# Create the summary table using the app's established logic
|
| 357 |
-
grouped_entity_table = df['label'].value_counts().reset_index()
|
| 358 |
-
grouped_entity_table.columns = ['Entity Label', 'Count']
|
| 359 |
-
grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(
|
| 360 |
-
lambda x: reverse_category_mapping.get(x, 'Other')
|
| 361 |
-
)
|
| 362 |
-
grouped_entity_table = grouped_entity_table[['Category', 'Entity Label', 'Count']]
|
| 363 |
-
|
| 364 |
-
# Simple way to insert a table:
|
| 365 |
-
rows, cols = grouped_entity_table.shape
|
| 366 |
-
x, y, cx, cy = Inches(1), Inches(1.5), Inches(8), Inches(4.5)
|
| 367 |
-
# Add 1 row for the header
|
| 368 |
-
table = slide.shapes.add_table(rows + 1, cols, x, y, cx, cy).table
|
| 369 |
-
|
| 370 |
-
# Set column widths
|
| 371 |
-
table.columns[0].width = Inches(2.7)
|
| 372 |
-
table.columns[1].width = Inches(2.8)
|
| 373 |
-
table.columns[2].width = Inches(2.5)
|
| 374 |
-
|
| 375 |
-
# Set column headers
|
| 376 |
-
for i, col in enumerate(grouped_entity_table.columns):
|
| 377 |
-
cell = table.cell(0, i)
|
| 378 |
-
cell.text = col
|
| 379 |
-
cell.fill.solid()
|
| 380 |
-
# Optional: Add simple styling to header
|
| 381 |
-
|
| 382 |
-
# Fill in the data
|
| 383 |
-
for i in range(rows):
|
| 384 |
-
for j in range(cols):
|
| 385 |
-
cell = table.cell(i+1, j)
|
| 386 |
-
cell.text = str(grouped_entity_table.iloc[i, j])
|
| 387 |
-
# Optional: Style data cells
|
| 388 |
-
|
| 389 |
-
# 4. Treemap Slide (Visualization)
|
| 390 |
-
fig_treemap = px.treemap(
|
| 391 |
-
df,
|
| 392 |
-
path=[px.Constant("All Entities"), 'category', 'label', 'text'],
|
| 393 |
-
values='score',
|
| 394 |
-
color='category',
|
| 395 |
-
title="Entity Distribution by Category and Label",
|
| 396 |
-
color_discrete_sequence=px.colors.qualitative.Dark24
|
| 397 |
-
)
|
| 398 |
-
fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
|
| 399 |
-
treemap_image = fig_to_image_buffer(fig_treemap)
|
| 400 |
-
|
| 401 |
-
if treemap_image:
|
| 402 |
-
slide = prs.slides.add_slide(chart_layout)
|
| 403 |
-
slide.shapes.title.text = "Entity Distribution Treemap"
|
| 404 |
-
slide.shapes.add_picture(treemap_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
|
| 405 |
-
|
| 406 |
-
# 5. Entity Count Bar Chart Slide (Visualization)
|
| 407 |
-
grouped_counts = df['category'].value_counts().reset_index()
|
| 408 |
-
grouped_counts.columns = ['Category', 'Count']
|
| 409 |
-
fig_bar_category = px.bar(
|
| 410 |
-
grouped_counts,
|
| 411 |
-
x='Category',
|
| 412 |
-
y='Count',
|
| 413 |
-
color='Category',
|
| 414 |
-
title='Total Entities per Category',
|
| 415 |
-
color_discrete_sequence=px.colors.qualitative.Pastel
|
| 416 |
-
)
|
| 417 |
-
fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'})
|
| 418 |
-
bar_category_image = fig_to_image_buffer(fig_bar_category)
|
| 419 |
-
|
| 420 |
-
if bar_category_image:
|
| 421 |
-
slide = prs.slides.add_slide(chart_layout)
|
| 422 |
-
slide.shapes.title.text = "Total Entities per Category"
|
| 423 |
-
slide.shapes.add_picture(bar_category_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
|
| 424 |
|
| 425 |
-
|
| 426 |
-
if df_topic_data is not None and not df_topic_data.empty:
|
| 427 |
-
# Ensure data frame is in the format expected by create_topic_word_bubbles
|
| 428 |
-
df_topic_data_pptx = df_topic_data.rename(columns={'Topic_ID': 'topic', 'Word': 'word', 'Weight': 'weight'})
|
| 429 |
-
bubble_figure = create_topic_word_bubbles(df_topic_data_pptx)
|
| 430 |
-
bubble_image = fig_to_image_buffer(bubble_figure)
|
| 431 |
-
if bubble_image:
|
| 432 |
-
slide = prs.slides.add_slide(chart_layout)
|
| 433 |
-
slide.shapes.title.text = "Topic Word Weights (Bubble Chart)"
|
| 434 |
-
slide.shapes.add_picture(bubble_image, Inches(0.75), Inches(1.5), width=Inches(8.5))
|
| 435 |
-
else:
|
| 436 |
-
# Placeholder slide if topic modeling is not available
|
| 437 |
-
slide = prs.slides.add_slide(chart_layout)
|
| 438 |
-
slide.shapes.title.text = "Topic Modeling Results"
|
| 439 |
-
slide.placeholders[1].text = "Topic Modeling requires more unique input (at least two unique entities)."
|
| 440 |
|
| 441 |
-
|
| 442 |
-
pptx_buffer = BytesIO()
|
| 443 |
-
prs.save(pptx_buffer)
|
| 444 |
-
pptx_buffer.seek(0)
|
| 445 |
-
return pptx_buffer
|
| 446 |
|
| 447 |
# --- NEW CSV GENERATION FUNCTION ---
|
| 448 |
def generate_entity_csv(df):
|
|
@@ -568,9 +427,9 @@ def generate_html_report(df, text_input, elapsed_time, df_topic_data):
|
|
| 568 |
<div class="chart-box">{pie_html}</div>
|
| 569 |
<div class="chart-box">{bar_category_html}</div>
|
| 570 |
<div class="chart-box">{bar_freq_html}</div>
|
| 571 |
-
<h3>3.3 Entity
|
| 572 |
<div class="chart-box">{network_html}</div>
|
| 573 |
-
<h2>4. Topic
|
| 574 |
{topic_charts_html}
|
| 575 |
</div></body></html>
|
| 576 |
"""
|
|
@@ -612,13 +471,24 @@ st.markdown(
|
|
| 612 |
</style>
|
| 613 |
""",
|
| 614 |
unsafe_allow_html=True)
|
| 615 |
-
st.subheader("
|
| 616 |
st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 617 |
expander = st.expander("**Important notes**")
|
| 618 |
-
expander.write(
|
| 619 |
-
|
| 620 |
-
**Results:** Results are compiled into a single, comprehensive **HTML report
|
| 621 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
|
| 623 |
|
| 624 |
# --- Comet ML Setup (Placeholder/Conditional) ---
|
|
@@ -753,20 +623,23 @@ if st.session_state.show_results:
|
|
| 753 |
st.markdown("### 1. Analyzed Text with Highlighted Entities")
|
| 754 |
st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
|
| 755 |
|
| 756 |
-
|
| 757 |
-
st.markdown("### 2. Entity Summary Table (Count by Label)")
|
| 758 |
-
grouped_entity_table = df['label'].value_counts().reset_index()
|
| 759 |
-
grouped_entity_table.columns = ['Entity Label', 'Count']
|
| 760 |
-
grouped_entity_table['Category'] = grouped_entity_table['Entity Label'].map(reverse_category_mapping)
|
| 761 |
-
st.dataframe(grouped_entity_table[['Category', 'Entity Label', 'Count']], use_container_width=True)
|
| 762 |
-
st.markdown("---")
|
| 763 |
|
| 764 |
-
#
|
| 765 |
-
st.markdown("###
|
| 766 |
tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
|
| 767 |
|
| 768 |
with tab_category_details:
|
| 769 |
st.markdown("#### Detailed Entities Table (Grouped by Category)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 770 |
unique_categories = list(category_mapping.keys())
|
| 771 |
tabs_category = st.tabs(unique_categories)
|
| 772 |
for category, tab in zip(unique_categories, tabs_category):
|
|
@@ -795,9 +668,9 @@ if st.session_state.show_results:
|
|
| 795 |
fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
|
| 796 |
st.plotly_chart(fig_treemap, use_container_width=True)
|
| 797 |
|
| 798 |
-
#
|
| 799 |
st.markdown("---")
|
| 800 |
-
st.markdown("###
|
| 801 |
|
| 802 |
col1, col2, col3 = st.columns(3)
|
| 803 |
|
|
@@ -826,12 +699,12 @@ if st.session_state.show_results:
|
|
| 826 |
st.info("No entities repeat for frequency chart.")
|
| 827 |
|
| 828 |
st.markdown("---")
|
| 829 |
-
st.markdown("###
|
| 830 |
network_fig = generate_network_graph(df, st.session_state.last_text)
|
| 831 |
st.plotly_chart(network_fig, use_container_width=True)
|
| 832 |
|
| 833 |
st.markdown("---")
|
| 834 |
-
st.markdown("###
|
| 835 |
|
| 836 |
if df_topic_data is not None and not df_topic_data.empty:
|
| 837 |
bubble_figure = create_topic_word_bubbles(df_topic_data)
|
|
@@ -856,17 +729,9 @@ if st.session_state.show_results:
|
|
| 856 |
type="primary"
|
| 857 |
)
|
| 858 |
|
| 859 |
-
|
| 860 |
-
pptx_buffer = generate_pptx_report(df, st.session_state.last_text, st.session_state.elapsed_time, df_topic_data, reverse_category_mapping)
|
| 861 |
-
st.download_button(
|
| 862 |
-
label="Download Presentation Slides (.pptx)",
|
| 863 |
-
data=pptx_buffer,
|
| 864 |
-
file_name="ner_topic_report.pptx",
|
| 865 |
-
mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
| 866 |
-
type="primary"
|
| 867 |
-
)
|
| 868 |
|
| 869 |
-
#
|
| 870 |
csv_buffer = generate_entity_csv(df)
|
| 871 |
st.download_button(
|
| 872 |
label="Download Extracted Entities (CSV)",
|
|
@@ -875,4 +740,16 @@ if st.session_state.show_results:
|
|
| 875 |
mime="text/csv",
|
| 876 |
type="secondary"
|
| 877 |
)
|
| 878 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# --- Color Map for Highlighting and Network Graph Nodes ---
|
| 43 |
entity_color_map = {
|
| 44 |
"person": "#10b981",
|
| 45 |
+
"country": "#3b82f6",
|
| 46 |
+
"city": "#4ade80",
|
| 47 |
+
|
| 48 |
"organization": "#f59e0b",
|
| 49 |
+
"date": "#8b5cf6",
|
| 50 |
+
"time": "#ec4899",
|
| 51 |
+
"cardinal": "#06b6d4",
|
| 52 |
+
"money": "#f43f5e",
|
| 53 |
+
"position": "#a855f7",
|
| 54 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
}
|
| 56 |
|
| 57 |
# --- Label Definitions and Category Mapping (Used by the App and PPTX) ---
|
| 58 |
labels = list(entity_color_map.keys())
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
|
| 62 |
category_mapping = {
|
| 63 |
+
"People": ["person", "organization", "position"],
|
| 64 |
+
"Locations": ["country", "city"],
|
| 65 |
+
"Time": ["date", "time"],
|
| 66 |
+
"Numbers": ["money", "cardinal"]
|
| 67 |
}
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
|
| 73 |
reverse_category_mapping = {label: category for category, label_list in category_mapping.items() for label in label_list}
|
| 74 |
|
| 75 |
|
|
|
|
| 297 |
return fig
|
| 298 |
|
| 299 |
|
| 300 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
|
| 302 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
# --- NEW CSV GENERATION FUNCTION ---
|
| 307 |
def generate_entity_csv(df):
|
|
|
|
| 427 |
<div class="chart-box">{pie_html}</div>
|
| 428 |
<div class="chart-box">{bar_category_html}</div>
|
| 429 |
<div class="chart-box">{bar_freq_html}</div>
|
| 430 |
+
<h3>3.3 Entity Relationship Map (Edges = Same Sentence)</h3>
|
| 431 |
<div class="chart-box">{network_html}</div>
|
| 432 |
+
<h2>4. Topic Modelling</h2>
|
| 433 |
{topic_charts_html}
|
| 434 |
</div></body></html>
|
| 435 |
"""
|
|
|
|
| 471 |
</style>
|
| 472 |
""",
|
| 473 |
unsafe_allow_html=True)
|
| 474 |
+
st.subheader("Entity and Topic Analysis Report Generator", divider="rainbow")
|
| 475 |
st.link_button("by nlpblogs", "https://nlpblogs.com", type="tertiary")
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
|
| 480 |
expander = st.expander("**Important notes**")
|
| 481 |
+
expander.write("""**Named Entities:** This DataHarvest web app predicts nine (9) labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position"
|
| 482 |
+
|
| 483 |
+
**Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
|
| 484 |
+
|
| 485 |
+
**How to Use:** Type or paste your text into the text area below, press Ctrl + Enter, and then click the 'Results' button.
|
| 486 |
+
|
| 487 |
+
**Technical issues:** If your connection times out, please refresh the page or reopen the app's URL.""")
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
|
| 491 |
+
|
| 492 |
st.markdown("For any errors or inquiries, please contact us at [info@nlpblogs.com](mailto:info@nlpblogs.com)")
|
| 493 |
|
| 494 |
# --- Comet ML Setup (Placeholder/Conditional) ---
|
|
|
|
| 623 |
st.markdown("### 1. Analyzed Text with Highlighted Entities")
|
| 624 |
st.markdown(highlight_entities(st.session_state.last_text, df), unsafe_allow_html=True)
|
| 625 |
|
| 626 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 627 |
|
| 628 |
+
# 2. Detailed Entity Analysis Tabs
|
| 629 |
+
st.markdown("### 2. Detailed Entity Analysis")
|
| 630 |
tab_category_details, tab_treemap_viz = st.tabs(["📑 Entities Grouped by Category", "🗺️ Treemap Distribution"])
|
| 631 |
|
| 632 |
with tab_category_details:
|
| 633 |
st.markdown("#### Detailed Entities Table (Grouped by Category)")
|
| 634 |
+
with st.expander("See Glossary of tags"):
|
| 635 |
+
st.write('''
|
| 636 |
+
- **text**: ['entity extracted from your text data']
|
| 637 |
+
- **label**: ['label (tag) assigned to a given extracted entity']
|
| 638 |
+
- **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']
|
| 639 |
+
- **start**: ['index of the start of the corresponding entity']
|
| 640 |
+
- **end**: ['index of the end of the corresponding entity']
|
| 641 |
+
''')
|
| 642 |
+
|
| 643 |
unique_categories = list(category_mapping.keys())
|
| 644 |
tabs_category = st.tabs(unique_categories)
|
| 645 |
for category, tab in zip(unique_categories, tabs_category):
|
|
|
|
| 668 |
fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
|
| 669 |
st.plotly_chart(fig_treemap, use_container_width=True)
|
| 670 |
|
| 671 |
+
# 3. Comparative Charts
|
| 672 |
st.markdown("---")
|
| 673 |
+
st.markdown("### 3. Comparative Charts")
|
| 674 |
|
| 675 |
col1, col2, col3 = st.columns(3)
|
| 676 |
|
|
|
|
| 699 |
st.info("No entities repeat for frequency chart.")
|
| 700 |
|
| 701 |
st.markdown("---")
|
| 702 |
+
st.markdown("### 4. Entity Relationship Map")
|
| 703 |
network_fig = generate_network_graph(df, st.session_state.last_text)
|
| 704 |
st.plotly_chart(network_fig, use_container_width=True)
|
| 705 |
|
| 706 |
st.markdown("---")
|
| 707 |
+
st.markdown("### 5. Topic Modelling Analysis")
|
| 708 |
|
| 709 |
if df_topic_data is not None and not df_topic_data.empty:
|
| 710 |
bubble_figure = create_topic_word_bubbles(df_topic_data)
|
|
|
|
| 729 |
type="primary"
|
| 730 |
)
|
| 731 |
|
| 732 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 733 |
|
| 734 |
+
# 2. CSV Data Download (NEW)
|
| 735 |
csv_buffer = generate_entity_csv(df)
|
| 736 |
st.download_button(
|
| 737 |
label="Download Extracted Entities (CSV)",
|
|
|
|
| 740 |
mime="text/csv",
|
| 741 |
type="secondary"
|
| 742 |
)
|
| 743 |
+
|
| 744 |
+
with st.expander("See Glossary of tags"):
|
| 745 |
+
st.write("Use the following code to embed the DataHarvest web app on your website. Feel free to adjust the width and height values to fit your page.")
|
| 746 |
+
code = '''
|
| 747 |
+
<iframe
|
| 748 |
+
src="https://aiecosystem-dataharvest.hf.space"
|
| 749 |
+
frameborder="0"
|
| 750 |
+
width="850"
|
| 751 |
+
height="450"
|
| 752 |
+
></iframe>
|
| 753 |
+
'''
|
| 754 |
+
st.code(code, language="html")
|
| 755 |
+
|