Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -169,17 +169,18 @@ sdg_colors = {
|
|
| 169 |
}
|
| 170 |
|
| 171 |
# Function to plot SDG dominant bar graphs using Plotly
|
| 172 |
-
|
| 173 |
-
def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
| 174 |
"""
|
| 175 |
-
Plots a horizontal bar graph of SDG predictions and superimposes the icon of the most frequent SDG.
|
| 176 |
-
|
| 177 |
Args:
|
| 178 |
df (pd.DataFrame): DataFrame containing SDG predictions.
|
| 179 |
title (str): Title of the plot.
|
| 180 |
pred_column (str): Column name to use for plotting (e.g., 'pred1').
|
|
|
|
|
|
|
| 181 |
icons_folder (str): Path to the folder containing SDG icons.
|
| 182 |
-
|
| 183 |
Returns:
|
| 184 |
plotly.graph_objs._figure.Figure: The Plotly figure object.
|
| 185 |
"""
|
|
@@ -209,6 +210,9 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
|
| 209 |
textfont=dict(size=10)
|
| 210 |
)
|
| 211 |
|
|
|
|
|
|
|
|
|
|
| 212 |
# Adjust layout for better visibility
|
| 213 |
fig.update_layout(
|
| 214 |
title=dict(
|
|
@@ -219,14 +223,15 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
|
| 219 |
title=None,
|
| 220 |
tickfont=dict(size=12)
|
| 221 |
),
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
height=600,
|
| 224 |
#width=800,
|
| 225 |
showlegend=False,
|
| 226 |
template="simple_white",
|
| 227 |
-
xaxis=dict(
|
| 228 |
-
tickfont=dict(size=12) # Reduce x-axis font size
|
| 229 |
-
),
|
| 230 |
)
|
| 231 |
|
| 232 |
# Identify the most frequent SDG
|
|
@@ -250,10 +255,10 @@ def plot_sdg(df, title, pred_column, icons_folder='assets/icons/'):
|
|
| 250 |
dict(
|
| 251 |
source='data:image/png;base64,' + encoded_image,
|
| 252 |
xref="paper", yref="paper",
|
| 253 |
-
x=0.
|
| 254 |
-
sizex=0.2, sizey=0.2, #
|
| 255 |
-
xanchor="
|
| 256 |
-
yanchor="
|
| 257 |
layer="above" # Ensure the icon is above other plot elements
|
| 258 |
)
|
| 259 |
)
|
|
@@ -306,9 +311,9 @@ def generate_page_report(df_pages, report_file_name):
|
|
| 306 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_page.jpeg"
|
| 307 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_page.jpeg"
|
| 308 |
|
| 309 |
-
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
| 310 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 311 |
-
plot_sdg(df_doc, "Secondary SDGs", 'pred2').write_image(
|
| 312 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 313 |
|
| 314 |
# Add plots to the Word document
|
|
@@ -358,9 +363,9 @@ def generate_sentence_report(df_sentences, report_file_name):
|
|
| 358 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_sentence.jpeg"
|
| 359 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_sentence.jpeg"
|
| 360 |
|
| 361 |
-
plot_sdg(df_doc, "Primary SDGs", 'pred1').write_image(
|
| 362 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 363 |
-
plot_sdg(df_doc, "Secondary SDGs", 'pred2').write_image(
|
| 364 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 365 |
|
| 366 |
# Add plots to the Word document
|
|
@@ -521,9 +526,10 @@ def launch_interface():
|
|
| 521 |
outputs=[start_page, end_page]
|
| 522 |
)
|
| 523 |
|
| 524 |
-
#
|
| 525 |
gr.Markdown("## SDG Analysis Type")
|
| 526 |
-
|
|
|
|
| 527 |
with gr.Tab("π Page-Level Analysis"):
|
| 528 |
gr.Markdown(
|
| 529 |
"""
|
|
@@ -546,10 +552,10 @@ def launch_interface():
|
|
| 546 |
gr.Markdown(
|
| 547 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
| 548 |
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
| 549 |
-
"SDG will be highlighted above the graph. Download the Page Predictions
|
| 550 |
-
label
|
| 551 |
)
|
| 552 |
-
|
| 553 |
gr.Markdown("##### Download Results")
|
| 554 |
with gr.Row():
|
| 555 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
|
@@ -562,8 +568,8 @@ def launch_interface():
|
|
| 562 |
gr.Markdown(
|
| 563 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
| 564 |
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
| 565 |
-
"Primary SDGs. Download the
|
| 566 |
-
label
|
| 567 |
)
|
| 568 |
|
| 569 |
gr.Markdown("##### Download Results")
|
|
@@ -594,10 +600,10 @@ def launch_interface():
|
|
| 594 |
gr.Markdown(
|
| 595 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
| 596 |
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
| 597 |
-
"SDG will be highlighted above the graph. Download the Sentence Predictions
|
| 598 |
-
label
|
| 599 |
)
|
| 600 |
-
|
| 601 |
gr.Markdown("##### Download Results")
|
| 602 |
with gr.Row():
|
| 603 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
|
@@ -610,10 +616,10 @@ def launch_interface():
|
|
| 610 |
gr.Markdown(
|
| 611 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
| 612 |
"SDGs that are not the primary focus of the sentences analysed. These SDGs are second to the "+
|
| 613 |
-
"Primary SDGs. Download the Sentence Predictions
|
| 614 |
-
label
|
| 615 |
)
|
| 616 |
-
|
| 617 |
gr.Markdown("##### Download Results")
|
| 618 |
with gr.Row():
|
| 619 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
|
@@ -662,10 +668,18 @@ def launch_interface():
|
|
| 662 |
|
| 663 |
# Generate plots with icon overlay
|
| 664 |
first_plot = plot_sdg(
|
| 665 |
-
df_page_predictions,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
)
|
| 667 |
second_plot = plot_sdg(
|
| 668 |
-
df_page_predictions,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 669 |
)
|
| 670 |
|
| 671 |
# Define output file names
|
|
@@ -689,9 +703,14 @@ def launch_interface():
|
|
| 689 |
save_figure_as_jpeg(second_plot, secondary_page_jpeg)
|
| 690 |
|
| 691 |
return (
|
| 692 |
-
first_plot,
|
| 693 |
-
|
| 694 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
)
|
| 696 |
|
| 697 |
except Exception as e:
|
|
@@ -740,10 +759,18 @@ def launch_interface():
|
|
| 740 |
|
| 741 |
# Generate plots with icon overlay
|
| 742 |
first_plot = plot_sdg(
|
| 743 |
-
df_sentence_predictions,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 744 |
)
|
| 745 |
second_plot = plot_sdg(
|
| 746 |
-
df_sentence_predictions,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 747 |
)
|
| 748 |
|
| 749 |
# Define output file names
|
|
@@ -767,9 +794,14 @@ def launch_interface():
|
|
| 767 |
save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
|
| 768 |
|
| 769 |
return (
|
| 770 |
-
first_plot,
|
| 771 |
-
|
| 772 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
)
|
| 774 |
|
| 775 |
except Exception as e:
|
|
@@ -778,10 +810,10 @@ def launch_interface():
|
|
| 778 |
|
| 779 |
# Reset functions to clear the outputs
|
| 780 |
def reset_page_outputs():
|
| 781 |
-
return [None
|
| 782 |
|
| 783 |
def reset_sentence_outputs():
|
| 784 |
-
return [None
|
| 785 |
|
| 786 |
# Button actions for Page-Level Analysis
|
| 787 |
page_button.click(
|
|
@@ -793,7 +825,7 @@ def launch_interface():
|
|
| 793 |
page_csv, # π Download Page Predictions CSV
|
| 794 |
page_docx, # π Download Page Report DOCX
|
| 795 |
page_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
| 796 |
-
|
| 797 |
page_report_file_secondary, # π Download Page Report DOCX
|
| 798 |
secondary_page_jpeg # 🖼️ Download Secondary SDGs JPEG
|
| 799 |
]
|
|
@@ -807,7 +839,7 @@ def launch_interface():
|
|
| 807 |
page_csv,
|
| 808 |
page_docx,
|
| 809 |
page_jpeg1,
|
| 810 |
-
|
| 811 |
page_report_file_secondary,
|
| 812 |
secondary_page_jpeg
|
| 813 |
]
|
|
@@ -823,7 +855,7 @@ def launch_interface():
|
|
| 823 |
sentence_csv, # π Download Sentence Predictions CSV
|
| 824 |
sentence_docx, # π Download Sentence Report DOCX
|
| 825 |
sentence_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
| 826 |
-
|
| 827 |
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
| 828 |
secondary_sentence_jpeg # 🖼️ Download Secondary SDGs JPEG
|
| 829 |
]
|
|
@@ -837,7 +869,7 @@ def launch_interface():
|
|
| 837 |
sentence_csv,
|
| 838 |
sentence_docx,
|
| 839 |
sentence_jpeg1,
|
| 840 |
-
|
| 841 |
sentence_report_file_secondary,
|
| 842 |
secondary_sentence_jpeg
|
| 843 |
]
|
|
|
|
| 169 |
}
|
| 170 |
|
| 171 |
# Function to plot SDG dominant bar graphs using Plotly
|
| 172 |
+
def plot_sdg(df, title, pred_column, analysis_level, sdg_type, icons_folder='assets/icons/'):
|
|
|
|
| 173 |
"""
|
| 174 |
+
Plots a horizontal bar graph of SDG predictions and overlays the icon of the most frequent SDG above the plot area.
|
| 175 |
+
|
| 176 |
Args:
|
| 177 |
df (pd.DataFrame): DataFrame containing SDG predictions.
|
| 178 |
title (str): Title of the plot.
|
| 179 |
pred_column (str): Column name to use for plotting (e.g., 'pred1').
|
| 180 |
+
analysis_level (str): Level of analysis ('pages' or 'sentences').
|
| 181 |
+
sdg_type (str): Type of SDG analysis ('primary' or 'secondary').
|
| 182 |
icons_folder (str): Path to the folder containing SDG icons.
|
| 183 |
+
|
| 184 |
Returns:
|
| 185 |
plotly.graph_objs._figure.Figure: The Plotly figure object.
|
| 186 |
"""
|
|
|
|
| 210 |
textfont=dict(size=10)
|
| 211 |
)
|
| 212 |
|
| 213 |
+
# Construct dynamic x-axis title
|
| 214 |
+
xaxis_title = f"Percentage of {analysis_level} aligned with {sdg_type.capitalize()} SDGs"
|
| 215 |
+
|
| 216 |
# Adjust layout for better visibility
|
| 217 |
fig.update_layout(
|
| 218 |
title=dict(
|
|
|
|
| 223 |
title=None,
|
| 224 |
tickfont=dict(size=12)
|
| 225 |
),
|
| 226 |
+
xaxis=dict(
|
| 227 |
+
title=xaxis_title, # Dynamic x-axis title
|
| 228 |
+
tickfont=dict(size=12) # Reduce x-axis font size
|
| 229 |
+
),
|
| 230 |
+
margin=dict(l=20, r=30, t=100, b=20), # Adjusted margins
|
| 231 |
height=600,
|
| 232 |
#width=800,
|
| 233 |
showlegend=False,
|
| 234 |
template="simple_white",
|
|
|
|
|
|
|
|
|
|
| 235 |
)
|
| 236 |
|
| 237 |
# Identify the most frequent SDG
|
|
|
|
| 255 |
dict(
|
| 256 |
source='data:image/png;base64,' + encoded_image,
|
| 257 |
xref="paper", yref="paper",
|
| 258 |
+
x=0.3, y=1.2, # Paper coordinates: left of center, above the plot area
|
| 259 |
+
sizex=0.2, sizey=0.2, # Adjust size as needed
|
| 260 |
+
xanchor="center",
|
| 261 |
+
yanchor="middle",
|
| 262 |
layer="above" # Ensure the icon is above other plot elements
|
| 263 |
)
|
| 264 |
)
|
|
|
|
| 311 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_page.jpeg"
|
| 312 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_page.jpeg"
|
| 313 |
|
| 314 |
+
plot_sdg(df_doc, "Primary SDGs", 'pred1', analysis_level='pages', sdg_type='primary').write_image(
|
| 315 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 316 |
+
plot_sdg(df_doc, "Secondary SDGs", 'pred2', analysis_level='pages', sdg_type='secondary').write_image(
|
| 317 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 318 |
|
| 319 |
# Add plots to the Word document
|
|
|
|
| 363 |
first_sdg_plot_path = f"{sanitized_doc_name}_first_sdg_sentence.jpeg"
|
| 364 |
second_sdg_plot_path = f"{sanitized_doc_name}_second_sdg_sentence.jpeg"
|
| 365 |
|
| 366 |
+
plot_sdg(df_doc, "Primary SDGs", 'pred1', analysis_level='sentences', sdg_type='primary').write_image(
|
| 367 |
first_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 368 |
+
plot_sdg(df_doc, "Secondary SDGs", 'pred2', analysis_level='sentences', sdg_type='secondary').write_image(
|
| 369 |
second_sdg_plot_path, format='jpeg', scale=7, engine="kaleido")
|
| 370 |
|
| 371 |
# Add plots to the Word document
|
|
|
|
| 526 |
outputs=[start_page, end_page]
|
| 527 |
)
|
| 528 |
|
| 529 |
+
# SDG Analysis Type Section
|
| 530 |
gr.Markdown("## SDG Analysis Type")
|
| 531 |
+
|
| 532 |
+
# Main Tabs for Page-Level and Sentence-Level Analysis
|
| 533 |
with gr.Tab("π Page-Level Analysis"):
|
| 534 |
gr.Markdown(
|
| 535 |
"""
|
|
|
|
| 552 |
gr.Markdown(
|
| 553 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
| 554 |
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
| 555 |
+
"SDG will be highlighted above the graph. Download the Page Predictions CSV for further details.",
|
| 556 |
+
label="Note", container=True
|
| 557 |
)
|
| 558 |
+
|
| 559 |
gr.Markdown("##### Download Results")
|
| 560 |
with gr.Row():
|
| 561 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
|
|
|
| 568 |
gr.Markdown(
|
| 569 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
| 570 |
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
| 571 |
+
"Primary SDGs. Download the Page Predictions CSV for further details.",
|
| 572 |
+
label="Note", container=True
|
| 573 |
)
|
| 574 |
|
| 575 |
gr.Markdown("##### Download Results")
|
|
|
|
| 600 |
gr.Markdown(
|
| 601 |
"When the analysis is done, the Primary SDGs bar graph on the left will show "+
|
| 602 |
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
| 603 |
+
"SDG will be highlighted above the graph. Download the Sentence Predictions CSV for further details.",
|
| 604 |
+
label="Note", container=True
|
| 605 |
)
|
| 606 |
+
|
| 607 |
gr.Markdown("##### Download Results")
|
| 608 |
with gr.Row():
|
| 609 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
|
|
|
| 616 |
gr.Markdown(
|
| 617 |
"When the analysis is done, the Secondary SDGs bar graph on the left will show "+
|
| 618 |
"SDGs that are not the primary focus of the sentences analysed. These SDGs are second to the "+
|
| 619 |
+
"Primary SDGs. Download the Sentence Predictions CSV for further details.",
|
| 620 |
+
label="Note", container=True
|
| 621 |
)
|
| 622 |
+
|
| 623 |
gr.Markdown("##### Download Results")
|
| 624 |
with gr.Row():
|
| 625 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
|
|
|
| 668 |
|
| 669 |
# Generate plots with icon overlay
|
| 670 |
first_plot = plot_sdg(
|
| 671 |
+
df_page_predictions,
|
| 672 |
+
"π Primary SDGs",
|
| 673 |
+
'pred1',
|
| 674 |
+
analysis_level='pages', # Specify analysis level
|
| 675 |
+
sdg_type='primary' # Specify SDG type
|
| 676 |
)
|
| 677 |
second_plot = plot_sdg(
|
| 678 |
+
df_page_predictions,
|
| 679 |
+
"π Secondary SDGs",
|
| 680 |
+
'pred2',
|
| 681 |
+
analysis_level='pages', # Specify analysis level
|
| 682 |
+
sdg_type='secondary' # Specify SDG type
|
| 683 |
)
|
| 684 |
|
| 685 |
# Define output file names
|
|
|
|
| 703 |
save_figure_as_jpeg(second_plot, secondary_page_jpeg)
|
| 704 |
|
| 705 |
return (
|
| 706 |
+
first_plot,
|
| 707 |
+
second_plot,
|
| 708 |
+
page_csv_file,
|
| 709 |
+
page_report_file,
|
| 710 |
+
primary_page_jpeg,
|
| 711 |
+
page_csv_file_secondary,
|
| 712 |
+
page_report_file_secondary,
|
| 713 |
+
secondary_page_jpeg
|
| 714 |
)
|
| 715 |
|
| 716 |
except Exception as e:
|
|
|
|
| 759 |
|
| 760 |
# Generate plots with icon overlay
|
| 761 |
first_plot = plot_sdg(
|
| 762 |
+
df_sentence_predictions,
|
| 763 |
+
"π Primary SDGs",
|
| 764 |
+
'pred1',
|
| 765 |
+
analysis_level='sentences', # Specify analysis level
|
| 766 |
+
sdg_type='primary' # Specify SDG type
|
| 767 |
)
|
| 768 |
second_plot = plot_sdg(
|
| 769 |
+
df_sentence_predictions,
|
| 770 |
+
"π Secondary SDGs",
|
| 771 |
+
'pred2',
|
| 772 |
+
analysis_level='sentences', # Specify analysis level
|
| 773 |
+
sdg_type='secondary' # Specify SDG type
|
| 774 |
)
|
| 775 |
|
| 776 |
# Define output file names
|
|
|
|
| 794 |
save_figure_as_jpeg(second_plot, secondary_sentence_jpeg)
|
| 795 |
|
| 796 |
return (
|
| 797 |
+
first_plot,
|
| 798 |
+
second_plot,
|
| 799 |
+
sentence_csv_file,
|
| 800 |
+
sentence_report_file,
|
| 801 |
+
primary_sentence_jpeg,
|
| 802 |
+
sentence_csv_file_secondary,
|
| 803 |
+
sentence_report_file_secondary,
|
| 804 |
+
secondary_sentence_jpeg
|
| 805 |
)
|
| 806 |
|
| 807 |
except Exception as e:
|
|
|
|
| 810 |
|
| 811 |
# Reset functions to clear the outputs
|
| 812 |
def reset_page_outputs():
|
| 813 |
+
return [None] * 8
|
| 814 |
|
| 815 |
def reset_sentence_outputs():
|
| 816 |
+
return [None] * 8
|
| 817 |
|
| 818 |
# Button actions for Page-Level Analysis
|
| 819 |
page_button.click(
|
|
|
|
| 825 |
page_csv, # π Download Page Predictions CSV
|
| 826 |
page_docx, # π Download Page Report DOCX
|
| 827 |
page_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
| 828 |
+
page_csv_file_secondary, # π Download Page Predictions CSV
|
| 829 |
page_report_file_secondary, # π Download Page Report DOCX
|
| 830 |
secondary_page_jpeg # 🖼️ Download Secondary SDGs JPEG
|
| 831 |
]
|
|
|
|
| 839 |
page_csv,
|
| 840 |
page_docx,
|
| 841 |
page_jpeg1,
|
| 842 |
+
page_csv_file_secondary,
|
| 843 |
page_report_file_secondary,
|
| 844 |
secondary_page_jpeg
|
| 845 |
]
|
|
|
|
| 855 |
sentence_csv, # π Download Sentence Predictions CSV
|
| 856 |
sentence_docx, # π Download Sentence Report DOCX
|
| 857 |
sentence_jpeg1, # 🖼️ Download Primary SDGs JPEG
|
| 858 |
+
sentence_csv_file_secondary, # π Download Sentence Predictions CSV
|
| 859 |
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
| 860 |
secondary_sentence_jpeg # 🖼️ Download Secondary SDGs JPEG
|
| 861 |
]
|
|
|
|
| 869 |
sentence_csv,
|
| 870 |
sentence_docx,
|
| 871 |
sentence_jpeg1,
|
| 872 |
+
sentence_csv_file_secondary,
|
| 873 |
sentence_report_file_secondary,
|
| 874 |
secondary_sentence_jpeg
|
| 875 |
]
|