Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -282,14 +282,14 @@ def generate_page_report(df_pages, report_file_name):
|
|
| 282 |
'(Primary and Secondary) for each page with a probability score greater than zero.'
|
| 283 |
)
|
| 284 |
|
| 285 |
-
doc.add_heading("
|
| 286 |
doc.add_paragraph(
|
| 287 |
'This graph displays the most essential SDG the AI model associates with pages. The bars '
|
| 288 |
'represent the percentage of pages most strongly aligned with each SDG. This offers insight into the dominant '
|
| 289 |
'sustainable development theme within the document.'
|
| 290 |
)
|
| 291 |
|
| 292 |
-
doc.add_heading("
|
| 293 |
doc.add_paragraph(
|
| 294 |
'This graph shows the second most relevant SDGs for pages. Although these SDGs are '
|
| 295 |
'not the primary focus, the text has some relevance to these goals.'
|
|
@@ -334,14 +334,14 @@ def generate_sentence_report(df_sentences, report_file_name):
|
|
| 334 |
'(Primary and Secondary) for each sentence with a probability score greater than zero.'
|
| 335 |
)
|
| 336 |
|
| 337 |
-
doc.add_heading("
|
| 338 |
doc.add_paragraph(
|
| 339 |
'This graph displays the most essential SDG the AI model associates with sentences. The bars '
|
| 340 |
'represent the percentage of sentences most strongly aligned with each SDG. This offers more profound insight '
|
| 341 |
'into the dominant sustainable development theme within the document.'
|
| 342 |
)
|
| 343 |
|
| 344 |
-
doc.add_heading("
|
| 345 |
doc.add_paragraph(
|
| 346 |
'This graph shows the second most relevant SDGs for sentences. Although these SDGs are not '
|
| 347 |
'the primary focus, the text has some relevance to these goals.'
|
|
@@ -483,6 +483,7 @@ def launch_interface():
|
|
| 483 |
)
|
| 484 |
|
| 485 |
# Shared PDF file input for both analyses
|
|
|
|
| 486 |
with gr.Row():
|
| 487 |
file_input = gr.File(
|
| 488 |
label="π Upload PDF File for Analysis", file_types=[".pdf"]
|
|
@@ -492,7 +493,8 @@ def launch_interface():
|
|
| 492 |
gr.Markdown(
|
| 493 |
"""
|
| 494 |
## PDF Text Extraction Mode
|
| 495 |
-
Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select
|
|
|
|
| 496 |
"""
|
| 497 |
)
|
| 498 |
with gr.Row():
|
|
@@ -503,7 +505,7 @@ def launch_interface():
|
|
| 503 |
)
|
| 504 |
|
| 505 |
with gr.Row():
|
| 506 |
-
start_page = gr.Number(value=1, label="π’ Start Page", visible=False)
|
| 507 |
end_page = gr.Number(value=1, label="π’ End Page", visible=False)
|
| 508 |
|
| 509 |
# Function to update visibility of start_page and end_page
|
|
@@ -531,27 +533,44 @@ def launch_interface():
|
|
| 531 |
It provides **high-level SDG mapping** of documents at the page level.
|
| 532 |
"""
|
| 533 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 534 |
# Nested Tabs for Primary and Secondary SDGs
|
| 535 |
with gr.Tabs():
|
| 536 |
with gr.TabItem("π Primary SDGs"):
|
| 537 |
-
|
| 538 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
with gr.Row():
|
| 540 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
| 541 |
page_docx = gr.File(label="π Download Page Report DOCX")
|
| 542 |
page_jpeg1 = gr.File(label="πΌοΈ Download Primary SDGs JPEG")
|
| 543 |
|
| 544 |
with gr.TabItem("π Secondary SDGs"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
secondary_page_plot = gr.Plot(label="π Secondary SDGs [Page-Level]")
|
| 546 |
|
| 547 |
with gr.Row():
|
| 548 |
page_csv_secondary = gr.File(label="π Download Page Predictions CSV")
|
| 549 |
page_report_file_secondary = gr.File(label="π Download Page Report DOCX")
|
| 550 |
-
secondary_page_jpeg = gr.File(label="πΌοΈ Download Secondary SDGs JPEG")
|
| 551 |
-
|
| 552 |
-
with gr.Row():
|
| 553 |
-
page_button = gr.Button("πββοΈ Run Page-Level Analysis")
|
| 554 |
-
reset_page_button = gr.Button("π Reset Page-Level Analysis", elem_classes="reset-button")
|
| 555 |
|
| 556 |
with gr.Tab("βοΈ Sentence-Level Analysis"):
|
| 557 |
gr.Markdown(
|
|
@@ -562,11 +581,23 @@ def launch_interface():
|
|
| 562 |
It provides **detailed SDG mapping** at the sentence level.
|
| 563 |
"""
|
| 564 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
# Nested Tabs for Primary and Secondary SDGs
|
| 566 |
with gr.Tabs():
|
| 567 |
with gr.TabItem("π Primary SDGs"):
|
| 568 |
-
|
| 569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
with gr.Row():
|
| 571 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
| 572 |
sentence_docx = gr.File(label="π Download Sentence Report DOCX")
|
|
@@ -579,11 +610,7 @@ def launch_interface():
|
|
| 579 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
| 580 |
sentence_report_file_secondary = gr.File(label="π Download Sentence Report DOCX")
|
| 581 |
secondary_sentence_jpeg = gr.File(label="πΌοΈ Download Secondary SDGs JPEG")
|
| 582 |
-
|
| 583 |
-
with gr.Row():
|
| 584 |
-
sentence_button = gr.Button("πββοΈ Run Sentence-Level Analysis")
|
| 585 |
-
reset_sentence_button = gr.Button("π Reset Sentence-Level Analysis", elem_classes="reset-button")
|
| 586 |
-
|
| 587 |
# Function to process page-level analysis
|
| 588 |
@spaces.GPU
|
| 589 |
def process_pages(file, extraction_mode, start_page, end_page):
|
|
@@ -633,13 +660,13 @@ def launch_interface():
|
|
| 633 |
)
|
| 634 |
|
| 635 |
# Define output file names
|
| 636 |
-
page_csv_file = f"{sanitized_file_name}
|
| 637 |
-
page_report_file = f"{sanitized_file_name}
|
| 638 |
-
primary_page_jpeg = f"{sanitized_file_name}
|
| 639 |
|
| 640 |
-
page_csv_file_secondary = f"{sanitized_file_name}
|
| 641 |
-
page_report_file_secondary = f"{sanitized_file_name}
|
| 642 |
-
secondary_page_jpeg = f"{sanitized_file_name}
|
| 643 |
|
| 644 |
# Save CSV and reports
|
| 645 |
df_page_predictions.to_csv(page_csv_file, index=False)
|
|
@@ -711,13 +738,13 @@ def launch_interface():
|
|
| 711 |
)
|
| 712 |
|
| 713 |
# Define output file names
|
| 714 |
-
sentence_csv_file = f"{sanitized_file_name}
|
| 715 |
-
sentence_report_file = f"{sanitized_file_name}
|
| 716 |
-
primary_sentence_jpeg = f"{sanitized_file_name}
|
| 717 |
|
| 718 |
-
sentence_csv_file_secondary = f"{sanitized_file_name}
|
| 719 |
-
sentence_report_file_secondary = f"{sanitized_file_name}
|
| 720 |
-
secondary_sentence_jpeg = f"{sanitized_file_name}
|
| 721 |
|
| 722 |
# Save CSV and reports
|
| 723 |
df_sentence_predictions.to_csv(sentence_csv_file, index=False)
|
|
@@ -757,8 +784,8 @@ def launch_interface():
|
|
| 757 |
page_csv, # π Download Page Predictions CSV
|
| 758 |
page_docx, # π Download Page Report DOCX
|
| 759 |
page_jpeg1, # πΌοΈ Download Primary SDGs JPEG
|
| 760 |
-
page_csv_secondary, # π Download Page Predictions CSV
|
| 761 |
-
page_report_file_secondary, # π Download Page Report DOCX
|
| 762 |
secondary_page_jpeg # πΌοΈ Download Secondary SDGs JPEG
|
| 763 |
]
|
| 764 |
)
|
|
@@ -787,8 +814,8 @@ def launch_interface():
|
|
| 787 |
sentence_csv, # π Download Sentence Predictions CSV
|
| 788 |
sentence_docx, # π Download Sentence Report DOCX
|
| 789 |
sentence_jpeg1, # πΌοΈ Download Primary SDGs JPEG
|
| 790 |
-
sentence_csv_secondary, # π Download Sentence Predictions CSV
|
| 791 |
-
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
| 792 |
secondary_sentence_jpeg # πΌοΈ Download Secondary SDGs JPEG
|
| 793 |
]
|
| 794 |
)
|
|
|
|
| 282 |
'(Primary and Secondary) for each page with a probability score greater than zero.'
|
| 283 |
)
|
| 284 |
|
| 285 |
+
doc.add_heading("Primary SDGs Bar Graph", level=3)
|
| 286 |
doc.add_paragraph(
|
| 287 |
'This graph displays the most essential SDG the AI model associates with pages. The bars '
|
| 288 |
'represent the percentage of pages most strongly aligned with each SDG. This offers insight into the dominant '
|
| 289 |
'sustainable development theme within the document.'
|
| 290 |
)
|
| 291 |
|
| 292 |
+
doc.add_heading("Secondary SDGs Bar Graph", level=3)
|
| 293 |
doc.add_paragraph(
|
| 294 |
'This graph shows the second most relevant SDGs for pages. Although these SDGs are '
|
| 295 |
'not the primary focus, the text has some relevance to these goals.'
|
|
|
|
| 334 |
'(Primary and Secondary) for each sentence with a probability score greater than zero.'
|
| 335 |
)
|
| 336 |
|
| 337 |
+
doc.add_heading("Primary SDGs Bar Graph", level=3)
|
| 338 |
doc.add_paragraph(
|
| 339 |
'This graph displays the most essential SDG the AI model associates with sentences. The bars '
|
| 340 |
'represent the percentage of sentences most strongly aligned with each SDG. This offers more profound insight '
|
| 341 |
'into the dominant sustainable development theme within the document.'
|
| 342 |
)
|
| 343 |
|
| 344 |
+
doc.add_heading("Secondary SDGs Bar Graph", level=3)
|
| 345 |
doc.add_paragraph(
|
| 346 |
'This graph shows the second most relevant SDGs for sentences. Although these SDGs are not '
|
| 347 |
'the primary focus, the text has some relevance to these goals.'
|
|
|
|
| 483 |
)
|
| 484 |
|
| 485 |
# Shared PDF file input for both analyses
|
| 486 |
+
gr.Markdown("## Upload PDF File")
|
| 487 |
with gr.Row():
|
| 488 |
file_input = gr.File(
|
| 489 |
label="π Upload PDF File for Analysis", file_types=[".pdf"]
|
|
|
|
| 493 |
gr.Markdown(
|
| 494 |
"""
|
| 495 |
## PDF Text Extraction Mode
|
| 496 |
+
Choose whether to analyze all pages or a specific range of pages. If you want to exclude certain pages from the analysis, select
|
| 497 |
+
"Range of Pages" and specify the start and end pages.
|
| 498 |
"""
|
| 499 |
)
|
| 500 |
with gr.Row():
|
|
|
|
| 505 |
)
|
| 506 |
|
| 507 |
with gr.Row():
|
| 508 |
+
start_page = gr.Number(value=1, label="π’ Start Page", visible=False, info="The cover page is page 1")
|
| 509 |
end_page = gr.Number(value=1, label="π’ End Page", visible=False)
|
| 510 |
|
| 511 |
# Function to update visibility of start_page and end_page
|
|
|
|
| 533 |
It provides **high-level SDG mapping** of documents at the page level.
|
| 534 |
"""
|
| 535 |
)
|
| 536 |
+
|
| 537 |
+
with gr.Row():
|
| 538 |
+
page_button = gr.Button("πββοΈ Run Page-Level Analysis")
|
| 539 |
+
reset_page_button = gr.Button("π Reset Page-Level Analysis", elem_classes="reset-button")
|
| 540 |
+
|
| 541 |
# Nested Tabs for Primary and Secondary SDGs
|
| 542 |
with gr.Tabs():
|
| 543 |
with gr.TabItem("π Primary SDGs"):
|
| 544 |
+
with gr.Row():
|
| 545 |
+
primary_page_plot = gr.Plot(label="π Primary SDGs [Page-Level]", scale=2)
|
| 546 |
+
gr.Textbox(
|
| 547 |
+
"When the analysis is completed, the Primary SDGs bar graph on the left will show "+
|
| 548 |
+
"the percentage of pages that strongly align with each SDG. The icon for the most frequent "+
|
| 549 |
+
"SDG is highlighted above the graph. Download the Page Predictions CSV for further details.",
|
| 550 |
+
interactive=False, scale=1
|
| 551 |
+
)
|
| 552 |
+
gr.Markdown("##### Download results")
|
| 553 |
with gr.Row():
|
| 554 |
page_csv = gr.File(label="π Download Page Predictions CSV")
|
| 555 |
page_docx = gr.File(label="π Download Page Report DOCX")
|
| 556 |
page_jpeg1 = gr.File(label="πΌοΈ Download Primary SDGs JPEG")
|
| 557 |
|
| 558 |
with gr.TabItem("π Secondary SDGs"):
|
| 559 |
+
with gr.Row():
|
| 560 |
+
primary_page_plot = gr.Plot(label="π Primary SDGs [Page-Level]", scale=2)
|
| 561 |
+
gr.Textbox(
|
| 562 |
+
"When the analysis is completed, the Secondary SDGs bar graph on the left will show "+
|
| 563 |
+
"SDGs that are not the primary focus of the pages analysed. These SDGs are second to the "+
|
| 564 |
+
"Primary SDGs.",
|
| 565 |
+
interactive=False, scale=1
|
| 566 |
+
)
|
| 567 |
+
gr.Markdown("##### Download results")
|
| 568 |
secondary_page_plot = gr.Plot(label="π Secondary SDGs [Page-Level]")
|
| 569 |
|
| 570 |
with gr.Row():
|
| 571 |
page_csv_secondary = gr.File(label="π Download Page Predictions CSV")
|
| 572 |
page_report_file_secondary = gr.File(label="π Download Page Report DOCX")
|
| 573 |
+
secondary_page_jpeg = gr.File(label="πΌοΈ Download Secondary SDGs JPEG")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 574 |
|
| 575 |
with gr.Tab("βοΈ Sentence-Level Analysis"):
|
| 576 |
gr.Markdown(
|
|
|
|
| 581 |
It provides **detailed SDG mapping** at the sentence level.
|
| 582 |
"""
|
| 583 |
)
|
| 584 |
+
|
| 585 |
+
with gr.Row():
|
| 586 |
+
sentence_button = gr.Button("πββοΈ Run Sentence-Level Analysis")
|
| 587 |
+
reset_sentence_button = gr.Button("π Reset Sentence-Level Analysis", elem_classes="reset-button")
|
| 588 |
+
|
| 589 |
# Nested Tabs for Primary and Secondary SDGs
|
| 590 |
with gr.Tabs():
|
| 591 |
with gr.TabItem("π Primary SDGs"):
|
| 592 |
+
with gr.Row():
|
| 593 |
+
primary_sentence_plot = gr.Plot(label="π Primary SDGs [Sentence-Level]", scale=2)
|
| 594 |
+
gr.Textbox(
|
| 595 |
+
"When the analysis is completed, the Primary SDGs bar graph on the left will show "+
|
| 596 |
+
"the percentage of sentences that strongly align with each SDG. The icon for the most frequent "+
|
| 597 |
+
"SDG is highlighted above the graph. Download the Sentence Predictions CSV for further details.",
|
| 598 |
+
interactive=False, scale=1
|
| 599 |
+
)
|
| 600 |
+
gr.Markdown("##### Download results")
|
| 601 |
with gr.Row():
|
| 602 |
sentence_csv = gr.File(label="π Download Sentence Predictions CSV")
|
| 603 |
sentence_docx = gr.File(label="π Download Sentence Report DOCX")
|
|
|
|
| 610 |
sentence_csv_secondary = gr.File(label="π Download Sentence Predictions CSV")
|
| 611 |
sentence_report_file_secondary = gr.File(label="π Download Sentence Report DOCX")
|
| 612 |
secondary_sentence_jpeg = gr.File(label="πΌοΈ Download Secondary SDGs JPEG")
|
| 613 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 614 |
# Function to process page-level analysis
|
| 615 |
@spaces.GPU
|
| 616 |
def process_pages(file, extraction_mode, start_page, end_page):
|
|
|
|
| 660 |
)
|
| 661 |
|
| 662 |
# Define output file names
|
| 663 |
+
page_csv_file = f"{sanitized_file_name}_SDG-Page_predictions.csv"
|
| 664 |
+
page_report_file = f"{sanitized_file_name}_SDG-Page_report.docx"
|
| 665 |
+
primary_page_jpeg = f"{sanitized_file_name}_SDG-Page_primary_graph.jpeg"
|
| 666 |
|
| 667 |
+
page_csv_file_secondary = f"{sanitized_file_name}_SDG-Page_predictions.csv"
|
| 668 |
+
page_report_file_secondary = f"{sanitized_file_name}_SDG-Page_report.docx"
|
| 669 |
+
secondary_page_jpeg = f"{sanitized_file_name}_SDG-Page_secondary_graph.jpeg"
|
| 670 |
|
| 671 |
# Save CSV and reports
|
| 672 |
df_page_predictions.to_csv(page_csv_file, index=False)
|
|
|
|
| 738 |
)
|
| 739 |
|
| 740 |
# Define output file names
|
| 741 |
+
sentence_csv_file = f"{sanitized_file_name}_SDG-Sentence_predictions.csv"
|
| 742 |
+
sentence_report_file = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
| 743 |
+
primary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_primary_graph.jpeg"
|
| 744 |
|
| 745 |
+
sentence_csv_file_secondary = f"{sanitized_file_name}_SDG-Sentence_predictions.csv"
|
| 746 |
+
sentence_report_file_secondary = f"{sanitized_file_name}_SDG-Sentence_report.docx"
|
| 747 |
+
secondary_sentence_jpeg = f"{sanitized_file_name}_SDG-Sentence_secondary_graph.jpeg"
|
| 748 |
|
| 749 |
# Save CSV and reports
|
| 750 |
df_sentence_predictions.to_csv(sentence_csv_file, index=False)
|
|
|
|
| 784 |
page_csv, # π Download Page Predictions CSV
|
| 785 |
page_docx, # π Download Page Report DOCX
|
| 786 |
page_jpeg1, # πΌοΈ Download Primary SDGs JPEG
|
| 787 |
+
page_csv_secondary, # π Download Page Predictions CSV
|
| 788 |
+
page_report_file_secondary, # π Download Page Report DOCX
|
| 789 |
secondary_page_jpeg # πΌοΈ Download Secondary SDGs JPEG
|
| 790 |
]
|
| 791 |
)
|
|
|
|
| 814 |
sentence_csv, # π Download Sentence Predictions CSV
|
| 815 |
sentence_docx, # π Download Sentence Report DOCX
|
| 816 |
sentence_jpeg1, # πΌοΈ Download Primary SDGs JPEG
|
| 817 |
+
sentence_csv_secondary, # π Download Sentence Predictions CSV
|
| 818 |
+
sentence_report_file_secondary, # π Download Sentence Report DOCX
|
| 819 |
secondary_sentence_jpeg # πΌοΈ Download Secondary SDGs JPEG
|
| 820 |
]
|
| 821 |
)
|