Spaces:

pszemraj
/

document-summarization

Running on CPU Upgrade

App Files Files Community

Peter committed on Oct 17, 2022

Commit

34de38e

1 Parent(s): 50d040d

✨ add ability to download outputs

Browse files

Signed-off-by: Peter <74869040+pszemraj@users.noreply.github.com>

Files changed (2) hide show

app.py +13 -3
utils.py +49 -2

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ from doctr.models import ocr_predictor
 from pdf2text import convert_PDF_to_Text
 from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
-from utils import load_example_filenames, truncate_word_count
 _here = Path(__file__).parent
@@ -125,7 +125,10 @@ def proc_submission(
     html += ""
-    return html, sum_text_out, scores_out
 def load_single_example_text(
@@ -295,6 +298,13 @@ if __name__ == "__main__":
                 label="Summary Scores", placeholder="Summary scores will appear here"
             )
         gr.Markdown("---")
         with gr.Column():
             gr.Markdown("### Advanced Settings")
@@ -351,7 +361,7 @@ if __name__ == "__main__":
                 repetition_penalty,
                 no_repeat_ngram_size,
             ],
-            outputs=[output_text, summary_text, summary_scores],
         )
     demo.launch(enable_queue=True)

 from pdf2text import convert_PDF_to_Text
 from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
+from utils import load_example_filenames, truncate_word_count, saves_summary
 _here = Path(__file__).parent
     html += ""
+    # save to file
+    saved_file = saves_summary(_summaries)
+    return html, sum_text_out, scores_out, saved_file
 def load_single_example_text(
                 label="Summary Scores", placeholder="Summary scores will appear here"
             )
+            text_file = gr.File(
+            label="Download Summary as Text File",
+            file_count="single",
+            type="file",
+            interactive=False,
+        )
         gr.Markdown("---")
         with gr.Column():
             gr.Markdown("### Advanced Settings")
                 repetition_penalty,
                 no_repeat_ngram_size,
             ],
+            outputs=[output_text, summary_text, summary_scores, text_file],
         )
     demo.launch(enable_queue=True)

utils.py CHANGED Viewed

@@ -4,11 +4,17 @@
 import re
 from pathlib import Path
 from natsort import natsorted
 import subprocess
 def truncate_word_count(text, max_words=512):
     """
     truncate_word_count - a helper function for the gradio module
@@ -67,3 +73,44 @@ def load_example_filenames(example_path: str or Path):
     # load the examples into a list
     examples = {f.name: f for f in example_path.glob("*.txt")}
     return examples

 import re
 from pathlib import Path
+from datetime import datetime
 from natsort import natsorted
 import subprocess
+def get_timestamp()->str:
+    """
+    get_timestamp - build a compact, sortable stamp from the local wall-clock time
+    Returns:
+        str, the current local time formatted as YYYYMMDD_HHMMSS
+    """
+    # datetime supports format specs directly; this dispatches to strftime()
+    current_time = datetime.now()
+    return f"{current_time:%Y%m%d_%H%M%S}"
 def truncate_word_count(text, max_words=512):
     """
     truncate_word_count - a helper function for the gradio module
     # load the examples into a list
     examples = {f.name: f for f in example_path.glob("*.txt")}
     return examples
+def saves_summary(summarize_output, outpath:str or Path=None, add_signature=True):
+    """
+    saves_summary - save the summary generated from summarize_via_tokenbatches() to a text file
+            _summaries = summarize_via_tokenbatches(
+              text,
+              batch_length=token_batch_length,
+              batch_stride=batch_stride,
+              **settings,
+          )
+    Args:
+        summarize_output: iterable of dicts, each read via item["summary"][0] (the
+            summary text of one section) and item["summary_score"] (a number)
+        outpath: where to write the text file; when None, a timestamped
+            "document_summary_<timestamp>.txt" in the current working directory is used
+        add_signature: when True, a line crediting the space is written first
+    Returns:
+        Path, the location of the file that was written
+    """
+    if outpath is None:
+        outpath = Path.cwd() / f"document_summary_{get_timestamp()}.txt"
+    else:
+        outpath = Path(outpath)
+    sum_text = [s["summary"][0] for s in summarize_output]
+    # each score entry deliberately starts with a newline, so joining with "\n"
+    # leaves a blank line between scores (layout kept as originally written)
+    sum_scores = [f"\n - {round(s['summary_score'],4)}" for s in summarize_output]
+    scores_text = "\n".join(sum_scores)
+    full_summary = "\n\t".join(sum_text)
+    # explicit UTF-8: summaries may contain non-ASCII characters, and the platform
+    # default encoding is locale-dependent. One handle is enough - no need to
+    # close the file and reopen it in append mode.
+    with open(outpath, "w", encoding="utf-8") as fo:
+        if add_signature:
+            fo.write(
+                "Generated with the Document Summarization space :) https://hf.co/spaces/pszemraj/document-summarization\n\n"
+            )
+        # write(), not writelines(): these are single strings, not lists of lines
+        fo.write(full_summary)
+        fo.write("\n" * 3)
+        fo.write("\n\nSection Scores:\n")
+        fo.write(scores_text)
+        fo.write("\n\n---\n")
+    return outpath