Spaces:

ML-unipi
/

TermsOfServiceSummarization

Runtime error

App Files Files Community

tommasobaldi committed on Aug 28, 2022

Commit

146d058

1 Parent(s): 1ba1fd2

working on text splitting

Browse files

Files changed (1) hide show

app.py +27 -60

app.py CHANGED Viewed

@@ -1,14 +1,8 @@
-import html
 import os
 from typing import AnyStr
 import nltk
-from nltk.tokenize import sent_tokenize
-from nltk.tokenize import word_tokenize
 import streamlit as st
-import validators
 from transformers import pipeline
-from validators import ValidationFailure
 def main() -> None:
@@ -52,51 +46,6 @@ def main() -> None:
             text = file.read()
         return text
-    if "target_text" not in st.session_state:
-        st.session_state.target_text = ""
-    if "sentence_lenght" not in st.session_state:
-        st.session_state.sentence_length = 15
-    if "sample_choice" not in st.session_state:
-        st.session_state.sentence_length = ""
-    st.header("Input")
-    # sentences_length = st.number_input(
-    #     label="How many senetences to be extracted:",
-    #     min_value=5,
-    #     max_value=15,
-    #     step=1,
-    #     value=st.session_state.sentence_length
-    # )
-    sample_choice = st.selectbox(
-        label="Select a sample:",
-        options=get_list_files()
-    )
-    st.session_state.target_text = fetch_file_content(sample_choice)
-    target_text_input = st.text_area(
-        value=st.session_state.target_text,
-        label="Paste your own Term Of Service:",
-        height=240
-    )
-    summarize_button = st.button(label="Try it!")
-    # @st.cache(suppress_st_warning=True,
-    #           show_spinner=False,
-    #           allow_output_mutation=True,
-    #           hash_funcs={"torch.nn.parameter.Parameter": lambda _: None,
-    #                       "tokenizers.Tokenizer": lambda _: None,
-    #                       "tokenizers.AddedToken": lambda _: None,
-    #                       }
-    #           )
-    # def summary_from_cache(summary_sentence: tuple) -> tuple:
-    #     with st.spinner("Summarizing in progress..."):
-    #         return tuple(summarizer.abstractive_summary(list(summary_sentence)))
     def join_sentences(sentences: list) -> str:
         """Return the given sentences joined into one string.

         Args:
             sentences: Strings to concatenate, in order.

         Returns:
             The sentences separated by single spaces; an empty string
             when ``sentences`` is empty.
         """
         # str.join consumes any iterable of strings directly, so copying
         # the input into a throwaway list first is unnecessary.
         return " ".join(sentences)
@@ -120,20 +69,38 @@ def main() -> None:
     pipe = create_pipeline()
     if summarize_button:
         if target_text_input is not "":
-            summary_sentences = ""
             with st.spinner("Summarizing in progress..."):
                 sentences = split_sentences_by_token_length(nltk.sent_tokenize(target_text_input), 600)
                 for sentence in sentences:
-                    summary_sentences += "".join(pipe(sentence)["summary_text"])
-                display_summary(summary_sentences.split("."))
-                    #output = pipe(sentence)
-                    #st.markdown(output["summary_text"])
 if __name__ == "__main__":

 import os
 from typing import AnyStr
 import nltk
 import streamlit as st
 from transformers import pipeline
 def main() -> None:
             text = file.read()
         return text
     def join_sentences(sentences: list) -> str:
         """Return the given sentences joined into one string.

         Args:
             sentences: Strings to concatenate, in order.

         Returns:
             The sentences separated by single spaces; an empty string
             when ``sentences`` is empty.
         """
         # str.join consumes any iterable of strings directly, so copying
         # the input into a throwaway list first is unnecessary.
         return " ".join(sentences)
     pipe = create_pipeline()
+    if "target_text" not in st.session_state:
+        st.session_state.target_text = ""
+    if "sentence_lenght" not in st.session_state:
+        st.session_state.sentence_length = 15
+    if "sample_choice" not in st.session_state:
+        st.session_state.sentence_length = ""
+    st.header("Input")
+    sample_choice = st.selectbox(
+        label="Select a sample:",
+        options=get_list_files()
+    )
+    st.session_state.target_text = fetch_file_content(sample_choice)
+    target_text_input = st.text_area(
+        value=st.session_state.target_text,
+        label="Paste your own Term Of Service:",
+        height=240
+    )
+    summarize_button = st.button(label="Try it!")
     if summarize_button:
         if target_text_input is not "":
+            summary_sentences = []
             with st.spinner("Summarizing in progress..."):
                 sentences = split_sentences_by_token_length(nltk.sent_tokenize(target_text_input), 600)
                 for sentence in sentences:
+                    output = pipe(sentence)
+                    summary = output["summary_text"]
+                    summary_sentences.append(summary.split("."))
+                display_summary(summary_sentences)
 if __name__ == "__main__":