Spaces:
Runtime error
Runtime error
Commit
·
87fbf70
1
Parent(s):
69f90b2
working on text splitting
Browse files
app.py
CHANGED
|
@@ -97,29 +97,33 @@ def main() -> None:
|
|
| 97 |
# with st.spinner("Summarizing in progress..."):
|
| 98 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
| 99 |
|
| 100 |
-
def
|
| 101 |
-
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
| 106 |
for sentence in sentences:
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
else:
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
| 116 |
|
| 117 |
pipe = create_pipeline()
|
| 118 |
|
| 119 |
if summarize_button:
|
| 120 |
if target_text_input is not "":
|
| 121 |
with st.spinner("Summarizing in progress..."):
|
| 122 |
-
sentences =
|
| 123 |
for sentence in sentences:
|
| 124 |
st.text(sentence)
|
| 125 |
#output = pipe(sentence)
|
|
|
|
| 97 |
# with st.spinner("Summarizing in progress..."):
|
| 98 |
# return tuple(summarizer.abstractive_summary(list(summary_sentence)))
|
| 99 |
|
| 100 |
+
def join_sentences(sentences: list) -> str:
    """Concatenate sentences into a single string separated by one space.

    Args:
        sentences: Strings to join, in the order they should appear.

    Returns:
        The sentences joined with " "; an empty string when the input
        is empty.
    """
    # str.join consumes the iterable directly, so no intermediate copy of
    # the list is needed.
    return " ".join(sentences)
|
| 102 |
|
| 103 |
+
def split_sentences_by_token_length(sentences: list, split_token_length: int = 512) -> list:
    """Group consecutive sentences into chunks bounded by a token budget.

    Sentences are accumulated in order. When adding the next sentence would
    push the running token count above ``split_token_length``, the sentences
    collected so far are joined into one string and a new chunk is started
    with that sentence. A single sentence that exceeds the budget on its own
    is kept whole as its own chunk rather than being cut.

    Args:
        sentences: Sentences to group. A plain ``str`` is also accepted and
            is split into sentences with ``nltk.sent_tokenize`` first;
            otherwise iterating it would yield single characters.
        split_token_length: Maximum number of tokens per chunk, counted with
            ``nltk.word_tokenize`` and ignoring "." tokens.
            NOTE(review): the default of 512 is an assumed value, not taken
            from the model -- confirm against the pipeline's input limit.
            These are NLTK word tokens, not the model's own tokens.

    Returns:
        A list of strings, each the space-joined sentences of one chunk.
        Empty input yields an empty list.
    """
    if isinstance(sentences, str):
        # Callers may hand over the raw text; split it into sentences here.
        sentences = nltk.sent_tokenize(sentences)

    chunks = []
    current_chunk = []
    current_token_count = 0
    for sentence in sentences:
        # Full stops are not counted towards the budget.
        token_count = sum(
            1 for token in nltk.word_tokenize(sentence) if token != "."
        )
        # Only flush when something has been collected, so an oversized
        # first sentence does not produce an empty chunk.
        if current_chunk and current_token_count + token_count > split_token_length:
            chunks.append(join_sentences(current_chunk))
            current_chunk = [sentence]
            current_token_count = token_count
        else:
            current_chunk.append(sentence)
            current_token_count += token_count

    # Emit whatever is left after the last sentence.
    if current_chunk:
        chunks.append(join_sentences(current_chunk))
    return chunks
|
| 120 |
|
| 121 |
pipe = create_pipeline()
|
| 122 |
|
| 123 |
if summarize_button:
|
| 124 |
if target_text_input is not "":
|
| 125 |
with st.spinner("Summarizing in progress..."):
|
| 126 |
+
sentences = split_sentences_by_token_length(target_text_input)
|
| 127 |
for sentence in sentences:
|
| 128 |
st.text(sentence)
|
| 129 |
#output = pipe(sentence)
|