# Cpp4App_test / SEM / retention_pp_processing.py
# HaochenGong
# create
# f1554a2
from types_pp_processing import cleanHtml
import spacy
# Load the spaCy "en_core_web_sm" pipeline once at import time so that
# retention_process() can reuse it for tokenization and part-of-speech
# tagging instead of reloading the model on every call.
nlp = spacy.load('en_core_web_sm')
def retention_process(txt):
    """Extract the first "<number> <time unit>" retention period from a policy.

    The input is passed through ``cleanHtml`` and the resulting pieces are
    concatenated into one string, which is then tokenized with the
    module-level spaCy pipeline. The tokens are scanned left to right for a
    numeric token (``pos_ == "NUM"``) that is immediately followed by a time
    unit (year, month, week, day or hour; singular or plural, any casing).

    Args:
        txt: Raw privacy-policy content, in whatever form ``cleanHtml``
            accepts (presumably HTML or text -- confirm against cleanHtml).

    Returns:
        A ``(period, text)`` tuple. ``period`` is the matched span such as
        ``"30 days"``, or a fixed explanatory sentence when no number/unit
        pair is found. ``text`` is the concatenated output of ``cleanHtml``.
    """
    # Plural forms are listed explicitly: policies almost always write
    # "30 days" / "2 years", which an exact match on the singular misses.
    time_units = {
        "year", "years",
        "month", "months",
        "week", "weeks",
        "day", "days",
        "hour", "hours",
    }
    not_specified = "The privacy policy does not specify how long the data will be retained"

    text = "".join(cleanHtml(txt))
    doc = nlp(text)

    # Text of the numeric token seen on the previous iteration, if any.
    # It is cleared whenever the following token is not a unit, so that an
    # unrelated number (a date, a section number, ...) is never reported as
    # the retention period.
    pending_number = None
    for token in doc:
        if pending_number is not None and token.text.lower() in time_units:
            return pending_number + " " + token.text, text
        pending_number = token.text if token.pos_ == "NUM" else None

    return not_specified, text