Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -42,7 +42,7 @@ address_recognizer = PatternRecognizer(supported_entity="ADDRESS", patterns=[add
|
|
| 42 |
|
| 43 |
# Add the custom address recognizer to the analyzer
|
| 44 |
analyzer.registry.add_recognizer(address_recognizer)
|
| 45 |
-
analyzer.get_recognizers
|
| 46 |
# Define a function to extract entities
|
| 47 |
|
| 48 |
|
|
@@ -181,18 +181,18 @@ if uploaded_file is not None:
|
|
| 181 |
text = pg.get_text()
|
| 182 |
sentences = sentence_tokenize(text)
|
| 183 |
for sent in sentences:
|
| 184 |
-
x = mask_generation(sent)
|
| 185 |
|
| 186 |
-
sent_n_q_c=[]
|
| 187 |
-
sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
|
| 188 |
-
for i in sent_n:
|
| 189 |
-
for j in i.split(" "):
|
| 190 |
-
sent_n_q_c+=j.split(',')
|
| 191 |
-
x_q = x.lower().replace('.',' ').split(' ')
|
| 192 |
-
e=[]
|
| 193 |
-
for i in x_q:
|
| 194 |
-
e+=i.split(',')
|
| 195 |
-
t5_words=set(sent_n_q_c).difference(set(e))
|
| 196 |
entities,words_out = extract_entities(sent)
|
| 197 |
# print("\nwords_out:",words_out)
|
| 198 |
# print("\nT5",t5_words)
|
|
@@ -202,7 +202,7 @@ if uploaded_file is not None:
|
|
| 202 |
new=[]
|
| 203 |
for w in words_out:
|
| 204 |
new+=w.split('\n')
|
| 205 |
-
words_out+=t5_words
|
| 206 |
new+=bert_words
|
| 207 |
words_out = [i for i in new if len(i)>3]
|
| 208 |
# print("\nfinal:",words_out)
|
|
|
|
| 42 |
|
| 43 |
# Add the custom address recognizer to the analyzer
|
| 44 |
analyzer.registry.add_recognizer(address_recognizer)
|
| 45 |
+
# analyzer.get_recognizers
|
| 46 |
# Define a function to extract entities
|
| 47 |
|
| 48 |
|
|
|
|
| 181 |
text = pg.get_text()
|
| 182 |
sentences = sentence_tokenize(text)
|
| 183 |
for sent in sentences:
|
| 184 |
+
# x = mask_generation(sent)
|
| 185 |
|
| 186 |
+
# sent_n_q_c=[]
|
| 187 |
+
# sent_n = list(set(sent.lower().replace('.',' ').split("\n")))
|
| 188 |
+
# for i in sent_n:
|
| 189 |
+
# for j in i.split(" "):
|
| 190 |
+
# sent_n_q_c+=j.split(',')
|
| 191 |
+
# x_q = x.lower().replace('.',' ').split(' ')
|
| 192 |
+
# e=[]
|
| 193 |
+
# for i in x_q:
|
| 194 |
+
# e+=i.split(',')
|
| 195 |
+
# t5_words=set(sent_n_q_c).difference(set(e))
|
| 196 |
entities,words_out = extract_entities(sent)
|
| 197 |
# print("\nwords_out:",words_out)
|
| 198 |
# print("\nT5",t5_words)
|
|
|
|
| 202 |
new=[]
|
| 203 |
for w in words_out:
|
| 204 |
new+=w.split('\n')
|
| 205 |
+
# words_out+=t5_words
|
| 206 |
new+=bert_words
|
| 207 |
words_out = [i for i in new if len(i)>3]
|
| 208 |
# print("\nfinal:",words_out)
|