Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -178,8 +178,7 @@ if uploaded_file is not None:
|
|
| 178 |
if pdf_document:
|
| 179 |
redacted_text = []
|
| 180 |
for pg in pdf_document:
|
| 181 |
-
text = pg.get_text(
|
| 182 |
-
st.text_area(pg.get_text())
|
| 183 |
sentences = sentence_tokenize(text)
|
| 184 |
for sent in sentences:
|
| 185 |
entities,words_out = extract_entities(sent)
|
|
@@ -189,13 +188,14 @@ if uploaded_file is not None:
|
|
| 189 |
new+=w.split('\n')
|
| 190 |
words_out+=bert_words
|
| 191 |
words_out = [i for i in new if len(i)>2]
|
| 192 |
-
|
| 193 |
# print(words_out)
|
| 194 |
words_out=sorted(words_out, key=len,reverse=True)
|
|
|
|
| 195 |
print(words_out)
|
| 196 |
for i in words_out:
|
| 197 |
redact_text(pg,i)
|
| 198 |
-
|
|
|
|
| 199 |
output_pdf = "output_redacted.pdf"
|
| 200 |
pdf_document.save(output_pdf)
|
| 201 |
|
|
|
|
| 178 |
if pdf_document:
|
| 179 |
redacted_text = []
|
| 180 |
for pg in pdf_document:
|
| 181 |
+
text = pg.get_text()
|
|
|
|
| 182 |
sentences = sentence_tokenize(text)
|
| 183 |
for sent in sentences:
|
| 184 |
entities,words_out = extract_entities(sent)
|
|
|
|
| 188 |
new+=w.split('\n')
|
| 189 |
words_out+=bert_words
|
| 190 |
words_out = [i for i in new if len(i)>2]
|
|
|
|
| 191 |
# print(words_out)
|
| 192 |
words_out=sorted(words_out, key=len,reverse=True)
|
| 193 |
+
redacted_text+=words_out
|
| 194 |
print(words_out)
|
| 195 |
for i in words_out:
|
| 196 |
redact_text(pg,i)
|
| 197 |
+
st.text_area(pg.get_text())
|
| 198 |
+
|
| 199 |
output_pdf = "output_redacted.pdf"
|
| 200 |
pdf_document.save(output_pdf)
|
| 201 |
|