Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -112,28 +112,7 @@ def create_masked_text(input_text, entities, mask_dict):
|
|
| 112 |
masked_text = masked_text[:entity['start']] + mask_dict[entity['word']] + masked_text[entity['end']:]
|
| 113 |
return masked_text
|
| 114 |
|
| 115 |
-
|
| 116 |
-
if file_type == "txt":
|
| 117 |
-
return masked_text.encode("utf-8")
|
| 118 |
-
elif file_type == "pdf":
|
| 119 |
-
pdf_buffer = io.BytesIO()
|
| 120 |
-
pdf = FPDF()
|
| 121 |
-
pdf.add_page()
|
| 122 |
-
pdf.set_font("Arial", size=12)
|
| 123 |
-
pdf.multi_cell(0, 10, masked_text)
|
| 124 |
-
pdf.output(pdf_buffer)
|
| 125 |
-
pdf_buffer.seek(0)
|
| 126 |
-
return pdf_buffer.getvalue()
|
| 127 |
-
elif file_type == "docx":
|
| 128 |
-
doc = docx.Document()
|
| 129 |
-
doc.add_paragraph(masked_text)
|
| 130 |
-
buffer = io.BytesIO()
|
| 131 |
-
doc.save(buffer)
|
| 132 |
-
buffer.seek(0)
|
| 133 |
-
return buffer.getvalue()
|
| 134 |
-
else:
|
| 135 |
-
st.error("Unsupported file type for export")
|
| 136 |
-
return None
|
| 137 |
Run_Button = st.button("Run")
|
| 138 |
|
| 139 |
if Run_Button and input_text:
|
|
@@ -155,6 +134,7 @@ if Run_Button and input_text:
|
|
| 155 |
entity['end'] += offset
|
| 156 |
|
| 157 |
all_outputs.extend(output)
|
|
|
|
| 158 |
|
| 159 |
# Combine entities
|
| 160 |
|
|
@@ -171,7 +151,7 @@ if Run_Button and input_text:
|
|
| 171 |
entity['masked_word'] = mask_dict.get(entity['word'], entity['word'])
|
| 172 |
else:
|
| 173 |
entity['masked_word'] = entity['word']
|
| 174 |
-
|
| 175 |
#df = pd.DataFrame.from_dict(output_comb)
|
| 176 |
#cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
|
| 177 |
#df_final = df[cols_to_keep].loc[:,~df.columns.duplicated()].copy()
|
|
@@ -189,21 +169,8 @@ if Run_Button and input_text:
|
|
| 189 |
else:
|
| 190 |
label = entity['entity_group']
|
| 191 |
spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
|
| 192 |
-
# Custom CSS to prevent label overlap
|
| 193 |
-
custom_css = """
|
| 194 |
-
<style>
|
| 195 |
-
.entity-label {
|
| 196 |
-
font-size: 0.7em;
|
| 197 |
-
line-height: 1;
|
| 198 |
-
padding: 0.25em;
|
| 199 |
-
border-radius: 0.25em;
|
| 200 |
-
top: -1.5em;
|
| 201 |
-
position: relative;
|
| 202 |
-
}
|
| 203 |
-
</style>
|
| 204 |
-
"""
|
| 205 |
|
| 206 |
-
html =
|
| 207 |
st.write(html, unsafe_allow_html=True)
|
| 208 |
|
| 209 |
st.subheader("Masking Dictionary")
|
|
|
|
| 112 |
masked_text = masked_text[:entity['start']] + mask_dict[entity['word']] + masked_text[entity['end']:]
|
| 113 |
return masked_text
|
| 114 |
|
| 115 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
Run_Button = st.button("Run")
|
| 117 |
|
| 118 |
if Run_Button and input_text:
|
|
|
|
| 134 |
entity['end'] += offset
|
| 135 |
|
| 136 |
all_outputs.extend(output)
|
| 137 |
+
|
| 138 |
|
| 139 |
# Combine entities
|
| 140 |
|
|
|
|
| 151 |
entity['masked_word'] = mask_dict.get(entity['word'], entity['word'])
|
| 152 |
else:
|
| 153 |
entity['masked_word'] = entity['word']
|
| 154 |
+
print("output_comb", output_comb)
|
| 155 |
#df = pd.DataFrame.from_dict(output_comb)
|
| 156 |
#cols_to_keep = ['word', 'entity_group', 'score', 'start', 'end']
|
| 157 |
#df_final = df[cols_to_keep].loc[:,~df.columns.duplicated()].copy()
|
|
|
|
| 169 |
else:
|
| 170 |
label = entity['entity_group']
|
| 171 |
spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
+
html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
|
| 174 |
st.write(html, unsafe_allow_html=True)
|
| 175 |
|
| 176 |
st.subheader("Masking Dictionary")
|