Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -105,12 +105,18 @@ def create_mask_dict(entities):
|
|
| 105 |
entity_counters[entity['entity_group']] += 1
|
| 106 |
mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
|
| 107 |
return mask_dict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
def export_masked_text(masked_text, file_type):
|
| 109 |
if file_type == "txt":
|
| 110 |
return masked_text.encode("utf-8")
|
| 111 |
elif file_type == "pdf":
|
| 112 |
pdf_buffer = io.BytesIO()
|
| 113 |
-
from fpdf import FPDF
|
| 114 |
pdf = FPDF()
|
| 115 |
pdf.add_page()
|
| 116 |
pdf.set_font("Arial", size=12)
|
|
@@ -157,7 +163,7 @@ if Run_Button and input_text:
|
|
| 157 |
# Create mask dictionary
|
| 158 |
mask_dict = create_mask_dict(output_comb)
|
| 159 |
|
| 160 |
-
masked_text =
|
| 161 |
|
| 162 |
# Apply masking and add masked_word column
|
| 163 |
for entity in output_comb:
|
|
@@ -183,20 +189,36 @@ if Run_Button and input_text:
|
|
| 183 |
else:
|
| 184 |
label = entity['entity_group']
|
| 185 |
spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
-
html = spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
|
| 188 |
st.write(html, unsafe_allow_html=True)
|
| 189 |
|
|
|
|
| 190 |
export_file_type = uploaded_file.type.split("/")[-1] if uploaded_file is not None else "txt"
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
)
|
| 200 |
|
| 201 |
st.subheader("Masking Dictionary")
|
| 202 |
-
st.json(mask_dict)
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
entity_counters[entity['entity_group']] += 1
|
| 106 |
mask_dict[entity['word']] = f"{entity['entity_group']}_{entity_counters[entity['entity_group']]}"
|
| 107 |
return mask_dict
|
| 108 |
+
def create_masked_text(input_text, entities, mask_dict):
    """Return *input_text* with every maskable entity replaced by its mask.

    Args:
        input_text: The original, unmasked text.
        entities: Iterable of entity dicts. Each one is read for the keys
            ``'entity_group'``, ``'word'``, ``'start'`` and ``'end'``, where
            ``start``/``end`` are character offsets into *input_text*.
        mask_dict: Mapping from an entity's ``'word'`` to its replacement
            string.

    Returns:
        A new string in which each entity span is replaced by
        ``mask_dict[entity['word']]``. Entities whose group is ``CARDINAL``
        or ``EVENT`` are left untouched.

    Raises:
        KeyError: If a maskable entity's word has no entry in *mask_dict*.
    """
    excluded_groups = ('CARDINAL', 'EVENT')

    # Order by start offset; on ties put the longest span first so that it
    # becomes the representative of any group of overlapping entities.
    candidates = sorted(
        (e for e in entities if e['entity_group'] not in excluded_groups),
        key=lambda e: (e['start'], -e['end']),
    )

    # Collapse duplicate/overlapping entities into disjoint spans.
    # Replacing overlapping spans one after another would slice the already
    # modified text with offsets that are only valid for the original text
    # and corrupt the result.
    spans = []  # each item: [start, end, mask], ascending and disjoint
    for entity in candidates:
        mask = mask_dict[entity['word']]
        if spans and entity['start'] < spans[-1][1]:
            # Overlaps the previous span: widen it so the union stays
            # masked, keeping the mask of the earlier (or longer) entity.
            spans[-1][1] = max(spans[-1][1], entity['end'])
            continue
        spans.append([entity['start'], entity['end'], mask])

    # Assemble the output in one left-to-right pass over the original text,
    # so every offset is used against the string it was computed for.
    pieces = []
    cursor = 0
    for start, end, mask in spans:
        pieces.append(input_text[cursor:start])
        pieces.append(mask)
        cursor = end
    pieces.append(input_text[cursor:])
    return "".join(pieces)
|
| 114 |
+
|
| 115 |
def export_masked_text(masked_text, file_type):
|
| 116 |
if file_type == "txt":
|
| 117 |
return masked_text.encode("utf-8")
|
| 118 |
elif file_type == "pdf":
|
| 119 |
pdf_buffer = io.BytesIO()
|
|
|
|
| 120 |
pdf = FPDF()
|
| 121 |
pdf.add_page()
|
| 122 |
pdf.set_font("Arial", size=12)
|
|
|
|
| 163 |
# Create mask dictionary
|
| 164 |
mask_dict = create_mask_dict(output_comb)
|
| 165 |
|
| 166 |
+
masked_text = create_masked_text(input_text, output_comb, mask_dict)
|
| 167 |
|
| 168 |
# Apply masking and add masked_word column
|
| 169 |
for entity in output_comb:
|
|
|
|
| 189 |
else:
|
| 190 |
label = entity['entity_group']
|
| 191 |
spacy_display["ents"].append({"start": entity["start"], "end": entity["end"], "label": label})
|
| 192 |
+
# Custom CSS to prevent label overlap
|
| 193 |
+
custom_css = """
|
| 194 |
+
<style>
|
| 195 |
+
.entity-label {
|
| 196 |
+
font-size: 0.7em;
|
| 197 |
+
line-height: 1;
|
| 198 |
+
padding: 0.25em;
|
| 199 |
+
border-radius: 0.25em;
|
| 200 |
+
top: -1.5em;
|
| 201 |
+
position: relative;
|
| 202 |
+
}
|
| 203 |
+
</style>
|
| 204 |
+
"""
|
| 205 |
|
| 206 |
+
html = custom_css + spacy.displacy.render(spacy_display, style="ent", minify=True, manual=True)
|
| 207 |
st.write(html, unsafe_allow_html=True)
|
| 208 |
|
| 209 |
+
# Download button
|
| 210 |
export_file_type = uploaded_file.type.split("/")[-1] if uploaded_file is not None else "txt"
|
| 211 |
+
masked_file_content = export_masked_text(masked_text, export_file_type)
|
| 212 |
+
if masked_file_content:
|
| 213 |
+
st.download_button(
|
| 214 |
+
label="Download Masked Text",
|
| 215 |
+
data=masked_file_content,
|
| 216 |
+
file_name=f"masked_output.{export_file_type}",
|
| 217 |
+
mime=f"application/{export_file_type}" if export_file_type != "txt" else "text/plain"
|
| 218 |
+
)
|
|
|
|
| 219 |
|
| 220 |
st.subheader("Masking Dictionary")
|
| 221 |
+
st.json(mask_dict)
|
| 222 |
+
|
| 223 |
+
st.subheader("Masked Text Preview")
|
| 224 |
+
st.text(masked_text)
|