Spaces:
Sleeping
Sleeping
"""
This code was adapted from https://huggingface.co/spaces/HugoLaurencon/examples_before_after_pii/
"""
import html
import json

import pandas as pd
import streamlit as st
# Lookup tables behind the two selection widgets.
tags = [
    "KEY",
    "IP_ADDRESS",
    "EMAIL",
]
types = [
    "False positives",
    "False negatives",
]
# Short code for each detection type; used to build the data file name
# and the in-text "PI:<CODE>" marker.
matches = {
    "False negatives": "fn",
    "False positives": "fp",
}

# Page chrome: wide layout plus the visible title.
st.set_page_config(layout="wide", page_title="PII Visualization")
st.title("PII Visualization")
def load_data():
    """Load the detection samples for the current widget selection.

    The file path is derived from the module-level ``chosen_tag`` and
    ``chosen_type`` values:
    ``data/<tag lowercased>_detections_<fp|fn>.json``.

    Returns:
        The object parsed from the JSON file.

    Raises:
        FileNotFoundError: if no data file exists for the selection.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    path = f"data/{chosen_tag.lower()}_detections_{matches[chosen_type]}.json"
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which can break on non-ASCII text in the samples.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
# Selection widgets: detection type and PII tag side by side; the third,
# wider column is only a spacer.
col1, col2, col3 = st.columns([1, 1, 4])
with col1:
    chosen_type = st.selectbox(
        label="Select the type of detections",
        options=types,
        index=0,
    )
with col2:
    chosen_tag = st.selectbox(
        label="Select the PII TAG",
        options=tags,
        index=0,
    )

# load_data() reads chosen_type / chosen_tag from module scope, so it has to
# run after both widgets have produced a value.
samples = load_data()
max_docs = len(samples)
if max_docs == 0:
    # With no samples the index picker would get max_value=-1 (below
    # min_value=0) and samples[0] would fail; stop this run with a message.
    st.warning(f"No {chosen_type.lower()} available for the {chosen_tag} tag.")
    st.stop()

col1, col2 = st.columns([2, 4])
with col1:
    index_example = st.number_input(
        f"Index of the chosen example from the existing {max_docs}",
        min_value=0,
        max_value=max_docs - 1,
        value=0,
        step=1,
    )
# NOTE(review): rendered at full width here; the original nesting of this
# line was lost with the file's indentation - confirm.
st.write("Scroll down to visualize PII detections highlighted in yellow, we split the text at the start and end of the key to highlight it.")
# Render the chosen sample: plain text goes into code blocks, and every
# "PI:<FP|FN> ... END_PI" span is shown highlighted in yellow between them.
end_marker = "END_PI"
detection = samples[index_example]
delimiter = f"PI:{matches[chosen_type].upper()}"
count = detection.count(delimiter)
st.subheader(f"{count} {chosen_type.lower()} for {chosen_tag} tag in example {index_example}:")

advance = 0  # offset in `detection` up to which text has been rendered
while True:
    start = detection.find(delimiter, advance)
    if start == -1:
        break
    # Look for the closing marker only *after* the opening delimiter, so an
    # earlier stray END_PI cannot produce an end offset before the start.
    marker_at = detection.find(end_marker, start)
    if marker_at == -1:
        # Unterminated span: highlight through the end of the text.
        end = len(detection)
    else:
        end = marker_at + len(end_marker)
    st.code(detection[advance:start])
    # The span is injected as raw HTML, so the sample text must be escaped
    # to keep "<", ">" and "&" from being interpreted as markup.
    st.markdown(
        '<span style="background-color: #FFFF00">'
        + html.escape(detection[start:end])
        + "</span>",
        unsafe_allow_html=True,
    )
    advance = end

# Whatever follows the last highlighted span (or the whole text if none).
last_part = detection[advance:]
st.code(last_part)