| import streamlit as st |
| import torch |
| import os |
| import re |
| import json |
| from transformers import AutoTokenizer, AutoModelForTokenClassification |
| from sentence_transformers import SentenceTransformer, util |
| from collections import defaultdict |
| import os |
|
|
# Hugging Face Hub identifiers of the two models used by the demo.
# NOTE(review): the sidebar "Model Card" links rendered in main() point at the
# `liaad/` organisation while these ids use `inesctec/` - confirm which one is
# canonical.
MODEL_PATH = "inesctec/Citilink-XLMR-Anonymization-pt"  # token-classification (NER) model
MODEL_REL_PATH = "inesctec/Citilink-mpnet-Entity-Linker-pt"  # sentence-embedding model for entity linking


# Page-level configuration; kept ahead of every other st.* call in the script.
# The title used to start with an orphaned emoji variation selector (U+FE0F)
# followed by a space, which is dropped here; the shield is still shown through
# page_icon.
st.set_page_config(
    page_title="Text Anonymization Demo",
    page_icon="🛡️",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
|
# Global CSS overrides for the demo, injected once at import time.
#
# Changes relative to the previous stylesheet:
#   * the second `@media (prefers-color-scheme: dark)` block is now closed right
#     after the `.result-window` override; its closing brace used to sit after
#     `.dot`, which scoped `.browser-window`, `.browser-header` and `.dot` to
#     dark mode only;
#   * the sidebar resizer / fixed-width rules were declared twice with identical
#     declarations; only the first copy is kept.
# The string starts with `<style>` at column 0 so Markdown treats it as raw HTML.
st.markdown("""
<style>

/* 1. Elimina a capacidade de arrastar/redimensionar a barra */
[data-testid="stSidebarResizer"] {
    display: none !important;
}

/* 2. Garante que a barra tem uma largura fixa apenas enquanto aberta */
/* Assim ela não 'dança' e o fecho continua a ser total */
[data-testid="stSidebar"][aria-expanded="true"] {
    min-width: 320px;
    max-width: 320px;
}

/* 3. Ajusta o conteúdo principal para colar à esquerda quando fechada */
[data-testid="stMain"] {
    margin-left: 0px;
}
.main-header {
    font-size: 2.5rem;
    font-weight: bold;
    color: #e63946;
    text-align: center;
    margin-bottom: 1rem;
}
.anon-box {
    padding: 1.5rem;
    margin: 0.5rem 0;
    border-radius: 0.5rem;
    border-left: 5px solid #1d3557;
    background-color: #f8f9fa;
    font-family: 'Courier New', Courier, monospace;
    line-height: 1.6;
    color: #1e1e1e;
}
@media (prefers-color-scheme: dark) {
    .anon-box { background-color: #1e212b; color: #e0e0e0; border-left: 5px solid #a8dadc; }
}
.entity-tag {
    background-color: #e9ecef;
    padding: 2px 6px;
    border-radius: 4px;
    font-weight: bold;
    color: #1d3557;
}
.metric-box {
    background-color: #f1faee;
    padding: 1rem;
    border-radius: 0.5rem;
    text-align: center;
    border: 1px solid #a8dadc;
}

.small-metric-container {
    display: flex;
    justify-content: space-between;
    gap: 10px;
    margin-top: 10px; /* Espaço logo abaixo do botão */
}
.small-metric-box {
    flex: 1;
    background-color: #f8f9fa;
    border-radius: 6px;
    padding: 5px;
    text-align: center;
    border: 1px solid #dee2e6;
}
.metric-label {
    font-size: 0.65rem;
    color: #6c757d;
    text-transform: uppercase;
    font-weight: bold;
}
.metric-value {
    font-size: 1rem;
    color: #1d3557;
    font-weight: bold;
}

.result-window {
    height: 450px;
    overflow-y: auto;
    padding: 1rem;
    border-radius: 8px;
    border: 1px solid #dee2e6;
    background-color: #ffffff;
    font-family: 'Courier New', Courier, monospace;
    font-size: 0.85rem;
    line-height: 1.5;
}
@media (prefers-color-scheme: dark) {
    .result-window { background-color: #1e212b; color: #e0e0e0; border: 1px solid #444; }
}

.browser-window {
    height: 500px;
    overflow-y: auto;
    padding: 15px;
    border-radius: 0px 0px 8px 8px; /* Arredondado apenas em baixo */
    border: 1px solid #d1d5db;
    background-color: #ffffff;
    font-family: 'Consolas', 'Monaco', monospace;
    font-size: 0.9rem;
    user-select: text; /* Garante que o utilizador pode selecionar o texto */
}
.browser-header {
    background-color: #f1f5f9;
    padding: 5px 15px;
    border: 1px solid #d1d5db;
    border-bottom: none;
    border-radius: 8px 8px 0px 0px;
    font-size: 0.75rem;
    font-weight: bold;
    color: #475569;
    display: flex;
    align-items: center;
    gap: 8px;
}
.dot { height: 10px; width: 10px; border-radius: 50%; display: inline-block; }

/* Estilo para a área de conteúdo das Tabs */
/* Janela de Texto com fundo #262730 */
.tab-window {
    /* Reduzimos para alinhar com a caixa de estatísticas da esquerda */
    height: 495px;

    overflow-y: auto;
    padding: 20px;
    border: 1px solid #444;
    border-radius: 0px 0px 8px 8px;
    background-color: #262730 !important;
    font-family: 'Consolas', 'Monaco', monospace;
    font-size: 0.95rem;
    line-height: 1.6;
    user-select: text;
    color: #efefef !important;
    margin-bottom: 20px;
}

/* Ajuste do sombreado das TAGS para o modo escuro */
.entity-highlight {
    background-color: #3d3f4b; /* Cinza um pouco mais claro que o fundo */
    color: #a8dadc; /* Texto da tag num azul ciano suave */
    padding: 2px 6px;
    border-radius: 4px;
    border: 1px solid #555;
    display: inline-block;
    line-height: 1.2;
    font-weight: bold;
}

/* Estilização das Tabs (Abas) */
.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
    color: #9ca3af !important; /* Um cinza claro para as abas "apagadas" */
    font-size: 1rem;
    font-weight: bold;
    transition: color 0.3s ease;
}

/* 2. COR DAS ABAS QUANDO ESTÃO SELECIONADAS (ATIVAS) */
/* Aqui mudamos para o Azul que pediste anteriormente */
.stTabs [aria-selected="true"] [data-testid="stMarkdownContainer"] p {
    color: #a8dadc !important; /* Um azul ciano/claro para brilhar no dark mode */
}

/* 3. A LINHA (BARRA) QUE FICA POR BAIXO DA ABA SELECIONADA */
.stTabs [data-baseweb="tab-highlight"] {
    background-color: #a8dadc !important; /* Cor da linha que corre por baixo */
}

/* 4. EFEITO AO PASSAR O RATO (HOVER) */
.stTabs [data-baseweb="tab"]:hover [data-testid="stMarkdownContainer"] p {
    color: #ffffff !important; /* Fica branco ao passar o rato */
}

/* 1. Remove o espaço em branco excessivo no topo sem quebrar o botão da barra lateral */
.block-container {
    padding-top: 1.5rem !important;
    padding-bottom: 0rem !important;
}

/* 2. Esconde o fundo e a decoração do header, mas MANTÉM o botão de abrir/fechar */
header[data-testid="stHeader"] {
    background: transparent !important;
    color: transparent !important;
}

/* 3. Garante que o botão da barra lateral (Chevron) é visível mesmo com header transparente */
[data-testid="collapsedControl"] {
    color: #bdbbbb !important; /* Mesma cor do seu título */
    visibility: visible !important;
    display: flex !important;
}

/* Ajusta a margem do título principal */
.main-title {
    margin-top: -45px !important;
    position: relative;
}

.streamlit-expanderHeader {
    background-color: #1e212b !important;
    border: 1px solid #444 !important;
    border-radius: 8px !important;
}
html, body {
    overflow: hidden !important;
    height: 100%;
}

/* 2. Garante que o container principal do Streamlit retém o scroll */
/* Isso permite que a barra de scroll que vês seja a da App e não a da Web */
[data-testid="stMainViewContainer"] {
    overflow-y: auto !important;
}

/* Opcional: Se quiseres que a barra do Streamlit seja mais discreta/fina */
[data-testid="stMainViewContainer"]::-webkit-scrollbar {
    width: 8px;
}
[data-testid="stMainViewContainer"]::-webkit-scrollbar-thumb {
    background: #444;
    border-radius: 10px;
}
</style>
""", unsafe_allow_html=True)
|
|
|
|
@st.cache_resource
def _load_models_cached():
    """Load the tokenizer, the NER model and the entity-linking model.

    Any loading error propagates to the caller. Keeping the ``try``/``except``
    out of the cached function means a failed attempt is not stored by
    ``st.cache_resource`` and the next rerun retries the load.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, add_prefix_space=True)
    model_ner = AutoModelForTokenClassification.from_pretrained(MODEL_PATH)
    rel_model = SentenceTransformer(MODEL_REL_PATH)
    return tokenizer, model_ner, rel_model


def load_models():
    """Return ``(tokenizer, model_ner, rel_model, error)``.

    On success ``error`` is ``None``. On failure the three model slots are
    ``None`` and ``error`` is the exception message, so the caller can show it
    in the UI. Successful loads are cached for the lifetime of the process;
    failures are not, so a transient download error does not stick.
    """
    try:
        tokenizer, model_ner, rel_model = _load_models_cached()
    except Exception as exc:  # UI boundary: the caller reports the message and stops
        return None, None, None, str(exc)
    return tokenizer, model_ner, rel_model, None
|
|
|
|
# Markers a tokenizer puts at the start of a token that begins a new word.
_SPACE_PREFIXES = (" ", "▁", "Ġ")

# Categories whose placeholders carry a numeric id (e.g. <Name-1>).
_LABELS_WITH_ID = frozenset({
    "Name", "AdministrativeInformation", "PositionDepartment", "Address",
    "PersonalDocument", "Company", "LicensePlate", "Vehicle",
})

# Subset of the above that is also linked by embedding similarity; the other
# id-bearing categories are linked by exact (case-insensitive) text only.
_LABELS_LINKED_BY_MODEL = frozenset({
    "Name", "Address", "Company", "Vehicle", "PositionDepartment",
})


def _close_entity(text, entity, sink):
    """Trim trailing punctuation from ``entity`` and append it to ``sink``."""
    strip_chars = ".,;:!?"
    if entity["label"].replace("PERSONAL-", "") == "PositionDepartment":
        # The final period is kept for this category (presumably because of
        # abbreviations such as "Dr." - the UI example tags it that way).
        strip_chars = ",;:!?"
    while entity["end"] > entity["start"] and text[entity["end"] - 1] in strip_chars:
        entity["end"] -= 1
    # A span made only of punctuation shrinks to nothing; drop it instead of
    # emitting a placeholder that replaces no text.
    if entity["end"] > entity["start"]:
        sink.append(entity)


def _decode_window(text, predictions, offsets, token_ids, id2label, tokenizer):
    """Turn the per-token predictions of one window into character spans."""
    entities = []
    current = None

    for pred_id, (start_char, end_char), token_id in zip(predictions, offsets, token_ids):
        if start_char == end_char:
            # Zero-width offsets: special / padding tokens.
            continue

        label_name = id2label[pred_id]
        tag_full = label_name.split("-", 1)[1] if "-" in label_name else None
        token = tokenizer.convert_ids_to_tokens(token_id)
        starts_new_word = token.startswith(_SPACE_PREFIXES)

        if label_name.startswith("B-"):
            if current is not None:
                _close_entity(text, current, entities)
            current = {"start": start_char, "end": end_char, "label": tag_full}
        elif current is not None and (
            (label_name.startswith("I-") and current["label"] == tag_full)
            # A word-internal piece stays attached to the open entity whatever
            # its own label is, so an entity never ends in the middle of a word.
            or not starts_new_word
        ):
            current["end"] = end_char
        else:
            if current is not None:
                _close_entity(text, current, entities)
            current = None

    # The window ends with zero-width special tokens that are skipped above, so
    # an entity still open here (one reaching the last real token of the window
    # or of the text) has to be flushed explicitly or it would be lost.
    if current is not None:
        _close_entity(text, current, entities)
    return entities


def _merge_entities(entities):
    """Merge duplicate/adjacent spans and resolve overlaps.

    Returns a list sorted by start offset with no overlapping spans.
    """
    merged = []
    for ent in sorted(entities, key=lambda e: e["start"]):
        if not merged:
            merged.append(ent)
            continue

        last = merged[-1]
        if ent["label"] == last["label"] and ent["start"] - last["end"] <= 1:
            # Same category and touching/overlapping (typically the same entity
            # seen in two overlapping windows): fuse. max() keeps a shorter
            # duplicate from shrinking the span already collected.
            last["end"] = max(last["end"], ent["end"])
        elif ent["start"] < last["end"]:
            # Overlap between different categories: the longer span wins.
            if (ent["end"] - ent["start"]) > (last["end"] - last["start"]):
                merged[-1] = ent
        else:
            merged.append(ent)
    return merged


def _find_similar_id(tag, text_key, known_ids, embeddings, threshold, rel_model):
    """Return the id of the most similar known entity of ``tag``, or ``None``.

    A match is only accepted when its cosine similarity is above ``threshold``.
    """
    current = rel_model.encode(text_key, convert_to_tensor=True)
    best_score, best_id = 0.0, None

    for (label, prev_key), prev_id in known_ids.items():
        if label != tag:
            continue
        prev = embeddings.get(prev_key)
        if prev is None:
            prev = rel_model.encode(prev_key, convert_to_tensor=True)
            embeddings[prev_key] = prev
        score = util.cos_sim(current, prev).item()
        if score > best_score:
            best_score, best_id = score, prev_id

    embeddings[text_key] = current
    return best_id if best_score > threshold else None


def _assign_entity_ids(text, entities, threshold, rel_model):
    """Set ``entity_id`` on every entity (``None`` for categories without ids)."""
    known_ids = {}      # (category, lower-cased text) -> id
    embeddings = {}     # lower-cased text -> embedding
    counters = defaultdict(int)

    for ent in entities:
        tag = ent["label"].replace("PERSONAL-", "")
        text_key = text[ent["start"]:ent["end"]].strip().lower()
        entity_id = None

        if tag in _LABELS_WITH_ID:
            entity_id = known_ids.get((tag, text_key))
            if entity_id is None and tag in _LABELS_LINKED_BY_MODEL:
                entity_id = _find_similar_id(
                    tag, text_key, known_ids, embeddings, threshold, rel_model)
            if entity_id is None:
                counters[tag] += 1
                entity_id = counters[tag]
            # Remember this surface form so later mentions match it directly.
            known_ids[(tag, text_key)] = entity_id

        ent["entity_id"] = entity_id


def _render(text, entities):
    """Build the anonymised HTML string and the entity report."""
    import html  # local import keeps this block self-contained

    parts = []
    report = []
    cursor = 0

    for ent in entities:  # sorted by start, non-overlapping
        tag = ent["label"].replace("PERSONAL-", "")
        entity_id = ent.get("entity_id")
        id_part = f"-{entity_id}" if entity_id else ""

        report.append({
            "category": ent["label"],
            "text": text[ent["start"]:ent["end"]].strip(),
            "start": ent["start"],
            "end": ent["end"],
            "id": entity_id,
        })

        # The result is rendered with unsafe_allow_html=True, so the document
        # text must be escaped, and the angle brackets of the placeholder must
        # be entities or the browser parses "<Name-1>" as an (invisible) tag.
        parts.append(html.escape(text[cursor:ent["start"]], quote=False))
        parts.append(
            f' <span class="entity-highlight"><b>&lt;{html.escape(tag)}{id_part}&gt;</b></span> '
        )
        cursor = ent["end"]

    parts.append(html.escape(text[cursor:], quote=False))
    return re.sub(r" +", " ", "".join(parts)).strip(), report


def process_anonymization(text, threshold, tokenizer, model_ner, rel_model):
    """Detect sensitive entities in ``text`` and replace them with placeholders.

    Args:
        text: Document to anonymise.
        threshold: Minimum cosine similarity (exclusive) for two mentions of the
            same category to share an id.
        tokenizer: Tokenizer matching ``model_ner``; it is called with
            overflowing windows and offset mapping enabled.
        model_ner: Token-classification model exposing ``config.id2label`` and
            returning an object with ``logits``.
        rel_model: Embedding model with an ``encode(text, convert_to_tensor=True)``
            method, used for entity linking.

    Returns:
        ``(html_text, report)``. ``html_text`` is the HTML-escaped document in
        which each entity is replaced by a
        ``<span class="entity-highlight">`` placeholder; ``report`` is a list of
        dicts with ``category``, ``text``, ``start``, ``end`` and ``id``, ordered
        by position. For blank input the first item is a user-facing message and
        the report is empty.
    """
    if not text.strip():
        return "Por favor, insira um texto.", []

    inputs = tokenizer(
        text,
        truncation=True,
        max_length=512,
        stride=164,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding=True,
        return_tensors="pt",
    )
    offsets_per_window = inputs.pop("offset_mapping").tolist()
    inputs.pop("overflow_to_sample_mapping", None)  # single document: not needed
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        # Windows are padded to a common length, so the mask is passed to keep
        # the padding out of attention.
        logits = model_ner(
            input_ids=input_ids,
            attention_mask=inputs.get("attention_mask"),
        ).logits
    predictions_per_window = torch.argmax(logits, dim=2).tolist()

    id2label = model_ner.config.id2label
    raw_entities = []
    for predictions, offsets, token_ids in zip(
            predictions_per_window, offsets_per_window, input_ids.tolist()):
        raw_entities.extend(
            _decode_window(text, predictions, offsets, token_ids, id2label, tokenizer))

    entities = _merge_entities(raw_entities)
    _assign_entity_ids(text, entities, threshold, rel_model)
    return _render(text, entities)
|
|
|
|
@st.cache_data
def load_example_texts():
    """Return the example documents offered in the sidebar.

    The examples are read from ``example_text.json`` next to this script. If the
    file is missing, unreadable, not valid JSON, or does not hold a non-empty
    JSON object, a built-in mapping of example names to empty texts is returned
    so the page still renders.

    Returns:
        dict mapping the example name shown in the selectbox to its text.
    """
    fallback = {"Custom Text": "", "1º Portuguese Meeting Minute": "", "2º Portuguese Meeting Minute": "",
                "3º Portuguese Meeting Minute": ""}
    json_path = os.path.join(os.path.dirname(__file__), 'example_text.json')
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            examples = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both invalid JSON
        # (json.JSONDecodeError) and a file that is not valid UTF-8.
        return fallback

    # The caller uses .keys() and indexes by name, so anything but a non-empty
    # dict would crash the page later on.
    if not isinstance(examples, dict) or not examples:
        return fallback
    return examples
|
|
|
|
def main():
    """Render the demo page: header, sidebar, explainer, input form and results.

    The last anonymisation result is kept in ``st.session_state`` together with
    the input text and threshold it was computed for. Streamlit reruns the whole
    script on every interaction (including a click on the download button), and
    the "Anonymize" button is only true on the rerun triggered by its own click;
    without the stored result every other interaction would wipe the output.
    """
    import html  # only needed to escape entity text embedded in raw HTML below

    st.markdown(
        '<p style="font-size: 60px; font-weight: bold; color: #bdbbbb; text-align: center; margin-bottom: 10px;">🛡️ PID: Text Anonymization Demo</p>',
        unsafe_allow_html=True)
    st.markdown("""
<p style="text-align: center; color: #666;">
Automatic text anonymization for city council minutes and administrative documents
</p>
""", unsafe_allow_html=True)

    tokenizer, model_ner, rel_model, error = load_models()
    if error:
        st.error(f"Erro ao carregar modelos: {error}")
        st.stop()

    # ---- Sidebar -----------------------------------------------------------
    st.sidebar.header("⚙️ Configuration")
    st.sidebar.write("---")

    example_texts = load_example_texts()
    selected_example = st.sidebar.selectbox(
        "Choose an example:",
        options=list(example_texts.keys()),
        index=0
    )

    st.sidebar.markdown("<br><br>", unsafe_allow_html=True)

    threshold = st.sidebar.slider(
        "Entity Linking Threshold",
        min_value=0.0, max_value=1.0, value=0.80, step=0.05,
        help="Higher threshold = more strict. Use a higher value to ensure only very similar entities get the same ID, preventing different people from being grouped together."
    )

    st.sidebar.markdown("---")
    st.sidebar.markdown("### 📊 About")
    st.sidebar.info("""
- **Anonymization (NER)** uses Token Classification to identify and mask sensitive information (PID) in administrative documents.
- **Model**: XLM-RoBERTa fine-tuned for Named Entity Recognition.
- **Languages**: Portuguese (pt-pt).
- **Method**: Sequence Labeling with Bi-Encoder Entity Linking.
""")

    st.sidebar.markdown("")
    st.sidebar.markdown("### 🔗 Resources")
    # NOTE(review): these links use the `liaad/` organisation while the models
    # are loaded from `inesctec/...` ids - confirm both point at the same models.
    st.sidebar.markdown("""
- [📖 Model Card](https://huggingface.co/liaad/Citilink-XLMR-Anonymization-pt) (Anonymization)
- [📖 Model Card](https://huggingface.co/liaad/Citilink-mpnet-Entity-Linker-pt) (Entity Linking)
- [💾 GitHub Repository](https://github.com/)
""")

    st.write("")

    # ---- "How it works" explainer -------------------------------------------
    with st.expander("🎯 How it works", expanded=False):
        st.markdown("""
The anonymization process is powered by two specialized AI models working in sequence:

1. **The Detector (NER):** A model designed to extract personal information entities across multiple categories, identifying sensitive data within the document's context.
2. **The Linker (Entity Linking):** Understands when different words refer to the same entity. For example, it knows that *"João Silva"* and *"Sr. Silva"* are the same person, assigning them a consistent ID (e.g., `<Name-1>`).

""")

        st.markdown("""
<div style="background-color: #262730; padding: 20px; border-radius: 10px; border-left: 5px solid #3b82f6; margin-bottom: 25px;">
<p style="color: #3b82f6; font-weight: bold; margin-top: 0; margin-bottom: 5px;">Supported Entities:</p>
<p style="font-size: 0.75em; color: #94a3b8; margin-bottom: 15px;">Note: Entities marked with <span style="color: #60a5fa; font-weight: bold;">(ID)</span> support consistent linking across the document.</p>
<div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 10px; font-size: 0.85em; color: white;">
<div>
• Name <b style="color: #60a5fa;">(ID)</b><br>• Admin. Document <b style="color: #60a5fa;">(ID)</b><br>• Position/Department <b style="color: #60a5fa;">(ID)</b><br>• Address <b style="color: #60a5fa;">(ID)</b><br>• Date
</div>
<div>
• Location<br>• Personal Document <b style="color: #60a5fa;">(ID)</b><br>• Company <b style="color: #60a5fa;">(ID)</b><br>• Artistic Activity
</div>
<div>
• Degree<br>• Time<br>• License <b style="color: #60a5fa;">(ID)</b><br>• Job
</div>
<div>
• Vehicle <b style="color: #60a5fa;">(ID)</b><br>• Faculty<br>• Family Relationship<br>• Other
</div>
</div>
</div>
""", unsafe_allow_html=True)

        st.markdown("**INPUT:**")
        st.markdown('''
<div class="tab-window" style="height: auto; padding: 15px; border-top: 4px solid #666; margin-bottom: 10px;">
O interessado Dr. João Silva submeteu o processo administrativo 5597/2023 no dia 20/05/2023, relativo ao imóvel localizado na Rua das Flores n.º 10 conforme o solicitado.
</div>
''', unsafe_allow_html=True)

        st.markdown("**OUTPUT:**")
        # The placeholders are written with &lt; / &gt;: a literal "<Name-1>" in
        # raw HTML is parsed as an unknown element and nothing is displayed.
        # Address carries an id because it is an id-bearing category.
        st.markdown('''
<div class="tab-window" style="height: auto; padding: 15px; border-top: 4px solid #a8dadc;">
O interessado <span class="entity-highlight"><b>&lt;PositionDepartment-1&gt;</b></span> <span class="entity-highlight"><b>&lt;Name-1&gt;</b></span>
submeteu o processo administrativo <span class="entity-highlight"><b>&lt;AdministrativeInformation-1&gt;</b></span>
no dia <span class="entity-highlight"><b>&lt;Date&gt;</b></span>,
relativo ao imóvel localizado na <span class="entity-highlight"><b>&lt;Address-1&gt;</b></span> conforme o solicitado.
</div>
''', unsafe_allow_html=True)

    st.write("")

    # ---- Input / results ----------------------------------------------------
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("📝 Input Document")

        input_text = st.text_area(
            "Enter your text here:",
            # .get(): the selectbox yields None when there are no examples.
            value=example_texts.get(selected_example, ""),
            height=400,
            key=f"input_area_{selected_example}",
            placeholder="Paste your document text here..."
        )

        st.markdown("""
<style>
/* Remove o puxador de redimensionamento de todas as text areas */
div[data-testid="stTextArea"] textarea {
resize: none;
}
</style>
""", unsafe_allow_html=True)

        process_btn = st.button("🔍 Anonymize", type="primary", use_container_width=True)

        if process_btn and input_text:
            with st.spinner("Processing..."):
                texto_final, relatorio = process_anonymization(
                    input_text, threshold, tokenizer, model_ner, rel_model)
            st.session_state["anonymization_result"] = {
                "input_text": input_text,
                "threshold": threshold,
                "texto_final": texto_final,
                "relatorio": relatorio,
            }

        # Reuse the stored result only while it still matches what is on screen.
        result = st.session_state.get("anonymization_result")
        if result is not None and (
                result["input_text"] != input_text or result["threshold"] != threshold):
            result = None

        if result is not None:
            relatorio = result["relatorio"]
            total_entidades = len(relatorio)
            tipos_unicos = len({ent["category"] for ent in relatorio})

            st.markdown(f"""
<div class="small-metric-container">
<div class="small-metric-box">
<div class="metric-label">Entities</div>
<div class="metric-value">{total_entidades}</div>
</div>
<div class="small-metric-box">
<div class="metric-label">Categories</div>
<div class="metric-value">{tipos_unicos}</div>
</div>
</div>
""", unsafe_allow_html=True)

    with col2:
        st.subheader("🔒 Anonymization Results")

        if result is not None:
            texto_final = result["texto_final"]
            relatorio = result["relatorio"]

            tab_text, tab_entities = st.tabs(["📄 Anonymized Text", "🔍 Extracted Entities"])

            with tab_text:
                st.markdown(f'''
<div class="tab-window" style="border-top: 4px solid #2162bf;">
{texto_final}
</div>
''', unsafe_allow_html=True)

            with tab_entities:
                # Group the detected texts by category, keeping document order.
                agrupado = defaultdict(list)
                for item in relatorio:
                    agrupado[item["category"].replace("PERSONAL-", "")].append(item["text"])

                blocos = []
                for cat, lista in agrupado.items():
                    # Entity text comes straight from the user's document, so it
                    # is escaped before being embedded in raw HTML.
                    linhas = "".join(
                        f"<div style='color:#ffffff; margin-left:15px; margin-top:3px;'>- {html.escape(texto)}</div>"
                        for texto in lista
                    )
                    blocos.append(
                        "<div style='margin-bottom:20px;'>"
                        f"<b style='color:#a8dadc; font-size:1.1rem;'>{html.escape(cat)}</b> "
                        f"<span style='color:#666;'>({len(lista)})</span><br>"
                        f"{linhas}"
                        "</div>"
                    )
                html_content = "".join(blocos)

                st.markdown(f'''
<div class="tab-window" style="border-top: 4px solid #2162bf;">
{html_content if html_content else "No entities found."}
</div>
''', unsafe_allow_html=True)

            with st.expander("📋 Full Entity Report (JSON)"):
                download_data = {
                    "full_text": result["input_text"],
                    "personal_info": relatorio
                }
                json_string = json.dumps(download_data, indent=4, ensure_ascii=False)

                st.download_button(
                    label="📥 Download JSON Report",
                    data=json_string,
                    file_name="anonymization_report.json",
                    mime="application/json",
                    use_container_width=True
                )

                st.json(relatorio)
        else:
            st.markdown('''
<div style="margin-top: 30px;">
</div>
''', unsafe_allow_html=True)
            st.info("Please process a document on the left to view the results.")

    st.markdown("---")


if __name__ == "__main__":
    main()