# NOTE: The "Spaces: / Sleeping" lines that preceded this file were Hugging Face
# Spaces page chrome captured during web extraction, not part of the source.
| import gradio as gr | |
| from transformers import pipeline, AutoTokenizer, AutoModelForTokenClassification | |
| import json | |
| import os | |
# Model repository on the Hugging Face Hub.
# Replace 'doronpe12/log-anonymizer-heb-support' with your exact model repo name.
MODEL_ID = "doronpe12/log-anonymizer-heb-support"

# Path to the ner_labels.json file. The pipeline itself reads labels from the
# model's config.json; this file only feeds the app's internal logic.
LABELS_FILE = "ner_labels.json"

# Single source of truth for the fallback label map, so the two error paths
# below cannot drift out of sync (previously the literal was duplicated).
_FALLBACK_ID2LABEL = {0: "O", 1: "B-PII", 2: "I-PII"}

# --- Load Label Mappings ---
# Ensures the Gradio app knows how to interpret the NER tags. Any failure
# (missing file, bad JSON, missing keys) degrades to the fallback map.
try:
    with open(LABELS_FILE, 'r', encoding='utf-8') as f:
        label_mappings = json.load(f)
    # JSON object keys are strings; convert back to int ids.
    id2label = {int(k): v for k, v in label_mappings["id2label"].items()}
    label2id = label_mappings["label2id"]
    print(f"Loaded {len(id2label)} NER labels from {LABELS_FILE}.")
except FileNotFoundError:
    print(f"Error: {LABELS_FILE} not found. Please ensure it's uploaded to the Space.")
    id2label = dict(_FALLBACK_ID2LABEL)
    label2id = {v: k for k, v in id2label.items()}
except Exception as e:
    # Catch-all at this top-level boundary: log and fall back rather than crash.
    print(f"Error loading {LABELS_FILE}: {e}")
    id2label = dict(_FALLBACK_ID2LABEL)
    label2id = {v: k for k, v in id2label.items()}
# --- Load the Model and Tokenizer ---
print(f"Loading NER pipeline for model: {MODEL_ID}...")
try:
    # NOTE: do not pass an id2label argument here — the pipeline picks up the
    # label map from the model's config.json on the Hub.
    ner_pipeline = pipeline(
        "ner",
        model=MODEL_ID,
        tokenizer=MODEL_ID,
        aggregation_strategy="simple",
    )
except Exception as load_error:
    print(f"Failed to load NER pipeline: {load_error}")
    print("Please ensure the model ID is correct and accessible on Hugging Face Hub.")
    print("If your model is private, ensure your Space is also private.")
    # Sentinel checked by predict_pii before running inference.
    ner_pipeline = None
else:
    print("NER pipeline loaded successfully.")
# --- Prediction Function for Gradio ---
def predict_pii(text):
    """Run the NER pipeline on a log line and format entities for Gradio.

    Args:
        text: Raw log line to scan for PII.

    Returns:
        A 2-tuple of (HighlightedText payload dict with "text" and "entities"
        keys, status message string). On a missing model, returns an empty
        entity list plus an error message instead of raising.
    """
    if ner_pipeline is None:
        return {"text": text, "entities": []}, "Error: Model not loaded. Please check logs."
    results = ner_pipeline(text)
    # Cast score to a plain float: the pipeline yields numpy scalars, which
    # are not JSON-serializable when Gradio sends the payload to the browser.
    formatted_entities = [
        {
            "start": entity["start"],
            "end": entity["end"],
            "entity": entity["entity_group"],
            "score": float(entity["score"]),
        }
        for entity in results
    ]
    return {"text": text, "entities": formatted_entities}, "PII detection complete."
# --- Gradio Interface ---
# Sample log lines (English and Hebrew) covering common PII kinds: names, IPs,
# emails, phone numbers, national IDs, passwords, API keys, IBANs, usernames.
examples = [
    "[2024-07-30 10:30:00] INFO: User John Doe from 192.168.1.10 accessed /data/reports on web-server-01. Email: john.doe@example.com, Phone: +972-50-1234567. ID: 123456789.",
    "[2024-07-30 11:15:20] ERROR: Failed authentication for username admin with password 'MyS3cr3tP@ssw0rd!' from IP 203.0.113.45. API Key: sk-xyz123abc.",
    "[2024-07-30 12:00:00] DEBUG: System heartbeat ok. No PII here.",
    "[2024-07-30 13:45:10] AUDIT: Transaction IL110111522200000333999 initiated by Jane Smith for 1,500,000.50 ILS. Bank: Bank Hapoalim.",
    "[2024-07-30 15:00:00] INFO: 讘拽砖讛 诪-172.16.0.10 诇-api.mycompany.co.il. 住讟讟讜住: 200 OK. 砖诐 诪砖转诪砖: upwwe13.",
]
# Two outputs: highlighted PII spans (consumes predict_pii's first return
# value) and a plain status/error textbox (its second return value).
iface = gr.Interface(
    fn=predict_pii,
    inputs=gr.Textbox(lines=5, placeholder="讛讻谞住 讻讗谉 砖讜专转 诇讜讙 诇讘讚讬拽转 PII...", label="砖讜专转 诇讜讙"),
    outputs=[
        gr.HighlightedText(label="转讜爪讗讜转 讝讬讛讜讬 PII"),
        gr.Textbox(label="住讟讟讜住/讛讜讚注讜转")
    ],
    title="诪讜讚诇 讝讬讛讜讬 PII 讘拽讘爪讬 诇讜讙 (注讘专讬转 讜讗谞讙诇讬转)",
    description="讛讻谞住 砖讜专转 诇讜讙 讻讚讬 诇讝讛讜转 诪讬讚注 诪讝讛讛 讗讬砖讬转 (PII) 讻讙讜谉 讻转讜讘讜转 IP, 砖诪讜转 诪砖转诪砖, 住讬住诪讗讜转, 诪讬讬诇讬诐, 诪住驻专讬 讟诇驻讜谉, 驻专讟讬 讞砖讘讜谉 讘谞拽 讜注讜讚.",
    examples=examples,
    flagging_mode="never"  # replaces deprecated allow_flagging; fixes the Gradio warning
)
iface.launch()