doronpe12 committed on
Commit
62e3da0
verified
1 Parent(s): 4f67600

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -31
app.py CHANGED
@@ -4,19 +4,18 @@ import json
4
  import os
5
 
6
  # Define your model ID on Hugging Face Hub
7
- # Make sure to replace 'your-username' with your actual Hugging Face username
8
- # and 'hebrew-english-log-pii-ner-model' with the exact repository name you pushed to.
9
  MODEL_ID = "doronpe12/log-anonymizer-heb-support"
10
 
11
  # Path to the ner_labels.json file.
12
- # When deployed on Hugging Face Spaces, this file will be in the root of the repo.
13
  LABELS_FILE = "ner_labels.json"
14
 
15
  # --- Load Label Mappings ---
16
  # This part ensures the Gradio app knows how to interpret the NER tags.
 
 
17
  try:
18
- # Attempt to load ner_labels.json from the same directory as app.py
19
- # This is how it will be found on Hugging Face Spaces
20
  with open(LABELS_FILE, 'r', encoding='utf-8') as f:
21
  label_mappings = json.load(f)
22
  id2label = {int(k): v for k, v in label_mappings["id2label"].items()}
@@ -24,64 +23,51 @@ try:
24
  print(f"Loaded {len(id2label)} NER labels from {LABELS_FILE}.")
25
  except FileNotFoundError:
26
  print(f"Error: {LABELS_FILE} not found. Please ensure it's uploaded to the Space.")
27
- # Fallback if the file is not found (e.g., during local testing without the file)
28
- # In a real Space, you'd want this file to be present.
29
- id2label = {0: "O", 1: "B-PERSON", 2: "I-PERSON"} # Minimal fallback
30
- label2id = {"O": 0, "B-PERSON": 1, "I-PERSON": 2}
31
  except Exception as e:
32
  print(f"Error loading {LABELS_FILE}: {e}")
33
- # Fallback to minimal labels
34
- id2label = {0: "O", 1: "B-PERSON", 2: "I-PERSON"}
35
- label2id = {"O": 0, "B-PERSON": 1, "I-PERSON": 2}
36
 
37
 
38
  # --- Load the Model and Tokenizer ---
39
- # The pipeline will automatically download the model from MODEL_ID
40
- # if it's not already cached in the Space's environment.
41
  print(f"Loading NER pipeline for model: {MODEL_ID}...")
42
  try:
43
- # Pass id2label to the pipeline for readable output
44
  ner_pipeline = pipeline(
45
  "ner",
46
  model=MODEL_ID,
47
  tokenizer=MODEL_ID,
48
- aggregation_strategy="simple", # Aggregates B-I-I tokens into a single entity
49
- id2label=id2label # Ensures output labels are human-readable strings
50
  )
51
  print("NER pipeline loaded successfully.")
52
  except Exception as e:
53
  print(f"Failed to load NER pipeline: {e}")
54
  print("Please ensure the model ID is correct and accessible on Hugging Face Hub.")
55
- ner_pipeline = None # Set to None to handle errors gracefully in the predict function
 
56
 
57
  # --- Prediction Function for Gradio ---
58
  def predict_pii(text):
59
- """
60
- Predicts PII entities in the input text using the loaded NER pipeline.
61
- Returns a list of dictionaries suitable for Gradio's HighlightedText component.
62
- """
63
  if ner_pipeline is None:
64
  return {"text": text, "entities": []}, "Error: Model not loaded. Please check logs."
65
 
66
  results = ner_pipeline(text)
67
 
68
- # Format results for Gradio's HighlightedText component
69
- # HighlightedText expects a dict with 'text' and 'entities'
70
- # Each entity is a dict with 'start', 'end', 'entity' (label), 'score' (optional)
71
  formatted_entities = []
72
  for entity in results:
73
- # Gradio's HighlightedText needs 'entity' key for the label
74
  formatted_entities.append({
75
  "start": entity['start'],
76
  "end": entity['end'],
77
- "entity": entity['entity_group'], # Use entity_group for aggregated label
78
  "score": entity['score']
79
  })
80
 
81
  return {"text": text, "entities": formatted_entities}, "PII detection complete."
82
 
83
  # --- Gradio Interface ---
84
- # Define example log entries
85
  examples = [
86
  "[2024-07-30 10:30:00] INFO: User John Doe from 192.168.1.10 accessed /data/reports on web-server-01. Email: john.doe@example.com, Phone: +972-50-1234567. ID: 123456789.",
87
  "[2024-07-30 11:15:20] ERROR: Failed authentication for username admin with password 'MyS3cr3tP@ssw0rd!' from IP 203.0.113.45. API Key: sk-xyz123abc.",
@@ -90,7 +76,6 @@ examples = [
90
  "[2024-07-30 15:00:00] INFO: בקשה מ-172.16.0.10 ל-api.mycompany.co.il. סטטוס: 200 OK. שם משתמש: upwwe13.",
91
  ]
92
 
93
- # Create the Gradio interface
94
  iface = gr.Interface(
95
  fn=predict_pii,
96
  inputs=gr.Textbox(lines=5, placeholder="הכנס כאן שורת לוג לבדיקת PII...", label="שורת לוג"),
@@ -101,8 +86,7 @@ iface = gr.Interface(
101
  title="מודל זיהוי PII בקבצי לוג (עברית ואנגלית)",
102
  description="הכנס שורת לוג כדי לזהות מידע מזהה אישית (PII) כגון כתובות IP, שמות משתמש, סיסמאות, מיילים, מספרי טלפון, פרטי חשבון בנק ועוד.",
103
  examples=examples,
104
- allow_flagging="never" # Disable flagging for this demo
105
  )
106
 
107
- # Launch the Gradio app
108
  iface.launch()
 
4
  import os
5
 
6
  # Define your model ID on Hugging Face Hub
7
+ # Make sure to replace 'doronpe12/log-anonymizer-heb-support' with your exact model repository name.
 
8
  MODEL_ID = "doronpe12/log-anonymizer-heb-support"
9
 
10
  # Path to the ner_labels.json file.
11
+ # This file is still required for the app's internal logic, even if not needed by the pipeline directly.
12
  LABELS_FILE = "ner_labels.json"
13
 
14
  # --- Load Label Mappings ---
15
  # This part ensures the Gradio app knows how to interpret the NER tags.
16
+ # We still need this file for the app's logic, but the pipeline itself will get
17
+ # the labels from the model's config.json.
18
  try:
 
 
19
  with open(LABELS_FILE, 'r', encoding='utf-8') as f:
20
  label_mappings = json.load(f)
21
  id2label = {int(k): v for k, v in label_mappings["id2label"].items()}
 
23
  print(f"Loaded {len(id2label)} NER labels from {LABELS_FILE}.")
24
  except FileNotFoundError:
25
  print(f"Error: {LABELS_FILE} not found. Please ensure it's uploaded to the Space.")
26
+ # Fallback labels (should not be reached if the file is present)
27
+ id2label = {0: "O", 1: "B-PII", 2: "I-PII"}
28
+ label2id = {v: k for k, v in id2label.items()}
 
29
  except Exception as e:
30
  print(f"Error loading {LABELS_FILE}: {e}")
31
+ id2label = {0: "O", 1: "B-PII", 2: "I-PII"}
32
+ label2id = {v: k for k, v in id2label.items()}
 
33
 
34
 
35
  # --- Load the Model and Tokenizer ---
 
 
36
  print(f"Loading NER pipeline for model: {MODEL_ID}...")
37
  try:
 
38
  ner_pipeline = pipeline(
39
  "ner",
40
  model=MODEL_ID,
41
  tokenizer=MODEL_ID,
42
+ aggregation_strategy="simple",
43
+ # <<< הבעיה נפתרה כאן: הסרנו את הארגומנט id2label
44
  )
45
  print("NER pipeline loaded successfully.")
46
  except Exception as e:
47
  print(f"Failed to load NER pipeline: {e}")
48
  print("Please ensure the model ID is correct and accessible on Hugging Face Hub.")
49
+ print("If your model is private, ensure your Space is also private.")
50
+ ner_pipeline = None
51
 
52
  # --- Prediction Function for Gradio ---
53
  def predict_pii(text):
 
 
 
 
54
  if ner_pipeline is None:
55
  return {"text": text, "entities": []}, "Error: Model not loaded. Please check logs."
56
 
57
  results = ner_pipeline(text)
58
 
 
 
 
59
  formatted_entities = []
60
  for entity in results:
 
61
  formatted_entities.append({
62
  "start": entity['start'],
63
  "end": entity['end'],
64
+ "entity": entity['entity_group'],
65
  "score": entity['score']
66
  })
67
 
68
  return {"text": text, "entities": formatted_entities}, "PII detection complete."
69
 
70
  # --- Gradio Interface ---
 
71
  examples = [
72
  "[2024-07-30 10:30:00] INFO: User John Doe from 192.168.1.10 accessed /data/reports on web-server-01. Email: john.doe@example.com, Phone: +972-50-1234567. ID: 123456789.",
73
  "[2024-07-30 11:15:20] ERROR: Failed authentication for username admin with password 'MyS3cr3tP@ssw0rd!' from IP 203.0.113.45. API Key: sk-xyz123abc.",
 
76
  "[2024-07-30 15:00:00] INFO: בקשה מ-172.16.0.10 ל-api.mycompany.co.il. סטטוס: 200 OK. שם משתמש: upwwe13.",
77
  ]
78
 
 
79
  iface = gr.Interface(
80
  fn=predict_pii,
81
  inputs=gr.Textbox(lines=5, placeholder="הכנס כאן שורת לוג לבדיקת PII...", label="שורת לוג"),
 
86
  title="מודל זיהוי PII בקבצי לוג (עברית ואנגלית)",
87
  description="הכנס שורת לוג כדי לזהות מידע מזהה אישית (PII) כגון כתובות IP, שמות משתמש, סיסמאות, מיילים, מספרי טלפון, פרטי חשבון בנק ועוד.",
88
  examples=examples,
89
+ flagging_mode="never" # <<< תוקנה האזהרה של Gradio
90
  )
91
 
 
92
  iface.launch()