VGreatVig07 committed on
Commit
03c6783
·
verified ·
1 Parent(s): 4ee0a7a

Update utils/masker3.py

Browse files
Files changed (1) hide show
  1. utils/masker3.py +25 -3
utils/masker3.py CHANGED
@@ -1,6 +1,8 @@
1
  import re
2
  import spacy
3
  from typing import Dict, Any, List
 
 
4
 
5
  # Load spaCy model
6
  nlp = spacy.load("en_core_web_sm")
@@ -9,7 +11,14 @@ def mask_pii(text: str) -> Dict[str, Any]:
9
  """
10
  Enhanced PII masking with JSON output format
11
  """
12
- masked_text = text
 
 
 
 
 
 
 
13
  entities = []
14
 
15
  def mask_and_record(pattern, label, group=0):
@@ -60,13 +69,26 @@ def mask_pii(text: str) -> Dict[str, Any]:
60
  })
61
 
62
  # Optional: Set category based on simple rule or ML model
63
- category = "sensitive_information"
 
64
 
 
 
 
 
 
 
 
 
 
 
65
  return {
66
- "input_email_body": text,
67
  "list_of_masked_entities": sorted(entities, key=lambda x: x["position"][0]),
68
  "masked_email": masked_text,
69
  "category_of_the_email": category
70
  }
71
 
72
 
 
 
 
1
  import re
2
  import spacy
3
  from typing import Dict, Any, List
4
+ from langdetect import detect
5
+ from deep_translator import GoogleTranslator
6
 
7
  # Load spaCy model
8
  nlp = spacy.load("en_core_web_sm")
 
11
  """
12
  Enhanced PII masking with JSON output format
13
  """
14
+ lang = detect(text)
15
+ if lang == 'en':
16
+ #return text
17
+ masked_text = text
18
+ else:
19
+ # Translate to English
20
+ translated = GoogleTranslator(source=lang, target='en').translate(text)
21
+ masked_text = translated
22
  entities = []
23
 
24
  def mask_and_record(pattern, label, group=0):
 
69
  })
70
 
71
  # Optional: Set category based on simple rule or ML model
72
+ category = "Problem"
73
+
74
 
75
+ # if lang == 'en':
76
+ # masked_text = masked_text
77
+ # else:
78
+ # masked_text = GoogleTranslator(source='en', target=lang).translate(masked_text)
79
+ text2 = text
80
+ for ent in entities:
81
+ entity_value = ent['entity']
82
+ classification = ent['classification']
83
+ text = text.replace(entity_value, f"[{classification}]")
84
+ masked_text = text
85
  return {
86
+ "input_email_body": text2,
87
  "list_of_masked_entities": sorted(entities, key=lambda x: x["position"][0]),
88
  "masked_email": masked_text,
89
  "category_of_the_email": category
90
  }
91
 
92
 
93
+ text = "Subject: Unvorhergesehener Absturz der Datenanalyse-Plattform\n\nDie Datenanalyse-Plattform brach unerwartet ab, da die Speicheroberfläche zu gering war My name is Sophia Rossi.. Ich habe versucht, Laravel 8 und meinen MacBook Pro neu zu starten, aber das Problem behält sich bei. Ich benötige Ihre Unterstützung, um diesen Fehler zu beheben. You can reach me at janesmith@company.com."
94
+ print(mask_pii(text))