qyle committed on
Commit
e9e1684
·
verified ·
1 Parent(s): e346a35

disabling language detection

Browse files
Files changed (1) hide show
  1. classes/prompt_sanitizer.py +13 -10
classes/prompt_sanitizer.py CHANGED
@@ -102,20 +102,23 @@ class PromptSanitizer:
102
  return text
103
 
104
  # 1. Automatic Language Detection
105
- try:
106
- detected_lang = detect(text)
107
- # Presidio needs to know if we are using 'en' or 'fr'
108
- # If it detects something else, we default to 'en'
109
- lang = detected_lang if detected_lang in ["en", "fr"] else "en"
110
- except Exception:
111
- lang = "en"
112
-
113
- # 1. Detect PII
 
 
 
114
  results = self.analyzer.analyze(
115
  text=text, entities=self.target_entities, language=lang
116
  )
117
 
118
- # 2. Redact PII
119
  anonymized_result = self.anonymizer.anonymize(
120
  text=text,
121
  analyzer_results=results, # pyright: ignore[reportArgumentType]
 
102
  return text
103
 
104
  # 1. Automatic Language Detection
105
+ # This step would add a lot of overhead to the LLM calls (almost 1s).
106
+ # try:
107
+ # detected_lang = detect(text)
108
+ # # Presidio needs to know if we are using 'en' or 'fr'
109
+ # # If it detects something else, we default to 'en'
110
+ # lang = detected_lang if detected_lang in ["en", "fr"] else "en"
111
+ # except Exception:
112
+ # lang = "en"
113
+
114
+ lang = "en"
115
+
116
+ # 2. Detect PII
117
  results = self.analyzer.analyze(
118
  text=text, entities=self.target_entities, language=lang
119
  )
120
 
121
+ # 3. Redact PII
122
  anonymized_result = self.anonymizer.anonymize(
123
  text=text,
124
  analyzer_results=results, # pyright: ignore[reportArgumentType]