# Spaces: Build error
import re
import uuid
def process_tokens(tokens, inverse_uuid_map=None, uuid_map=None, embeddings_model=None, fhe_ner_detection=None, client=None):
    """Rebuild a text from ``tokens``, deanonymizing or anonymizing word tokens.

    Tokens that are empty, whitespace-only, or do not start with a word
    character are always copied through unchanged. For every other token the
    mode is selected from the optional arguments, in this order:

    * ``inverse_uuid_map`` is given: deanonymize. The token is replaced by
      ``inverse_uuid_map[token]`` when present, otherwise kept.
    * ``uuid_map``, ``embeddings_model``, ``fhe_ner_detection`` and ``client``
      are all given: anonymize. The token's embedding is scored by
      ``fhe_ner_detection.predict_proba``; when the score is at least 0.5 the
      token is replaced by a short identifier recorded in ``uuid_map``.
    * otherwise: the token is kept as is.

    Args:
        tokens: Iterable of string tokens, including separators, in order.
        inverse_uuid_map: Mapping from identifier back to the original token.
        uuid_map: Mapping from original token to identifier. It is mutated in
            place: newly flagged tokens are added to it.
        embeddings_model: Object exposing ``wv[token]`` for embedding lookup.
        fhe_ner_detection: Object exposing ``predict_proba(x)``.
        client: Only checked for presence; not otherwise used here.

    Returns:
        The processed tokens concatenated into a single string.
    """
    # Loop-invariant: anonymization needs every one of these collaborators.
    anonymizer_ready = (
        uuid_map is not None
        and embeddings_model is not None
        and fhe_ner_detection is not None
        and client is not None
    )

    processed_tokens = []
    for token in tokens:
        # Whitespace and tokens not starting with a word character are
        # emitted verbatim so the original spacing/punctuation survives.
        if not token.strip() or not re.match(r"\w+", token):
            processed_tokens.append(token)
            continue

        if inverse_uuid_map is not None:
            # Deanonymizing: unknown tokens fall back to themselves.
            processed_tokens.append(inverse_uuid_map.get(token, token))
        elif anonymizer_ready:
            # ``[None]`` presumably adds a leading batch axis to the embedding
            # vector -- TODO confirm against the embeddings model in use.
            # NOTE(review): depending on the model, ``wv[token]`` may raise for
            # out-of-vocabulary tokens; that is deliberately not swallowed here.
            x = embeddings_model.wv[token][None]
            prediction_proba = fhe_ner_detection.predict_proba(x)
            # Second entry of the first row; presumably the probability of the
            # "sensitive" class -- verify against the classifier.
            probability = prediction_proba[0][1]
            if probability >= 0.5:
                # Only mint a new identifier the first time a token is seen so
                # repeated tokens map to the same replacement.
                # NOTE(review): 8 hex characters keep replacements short at the
                # cost of a small collision risk.
                if token not in uuid_map:
                    uuid_map[token] = str(uuid.uuid4())[:8]
                processed_tokens.append(uuid_map[token])
            else:
                processed_tokens.append(token)
        else:
            processed_tokens.append(token)

    return ''.join(processed_tokens)