Spaces:
No application file
No application file
| #!/usr/bin/env python3 | |
| """ | |
| Data Scientist: Dr. Eddy Giusepe Chirinos Isidro | |
| Objetivo: Neste script utilizamos um modelo pré-treinado para extrair | |
| Entidades e usamos o pacote logging do python para registrar | |
| nossos LOGs. | |
| """ | |
| import logging | |
| from transformers import pipeline | |
class EntityRecognizer:
    """Named-entity recognizer built on a pre-trained Hugging Face NER pipeline.

    The instance holds the pipeline in ``self.model`` and a logger in
    ``self.logger`` that writes to ``reconhecimento_de_entidade.log``.
    """

    def __init__(self, model_name="Babelscape/wikineural-multilingual-ner"):  # https://huggingface.co/Babelscape/wikineural-multilingual-ner
        """Load the NER model and configure logging.

        Args:
            model_name: Model identifier handed to :meth:`load_model`.
        """
        self.model = self.load_model(model_name)
        self.logger = self.setup_logger()

    def load_model(self, model_name="Babelscape/wikineural-multilingual-ner"):
        """Return a ``"ner"`` pipeline for the pre-trained model *model_name*.

        The same identifier is used for both the model and its tokenizer.
        """
        return pipeline("ner", model=model_name, tokenizer=model_name)

    def setup_logger(self):
        """Return the module logger with an INFO-level file handler attached.

        ``logging.getLogger(__name__)`` returns the same logger object on
        every call, so the file handler is attached only once; otherwise each
        additional ``EntityRecognizer`` instance would make every message
        appear one more time in the log file.
        """
        import os  # local import: only needed to normalise the log path

        log_file = 'reconhecimento_de_entidade.log'
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # FileHandler stores the absolute path in ``baseFilename``.
        target = os.path.abspath(log_file)
        for handler in logger.handlers:
            if isinstance(handler, logging.FileHandler) and handler.baseFilename == target:
                return logger

        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
        return logger

    def recognize_entities(self, text):
        """Run the NER model on *text* and return ``(word, tag)`` tuples.

        Each item produced by ``self.model(text)`` is read through its
        ``'word'`` and ``'entity'`` keys. The resulting list is logged at
        INFO level before being returned.
        """
        predictions = self.model(text)
        recognized_entities = [(item['word'], item['entity']) for item in predictions]
        # Lazy %-style arguments: the message is only built if it is emitted.
        self.logger.info("Entidades reconhecidas: %s", recognized_entities)
        return recognized_entities

    @staticmethod
    def _store_entity(result, pieces, entity_type):
        """Record the entity made of *pieces* in *result* if it is non-empty."""
        if entity_type is None:
            return
        entity_text = " ".join(pieces)
        if entity_text:
            result[entity_text] = entity_type

    def process_classification_result(self, tokens_and_tags):
        """Group BIO-tagged tokens into a ``{entity_text: entity_type}`` dict.

        Args:
            tokens_and_tags: Iterable of ``(token, tag)`` pairs, where an
                entity tag looks like ``B-<TYPE>`` or ``I-<TYPE>``.

        Returns:
            A dict mapping each entity's text to its type (the tag without
            the ``B-``/``I-`` prefix). Tokens of one entity are joined with a
            single space and are not otherwise altered, so sub-word pieces
            such as ``##dy`` appear in the key verbatim. If the same entity
            text occurs more than once, the last type wins.

        Grouping rules:
            * ``B-<TYPE>`` always starts a new entity.
            * ``I-<TYPE>`` extends the open entity when the type matches;
              otherwise (no open entity, or a different type) it starts a
              new entity instead of being dropped or merged.
            * Any other tag (for example ``O``) closes the open entity.
        """
        result = {}
        current_type = None
        pieces = []

        for token, tag in tokens_and_tags:
            prefix, _, label = tag.partition("-")

            if prefix == "I" and label == current_type:
                pieces.append(token)
                continue

            # Every other case ends whatever entity was in progress.
            self._store_entity(result, pieces, current_type)
            if prefix in ("B", "I") and label:
                current_type, pieces = label, [token]
            else:
                current_type, pieces = None, []

        self._store_entity(result, pieces, current_type)
        return result
if __name__ == "__main__":
    # Usage example: run the default model on one sentence.
    # A different model can be selected with EntityRecognizer(model_name).
    sentence = "Eddy e Karina compraram uns tênis na loja Nike."
    recognizer = EntityRecognizer()

    # Raw (word, tag) pairs as returned by the recognizer.
    token_tags = recognizer.recognize_entities(sentence)
    print(token_tags)
    print("🤗🤗🤗")

    # Group the pairs into entities, then remove every ' ##' sequence
    # from the keys.
    grouped = recognizer.process_classification_result(token_tags)
    cleaned = {}
    for entity_text, entity_type in grouped.items():
        cleaned[entity_text.replace(" ##", "")] = entity_type
    print(cleaned)