| from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel | |
| from sklearn.ensemble import IsolationForest | |
| from tqdm import tqdm | |
| import torch | |
| import gradio as gr | |
| import numpy as np | |
# Hugging Face checkpoint shared by the tokenizer and the encoder.
_CHECKPOINT = "neuralmind/bert-base-portuguese-cased"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
# ``eval()`` returns the module itself, so the encoder can be loaded and
# switched to inference mode in a single expression.
model = AutoModel.from_pretrained(_CHECKPOINT).eval()

# Training matrix for the outlier detector.
# NOTE(review): presumably precomputed embeddings from the same encoder, so
# they match what ``classify_email`` feeds to ``predict`` -- confirm.
data = np.load("x_train.npy")

# ``fit`` returns the fitted estimator, so construction and training are
# chained; ``random_state`` is pinned so repeated runs build the same forest.
iso_forest = IsolationForest(contamination=0.15, random_state=42).fit(data)
def classify_email(text):
    """Run one text through the encoder and the fitted Isolation Forest.

    The text is tokenized (truncated to at most 256 tokens), passed through
    the module-level ``model``, and the hidden state at sequence position 0
    (the [CLS] slot) is given to ``iso_forest.predict``.

    Args:
        text: Raw text coming from the Gradio text input.

    Returns:
        The first element of the array returned by ``iso_forest.predict``.
        Per scikit-learn's IsolationForest documentation this is ``1`` for
        an inlier and ``-1`` for an outlier.
    """
    encoded = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        padding=True,
        max_length=256,
    )

    # Inference only: no autograd bookkeeping is needed for the forward pass.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state

    # Keep only the vector at position 0 of each sequence and convert it to
    # a NumPy array, since the scikit-learn estimator expects array input.
    features = hidden_states[:, 0, :].cpu().numpy()

    labels = iso_forest.predict(features)
    return labels[0]
# Minimal Gradio UI: one text box in, the numeric prediction out.
demo = gr.Interface(
    fn=classify_email,
    inputs="text",
    outputs="number",
)

# Start the web server for the interface.
demo.launch()