| import torch |
| import torch.nn as nn |
| from sklearn.metrics import accuracy_score, f1_score |
|
|
| from .metrics import class_wise_f1 |
|
|
|
|
def run_knn(
    eval_type,
    train_embeddings,
    train_labels,
    test_embeddings,
    test_labels,
    num_classes,
    is_multilabel,
    device,
    skip_idx=False,
    return_class_wise=False,
):
    """Score embeddings with a cosine-similarity k-NN classifier.

    Args:
        eval_type: ``"KNN-5"`` or ``"KNN-20"``; selects the number of
            neighbours (5 or 20).
        train_embeddings: Reference embeddings, forwarded unchanged to
            ``_run_knn_for_k``.
        train_labels: Labels of the reference embeddings. In the multilabel
            case this must be 2-D with ``num_classes`` as its last dimension;
            column ``i`` is used as a binary target for class ``i``.
        test_embeddings: Query embeddings, forwarded unchanged to
            ``_run_knn_for_k``.
        test_labels: Ground truth handed to the metric functions. In the
            multilabel case its last dimension must equal ``num_classes``.
        num_classes: Number of classes (single-label) or label columns
            (multilabel).
        is_multilabel: When true, one binary k-NN is run per label column.
        device: Device forwarded to ``_run_knn_for_k``.
        skip_idx: Forwarded to ``_run_knn_for_k``.
        return_class_wise: Return per-class scores instead of one aggregate.

    Returns:
        Single-label: ``accuracy_score(test_labels, predictions)``, or the
        result of ``class_wise_f1`` when ``return_class_wise`` is set.
        Multilabel: the micro-averaged ``f1_score``, or a list with one
        ``f1_score`` per label column when ``return_class_wise`` is set.

    Raises:
        ValueError: If ``eval_type`` is not one of the supported values.
        AssertionError: In the multilabel case, if the last dimension of
            ``train_labels`` or ``test_labels`` differs from ``num_classes``.
    """
    supported_k = {"KNN-5": 5, "KNN-20": 20}
    # Validate before doing any work: an unknown eval_type used to fall through
    # every branch and fail much later on an unassigned local variable.
    if eval_type not in supported_k:
        raise ValueError(
            f"Unsupported eval_type {eval_type!r}; expected one of {sorted(supported_k)}"
        )
    k = supported_k[eval_type]

    if not is_multilabel:
        predictions = _run_knn_for_k(
            train_embeddings=train_embeddings,
            train_labels=train_labels,
            test_embeddings=test_embeddings,
            num_classes=num_classes,
            k=k,
            device=device,
            skip_idx=skip_idx,
        )
        if return_class_wise:
            return class_wise_f1(y_true=test_labels, y_pred=predictions, num_classes=num_classes)
        return accuracy_score(y_true=test_labels, y_pred=predictions)

    # Multilabel: each label column is treated as an independent binary problem.
    assert num_classes == train_labels.shape[-1], "train_labels last dim must equal num_classes"
    assert num_classes == test_labels.shape[-1], "test_labels last dim must equal num_classes"

    per_class_predictions = [
        _run_knn_for_k(
            train_embeddings=train_embeddings,
            train_labels=train_labels[:, class_idx],
            test_embeddings=test_embeddings,
            num_classes=2,
            k=k,
            device=device,
            skip_idx=skip_idx,
        )
        for class_idx in range(num_classes)
    ]
    # Re-assemble the per-class columns into one (num_test, num_classes) tensor.
    predictions = torch.stack(per_class_predictions, dim=1)

    if return_class_wise:
        return [f1_score(test_labels[:, i], predictions[:, i]) for i in range(num_classes)]
    return f1_score(y_true=test_labels, y_pred=predictions, average="micro")
|
|
|
|
def _run_knn_for_k(
    train_embeddings, train_labels, test_embeddings, num_classes, k, device, skip_idx
):
    """Predict one class per test embedding by similarity-weighted k-NN voting.

    For every test embedding the cosine similarity to all training embeddings
    is computed, the ``k`` most similar training samples are selected, and each
    one votes for its label with weight ``exp(similarity / 0.07)``. The class
    with the largest summed weight is the prediction.

    Args:
        train_embeddings: 2-D tensor, one row per training sample.
        train_labels: 1-D integer tensor of class indices aligned with the
            rows of ``train_embeddings`` (it is passed to ``one_hot``).
        test_embeddings: 2-D tensor, one row per query.
        num_classes: Width of the one-hot vote vector.
        k: Number of neighbours to retrieve. If the training set holds fewer
            than ``k`` samples, all of them are used.
        device: Device on which the similarity search runs.
        skip_idx: When true, the single most similar neighbour is dropped
            before voting, so only ``k - 1`` neighbours vote. Presumably meant
            for the case where the queries are also in the training set —
            confirm with callers.

    Returns:
        A 1-D ``int64`` CPU tensor holding one predicted class per query.

    Raises:
        ValueError: If there are queries but no training embeddings.
    """
    # Nothing to classify: return an empty prediction tensor straight away.
    if test_embeddings.shape[0] == 0:
        return torch.empty(0, dtype=torch.long)

    num_train = train_embeddings.shape[0]
    if num_train == 0:
        raise ValueError("k-NN requires at least one training embedding")

    train_embeddings = train_embeddings.to(device)
    test_embeddings = test_embeddings.to(device)
    train_labels = train_labels.to(device)

    # torch.topk raises when k exceeds the number of candidates, so cap it.
    effective_k = min(k, num_train)
    temperature = 0.07
    cos = nn.CosineSimilarity(dim=-1)

    all_preds = []
    for test_embedding in test_embeddings:
        # Broadcasting the (1, dim) query against (num_train, dim) avoids
        # materialising a repeated copy of the query for every training row.
        sims = cos(test_embedding.unsqueeze(dim=0), train_embeddings)
        top_k = torch.topk(sims, k=effective_k)
        if skip_idx:
            # topk returns values in descending order; drop the best match.
            top_k_values = top_k.values[1:]
            top_k_indices = top_k.indices[1:]
        else:
            top_k_values = top_k.values
            top_k_indices = top_k.indices

        fetched_labels = train_labels[top_k_indices]
        fetched_onehots = nn.functional.one_hot(fetched_labels, num_classes=num_classes)
        # Sharpen the similarities so closer neighbours dominate the vote.
        weights = (top_k_values / temperature).exp()
        weighted_sum_onehots = (weights.unsqueeze(dim=1) * fetched_onehots).sum(dim=0)
        all_preds.append(torch.argmax(weighted_sum_onehots))

    # argmax yields 0-dim int64 tensors; stack them once and move to the CPU.
    return torch.stack(all_preds).cpu()
|
|