import numpy as np
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             mean_squared_error, mean_absolute_error, hamming_loss)
from sklearn.preprocessing import MultiLabelBinarizer
def find_top_n(embeddings, n, index, data):
    # Retrieve the n nearest neighbours of each query embedding from the index and
    # map the returned ids back to the corresponding entries in `data`, which is
    # expected to be a tuple of three parallel sequences.
    if len(embeddings.shape) == 1:
        embeddings = embeddings.reshape(1, -1)
    top_ids_and_scores = index.search_knn(embeddings, n)
    data_ans = []
    for ids, scores in top_ids_and_scores:
        data_now = []
        for idx in ids:
            data_now.append((data[0][int(idx)], data[1][int(idx)], data[2][int(idx)]))
        data_ans.append(data_now)
    return data_ans
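# Hedged usage sketch for find_top_n: the function assumes `index` exposes a
# search_knn(query_vectors, n) method that returns, per query, a pair of
# (neighbour_ids, scores), as in FAISS-style retriever wrappers, and that `data`
# is a tuple of three parallel sequences indexed by those ids. The names below
# (encoder, texts, labels, ids_list) are illustrative assumptions only.
#
#   query_emb = encoder.encode(["some query text"])           # shape (1, dim)
#   neighbours = find_top_n(query_emb, n=5, index=index,
#                           data=(texts, labels, ids_list))
#   print(neighbours[0][0])   # (text, label, id) of the closest neighbour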
def print_line(class_name, metrics, is_header=False):
    # Print one row of the per-class metrics table; when is_header is True,
    # `metrics` is only iterated for its keys, which become the column names.
    if is_header:
        line = f"| {'Class':<10} | " + " | ".join([f"{metric:<10}" for metric in metrics])
    else:
        line = f"| {class_name:<10} | " + " | ".join([f"{metrics[metric]:<10.3f}" for metric in metrics])
    print(line)
    if is_header:
        print('-' * len(line))
def calculate_per_class_metrics(classes, ground_truth, predictions):
    # Convert ground truth and predictions to numeric format
    gt_numeric = np.array([int(gt) for gt in ground_truth])
    pred_numeric = np.array([int(pred) for pred in predictions])
    results = {}
    for i, class_name in enumerate(classes):
        # For each class, build the one-vs-rest binary labels
        gt_binary = (gt_numeric == i).astype(int)
        pred_binary = (pred_numeric == i).astype(int)
        # Calculate metrics, handling cases where a class is absent from predictions or ground truth
        precision = precision_score(gt_binary, pred_binary, zero_division=0)
        recall = recall_score(gt_binary, pred_binary, zero_division=0)
        f1 = f1_score(gt_binary, pred_binary, zero_division=0)
        acc = np.mean(gt_binary == pred_binary)
        # Recall of the complementary 'rest' class
        rest_recall = recall_score(1 - gt_binary, 1 - pred_binary, zero_division=0)
        results[class_name] = {
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1,
            'Accuracy': acc,
            'Avg Recall (with rest)': (recall + rest_recall) / 2
        }
    print_line("Metric", results[classes[0]], is_header=True)
    for class_name, metrics in results.items():
        print_line(class_name, metrics)
    overall_metrics = {
        metric_name: np.mean([metrics[metric_name] for metrics in results.values()])
        for metric_name in results[classes[0]].keys()
    }
    print_line("Overall", overall_metrics)
def calculate_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    avg_f1 = f1_score(y_true, y_pred, average='macro')
    avg_recall = recall_score(y_true, y_pred, average='macro')
    return accuracy, avg_f1, avg_recall
def compute_three_recalls(labels, preds):
    # Per-class recalls over the '0'/'1' string labels, plus their average.
    # Predictions of None are counted as neither correct class.
    all_n, all_p, tn, tp = 0, 0, 0, 0
    for label, pred in zip(labels, preds):
        if label == '0':
            all_p += 1
        if label == '1':
            all_n += 1
        if pred is not None and label == pred == '0':
            tp += 1
        if pred is not None and label == pred == '1':
            tn += 1
    machine_rec = tp * 100 / all_p if all_p != 0 else 0
    human_rec = tn * 100 / all_n if all_n != 0 else 0
    avg_rec = (human_rec + machine_rec) / 2
    return (human_rec, machine_rec, avg_rec)
def compute_metrics(labels, preds, ids=None, full_labels=False):
    if ids is not None:
        # Deduplicate by id: keep the last label/prediction seen for each id
        dict_labels, dict_preds = {}, {}
        for i in range(len(ids)):
            dict_labels[ids[i]] = labels[i]
            dict_preds[ids[i]] = preds[i]
        labels = list(dict_labels.values())
        preds = list(dict_preds.values())
    if not full_labels:
        # Note: `10 ^ 3` is bitwise XOR in Python (it evaluates to 9, not 1000);
        # it is kept as written so the keys match however the labels were produced.
        labels_map = {(1, 0): 0, (0, 10 ^ 3): 1, (1, 1): 2}
        labels_bin = [labels_map[tup] for tup in labels]
        preds_bin = [labels_map[tup] for tup in preds]
    else:
        labels_map = {
            (1, 0, 0): 0,  # Human
            (0, 10 ^ 3, 1): 1, (0, 10 ^ 3, 2): 2, (0, 10 ^ 3, 3): 3, (0, 10 ^ 3, 4): 4,  # AI
            (1, 1, 1): 5, (1, 1, 2): 6, (1, 1, 3): 7, (1, 1, 4): 8  # Human+AI
        }
        labels_bin = [labels_map[tup] for tup in labels]
        preds_bin = [labels_map[tup] for tup in preds]
    acc = accuracy_score(labels_bin, preds_bin)
    precision = precision_score(labels_bin, preds_bin, average="macro")
    recall = recall_score(labels_bin, preds_bin, average="macro")
    f1 = f1_score(labels_bin, preds_bin, average="macro")
    mse = mean_squared_error(labels_bin, preds_bin)
    mae = mean_absolute_error(labels_bin, preds_bin)
    return (acc, precision, recall, f1, mse, mae)
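# Hedged usage sketch for compute_metrics: labels and predictions are tuples that
# must match the keys of labels_map above, e.g. (1, 0) for human, (0, 10 ^ 3) for
# AI, and (1, 1) for human+AI in the coarse (full_labels=False) setting. The toy
# call below is purely illustrative.
#
#   labels = [(1, 0), (0, 10 ^ 3), (1, 1)]
#   preds = [(1, 0), (1, 1), (1, 1)]
#   acc, precision, recall, f1, mse, mae = compute_metrics(labels, preds)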
def compute_metrics_train(labels, preds, ids=None):
    if ids is not None:
        # Deduplicate by id: keep the last label/prediction seen for each id
        dict_labels, dict_preds = {}, {}
        for i in range(len(ids)):
            dict_labels[ids[i]] = labels[i]
            dict_preds[ids[i]] = preds[i]
        labels = list(dict_labels.values())
        preds = list(dict_preds.values())
    human_rec, machine_rec, avg_rec = compute_three_recalls(labels, preds)
    acc = accuracy_score(labels, preds)
    precision = precision_score(labels, preds, average="macro")
    recall = recall_score(labels, preds, average="macro")
    f1 = f1_score(labels, preds, average="macro")
    return (human_rec, machine_rec, avg_rec, acc, precision, recall, f1)
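# Hedged, self-contained demo: the toy labels below are illustrative assumptions
# only, not from the original code. It exercises the string-label ('0'/'1') path
# and the coarse tuple-label path.
if __name__ == "__main__":
    # Binary string labels, as expected by compute_three_recalls / compute_metrics_train
    y_true = ['0', '1', '0', '1', '0']
    y_pred = ['0', '1', '1', '1', '0']
    print(compute_metrics_train(y_true, y_pred))
    print(calculate_metrics(y_true, y_pred))

    # Coarse tuple labels, matching the keys of labels_map in compute_metrics
    toy_labels = [(1, 0), (0, 10 ^ 3), (1, 1), (1, 0)]
    toy_preds = [(1, 0), (0, 10 ^ 3), (1, 1), (0, 10 ^ 3)]
    print(compute_metrics(toy_labels, toy_preds))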