Spaces:
Sleeping
Sleeping
File size: 5,421 Bytes
3fef185 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error, hamming_loss
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
def find_top_n(embeddings, n, index, data):
    """Look up the n nearest neighbours for each query embedding.

    Args:
        embeddings: query vector(s); a single 1-D vector is promoted to a
            one-row matrix before searching.
        n: number of neighbours to retrieve per query.
        index: search structure exposing ``search_knn(matrix, n)`` that
            yields one ``(ids, scores)`` pair per query row.
        data: three parallel sequences indexed by neighbour id.

    Returns:
        One list per query, each containing
        ``(data[0][id], data[1][id], data[2][id])`` triples for its
        retrieved neighbour ids.
    """
    if len(embeddings.shape) == 1:
        embeddings = embeddings.reshape(1, -1)
    results = []
    for ids, _scores in index.search_knn(embeddings, n):
        triples = [
            (data[0][int(nid)], data[1][int(nid)], data[2][int(nid)])
            for nid in ids
        ]
        results.append(triples)
    return results
def print_line(class_name, metrics, is_header=False):
    """Print one row of a metrics table.

    When ``is_header`` is true, the keys of ``metrics`` become column
    titles and a dashed separator is printed underneath; otherwise each
    value in ``metrics`` is rendered with three decimals.
    """
    if is_header:
        cells = [f"{key:<10}" for key in metrics]
    else:
        cells = [f"{metrics[key]:<10.3f}" for key in metrics]
    row = f"| {class_name:<10} | " + " | ".join(cells)
    print(row)
    if is_header:
        print('-' * len(row))
def calculate_per_class_metrics(classes, ground_truth, predictions):
    """Print a one-vs-rest metrics table for each class.

    Args:
        classes: display name for each class index.
        ground_truth / predictions: sequences of stringified class indices
            (anything ``int()`` accepts).

    Prints one row per class (precision, recall, F1, accuracy, and the
    mean of the class recall with the "rest" recall), followed by an
    "Overall" row averaging each metric across classes. Output is printed
    only; nothing is returned.
    """
    y_true = np.array([int(label) for label in ground_truth])
    y_pred = np.array([int(label) for label in predictions])

    per_class = {}
    for idx, name in enumerate(classes):
        # One-vs-rest binarisation for this class.
        true_bin = (y_true == idx).astype(int)
        pred_bin = (y_pred == idx).astype(int)
        # zero_division=0 avoids warnings when a class never appears.
        rec = recall_score(true_bin, pred_bin, zero_division=0)
        # Recall of the complementary ("rest") side of the split.
        rest_rec = recall_score(1 - true_bin, 1 - pred_bin, zero_division=0)
        per_class[name] = {
            'Precision': precision_score(true_bin, pred_bin, zero_division=0),
            'Recall': rec,
            'F1 Score': f1_score(true_bin, pred_bin, zero_division=0),
            'Accuracy': np.mean(true_bin == pred_bin),
            'Avg Recall (with rest)': (rec + rest_rec) / 2,
        }

    print_line("Metric", per_class[classes[0]], is_header=True)
    for name, metrics in per_class.items():
        print_line(name, metrics)
    overall = {
        key: np.mean([m[key] for m in per_class.values()])
        for key in per_class[classes[0]]
    }
    print_line("Overall", overall)
def calculate_metrics(y_true, y_pred):
    """Return ``(accuracy, macro F1, macro recall)`` for the given labels."""
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='macro'),
        recall_score(y_true, y_pred, average='macro'),
    )
def compute_three_recalls(labels, preds):
    """Compute per-class and averaged recalls, as percentages.

    Labels and predictions are strings: '0' marks the machine class and
    '1' the human class. A ``None`` prediction is never counted as a hit.

    Args:
        labels: ground-truth labels ('0' or '1').
        preds: predicted labels ('0', '1', or None).

    Returns:
        ``(human_rec, machine_rec, avg_rec)``: recall on the '1' class,
        recall on the '0' class, and their unweighted mean, each in
        [0, 100]. A class absent from ``labels`` yields recall 0.
    """
    # Fix vs. original: removed a dead `if pred is None: continue` that sat
    # at the end of the loop body (no effect); renamed the confusingly
    # swapped all_p/all_n counters.
    total_machine = total_human = hit_machine = hit_human = 0
    for label, pred in zip(labels, preds):
        if label == '0':
            total_machine += 1
            if pred == '0':
                hit_machine += 1
        elif label == '1':
            total_human += 1
            if pred == '1':
                hit_human += 1
    machine_rec = hit_machine * 100 / total_machine if total_machine != 0 else 0
    human_rec = hit_human * 100 / total_human if total_human != 0 else 0
    avg_rec = (human_rec + machine_rec) / 2
    return (human_rec, machine_rec, avg_rec)
def compute_metrics(labels, preds, ids=None, full_labels=False):
    """Score tuple-encoded labels against predictions with macro metrics.

    Args:
        labels / preds: sequences of label tuples (see the maps below for
            the accepted tuples).
        ids: optional sample ids; when given, repeated ids are collapsed so
            only the last (label, pred) pair per id is scored.
        full_labels: use the 9-way fine-grained encoding instead of the
            3-way one.

    Returns:
        ``(accuracy, macro precision, macro recall, macro F1, MSE, MAE)``
        over the integer-encoded labels.
    """
    if ids is not None:
        # Deduplicate by id; dict insertion order means the last pair wins.
        label_by_id, pred_by_id = {}, {}
        for i, sample_id in enumerate(ids):
            label_by_id[sample_id] = labels[i]
            pred_by_id[sample_id] = preds[i]
        labels = list(label_by_id.values())
        preds = list(pred_by_id.values())

    # NOTE(review): `10^3` is bitwise XOR (== 9), not 1000 — presumably this
    # matches the encoding used where the label tuples are produced; verify.
    if full_labels:
        labels_map = {
            (1, 0, 0): 0,                                                        # Human
            (0, 10^3, 1): 1, (0, 10^3, 2): 2, (0, 10^3, 3): 3, (0, 10^3, 4): 4,  # AI
            (1, 1, 1): 5, (1, 1, 2): 6, (1, 1, 3): 7, (1, 1, 4): 8,              # Human+AI
        }
    else:
        labels_map = {(1, 0): 0, (0, 10^3): 1, (1, 1): 2}
    labels_bin = [labels_map[tup] for tup in labels]
    preds_bin = [labels_map[tup] for tup in preds]

    return (
        accuracy_score(labels_bin, preds_bin),
        precision_score(labels_bin, preds_bin, average="macro"),
        recall_score(labels_bin, preds_bin, average="macro"),
        f1_score(labels_bin, preds_bin, average="macro"),
        mean_squared_error(labels_bin, preds_bin),
        mean_absolute_error(labels_bin, preds_bin),
    )
def compute_metrics_train(labels, preds, ids=None):
    """Training-time scoring: the three recalls plus standard macro metrics.

    Args:
        labels / preds: label strings as consumed by
            ``compute_three_recalls`` and sklearn's metric functions.
        ids: optional sample ids; when given, repeated ids are collapsed so
            only the last (label, pred) pair per id is scored.

    Returns:
        ``(human_rec, machine_rec, avg_rec, accuracy, macro precision,
        macro recall, macro F1)``.
    """
    if ids is not None:
        # Keep only the last occurrence per id (dicts preserve insertion order).
        label_by_id, pred_by_id = {}, {}
        for i, sample_id in enumerate(ids):
            label_by_id[sample_id] = labels[i]
            pred_by_id[sample_id] = preds[i]
        labels = list(label_by_id.values())
        preds = list(pred_by_id.values())

    human_rec, machine_rec, avg_rec = compute_three_recalls(labels, preds)
    return (
        human_rec,
        machine_rec,
        avg_rec,
        accuracy_score(labels, preds),
        precision_score(labels, preds, average="macro"),
        recall_score(labels, preds, average="macro"),
        f1_score(labels, preds, average="macro"),
    )
|