| import csv
|
| import json
|
| import os
|
| import logging
|
|
|
| import numpy as np
|
| import pandas as pd
|
| from scipy.special import softmax
|
| from sklearn.metrics import (
|
| confusion_matrix,
|
| recall_score,
|
| f1_score,
|
| accuracy_score,
|
| balanced_accuracy_score,
|
| )
|
| from fairlearn.metrics import (
|
| count,
|
| false_positive_rate,
|
| false_negative_rate,
|
| selection_rate,
|
| demographic_parity_difference,
|
| equal_opportunity_difference,
|
| equalized_odds_difference,
|
| )
|
| from fairlearn.metrics import MetricFrame
|
|
|
# Route INFO-and-above records to the console, each prefixed with a timestamp
# and the record's level name.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
|
|
|
|
def logits_to_probs(logits, config, prior_shift_weight=None):
    """Convert raw model logits into per-class probabilities.

    The conversion depends on the training objective flagged in ``config``
    (checked in this order):

    * ``domain_independent_loss``: the columns of ``logits`` are split into
      ``config["num_groups"]`` equal chunks, the chunks are summed and a
      softmax is taken over the summed logits.
    * ``domain_discriminative_loss``: a softmax over all columns is
      multiplied by ``prior_shift_weight``, the columns are split into
      ``config["num_groups"]`` equal chunks, the chunks are summed and a
      softmax is applied to the sums.
    * neither flag set: a plain softmax along axis 1.

    Parameters:
        logits: Array of logits; scores are taken along axis 1.
        config: Mapping with the optional boolean keys
            ``domain_independent_loss`` / ``domain_discriminative_loss``
            and, when either is set, ``num_groups``.
        prior_shift_weight: Optional per-column weights for the
            domain-discriminative branch. They are used verbatim and must
            broadcast against ``logits``. When omitted, the built-in
            weights (eight columns, already scaled by 1/100) are used.

    Returns:
        Array of probabilities with one column per class.

    Raises:
        ValueError: Raised by NumPy when the columns cannot be split evenly
            into ``num_groups`` chunks or the weights do not broadcast.
    """
    if config.get("domain_independent_loss", False):
        per_group = np.split(logits, config["num_groups"], axis=1)
        marginalized = np.sum(per_group, axis=0)
        return softmax(marginalized, axis=1)

    if config.get("domain_discriminative_loss", False):
        if prior_shift_weight is None:
            # Built-in, dataset-specific weights. They look like
            # (group size / per-class count) for four groups x two classes
            # -- TODO confirm against the training data statistics.
            prior_shift_weight = np.array(
                [
                    1088/1072, 1088/16, 17746/17515, 17746/231, 6454/6273, 6454/181, 850/834, 850/16
                ]
            ) / 100
        else:
            prior_shift_weight = np.asarray(prior_shift_weight, dtype=float)

        probs_yd = softmax(logits, axis=1) * prior_shift_weight
        per_group = np.split(probs_yd, config["num_groups"], axis=1)
        marginalized = np.sum(per_group, axis=0)

        # NOTE(review): ``marginalized`` already holds weighted probabilities,
        # yet a softmax is applied on top. This keeps the argmax but not the
        # probability values; behaviour is preserved here -- confirm intent.
        return softmax(marginalized, axis=1)

    return softmax(logits, axis=1)
|
|
|
|
|
|
|
def compute_error_metric(metric_value, sample_size):
    """Compute the error margin of a rate under a normal approximation.

    The rate is estimated as ``metric_value / sample_size`` and the returned
    value is ``1.96 * sqrt(p * (1 - p) / sample_size)``, i.e. 1.96 times the
    standard error of that proportion.

    Parameters:
        metric_value: Number of events counted (the numerator of the rate),
            not the rate itself.
        sample_size: Number of data points the events were counted among
            (the denominator of the rate).

    Returns:
        The error margin of the rate; NaN when ``sample_size`` is zero,
        because the rate is undefined in that case.
    """
    # An empty sample (e.g. a group without any positives) makes the rate
    # undefined. Let NumPy propagate NaN quietly instead of raising
    # ZeroDivisionError for Python ints or emitting RuntimeWarnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        proportion = np.true_divide(metric_value, sample_size)
        return 1.96 * np.sqrt(proportion * (1.0 - proportion)) / np.sqrt(sample_size)
|
|
|
|
|
def false_positive_error(y_true, y_pred):
    """Compute the error term for the false positive rate estimate.

    Labels are assumed to be binary and encoded as 0 (negative) and
    1 (positive).

    Parameters:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The value of ``compute_error_metric`` for the false positive count
        among all actual negatives (``tn + fp``).
    """
    # Pin the label set: without ``labels`` a slice that contains a single
    # class yields a 1x1 matrix and the four-way unpacking below fails.
    tn, fp, _fn, _tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return compute_error_metric(fp, tn + fp)
|
|
|
|
|
def false_negative_error(y_true, y_pred):
    """Compute the error term for the false negative rate estimate.

    Labels are assumed to be binary and encoded as 0 (negative) and
    1 (positive).

    Parameters:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The value of ``compute_error_metric`` for the false negative count
        among all actual positives (``fn + tp``).
    """
    # Pin the label set: without ``labels`` a slice that contains a single
    # class yields a 1x1 matrix and the four-way unpacking below fails.
    _tn, _fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return compute_error_metric(fn, fn + tp)
|
|
|
|
|
def balanced_accuracy_error(y_true, y_pred):
    """Compute the error term for the balanced accuracy estimate.

    The false positive and false negative error terms are combined in
    quadrature and the result is halved.
    """
    fpr_term = false_positive_error(y_true, y_pred)
    fnr_term = false_negative_error(y_true, y_pred)
    squared_sum = fnr_term**2 + fpr_term**2
    return np.sqrt(squared_sum) / 2
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: for every experiment folder, load the stored
    # evaluation arrays, turn logits into predictions, compute overall and
    # per-group metrics, append one summary row to the shared CSV and write
    # the per-group table next to the checkpoint.

    # Machine-specific folder holding one sub-folder per experiment.
    root_dir = "C:\\Users\\Duje\\Desktop\\fer\\8. semestar\\lumen\\rezultati\\02 eksperimenti\\"
    common_csv = "rezultati.csv"
    disagg_csv = "disaggregated.csv"
    experiments = [
        "01 baseline 0304",
        "02 recall ce 0304",
        "04 cielab re based",
        "05 cielab ohem",
        "08 optim params large",
        "10 transformer\\normal",
        "11 transformer ohem",
        "12 domain discriminative\\new",
        "13 oversampler",
        "15 focal loss\\new",
        "14 domain independent\\new",
        "16 efficient m\\new",
        "17 masked\\new",
        "18 efficient l\\new",
        "19 oversampler trio\\1 base",
        "19 oversampler trio\\2 ifw, recall_ce",
        "19 oversampler trio\\3 ifw, ohem",
        "20 dino\\new",
        "21 dino oversample",
        "22 dino undersample",
        # The comma after the next entry was missing, which silently fused the
        # last two names into one non-existent experiment.
        "23 long train 04",
        "24 dd transformer",
    ]

    # Metrics evaluated overall and per group. The mapping is the same for
    # every experiment, so it is built once outside the loop.
    metrics = dict(
        count=count,
        f1=f1_score,
        recall=recall_score,
        accuracy=accuracy_score,
        selection_rate=selection_rate,
        balanced_accuracy=balanced_accuracy_score,
        balanced_acc_error=balanced_accuracy_error,
        false_positive_rate=false_positive_rate,
        false_positive_error=false_positive_error,
        false_negative_rate=false_negative_rate,
        false_negative_error=false_negative_error,
    )

    # Metrics whose between-group difference is reported, mapped to the name
    # used for that difference in the summary row.
    diff_names = dict(
        f1="f1_diff",
        recall="recall_diff",
        accuracy="accuracy_diff",
        balanced_accuracy="balanced_acc_diff",
        false_positive_rate="fpr_diff",
        false_negative_rate="fnr_diff",
    )

    logging.info(f"Collecting metrics for {len(experiments)} experiments")
    for exp in experiments:
        eval_dir = os.path.join(root_dir, exp, "eval")
        # The first sub-directory reported for eval/ is used as the checkpoint.
        chkpt = next(os.walk(eval_dir))[1][0]
        logging.info(f"Evaluating checkpoint {chkpt} for experiment {exp}")

        with open(os.path.join(root_dir, exp, "config.json")) as f:
            config = json.load(f)

        y_true = np.load(os.path.join(eval_dir, chkpt, "y_true.npy"))
        logits = np.load(os.path.join(eval_dir, chkpt, "logits.npy"))
        groups = np.load(os.path.join(eval_dir, chkpt, "groups.npy"))
        y_prob = logits_to_probs(logits, config)
        y_pred = np.argmax(y_prob, axis=1)

        prob_path = os.path.join(eval_dir, chkpt, "probs.npy")
        np.save(prob_path, y_prob)
        logging.info(f"Saved posteriror probabilities to {prob_path}")

        mf = MetricFrame(
            metrics=metrics,
            y_true=y_true,
            y_pred=y_pred,
            sensitive_features=groups,
        )

        # float() accepts both NumPy and built-in scalars, so all three
        # fairness values are converted the same way.
        dpd = float(
            demographic_parity_difference(y_true, y_pred, sensitive_features=groups)
        )
        eq_odds = float(
            equalized_odds_difference(y_true, y_pred, sensitive_features=groups)
        )
        eq_opp = float(
            equal_opportunity_difference(y_true, y_pred, sensitive_features=groups)
        )

        diffs = mf.difference()[list(diff_names)].rename(diff_names)

        # The Series below have disjoint labels, so ``add(fill_value=0)``
        # acts as a merge: every value is added to 0 and the labels are united.
        fair = pd.Series([dpd, eq_odds, eq_opp], index=["dpd", "eq_odds", "eq_opp"])
        fair = fair.add(diffs, fill_value=0)
        result = mf.overall.add(fair, fill_value=0)

        # Write the header only when the summary file is created by this row.
        write_header = not os.path.isfile(common_csv)
        with open(common_csv, "a", newline="") as f:
            writer = csv.writer(f)
            if write_header:
                writer.writerow(["experiment"] + result.keys().to_list())
            writer.writerow([exp] + result.to_list())
        logging.info(f"Added row to {common_csv}")

        # ``sensitive_feature_0`` is the name of the by-group index, not a
        # column, so renaming columns had no effect; rename the index axis.
        group = mf.by_group.rename_axis("group")
        group.to_csv(os.path.join(eval_dir, chkpt, disagg_csv))
        logging.info(f"Saved disaggregated metrics to {disagg_csv}")

    logging.info("Done")
|
|
|