Upload folder using huggingface_hub

dae5c90 verified about 1 year ago

7.42 kB

	import csv
	import json
	import os
	import logging

	import numpy as np
	import pandas as pd
	from scipy.special import softmax
	from sklearn.metrics import (
	confusion_matrix,
	recall_score,
	f1_score,
	accuracy_score,
	balanced_accuracy_score,
	)
	from fairlearn.metrics import (
	count,
	false_positive_rate,
	false_negative_rate,
	selection_rate,
	demographic_parity_difference,
	equal_opportunity_difference,
	equalized_odds_difference,
	)
	from fairlearn.metrics import MetricFrame

	logging.basicConfig(
	level=logging.INFO,
	format="%(asctime)s - %(levelname)s - %(message)s",
	handlers=[logging.StreamHandler()],
	)


	def logits_to_probs(logits, config):
	    """Convert raw logits into posterior probabilities.

	    Posterior probabilities are calculated differently in some experiments,
	    selected by flags in ``config``:

	    * ``domain_independent_loss``: the logit columns are split into
	      ``config["num_groups"]`` equal blocks along axis 1, the blocks are
	      summed element-wise, and softmax is applied to the sum.
	    * ``domain_discriminative_loss``: softmax over all columns is multiplied
	      by the hard-coded prior-shift weights below, the weighted columns are
	      split into ``config["num_groups"]`` blocks and summed, and softmax is
	      applied a second time.
	    * neither flag set: plain softmax along axis 1.

	    Parameters:
	        logits: Array of logits; every operation here acts along axis 1.
	        config: Mapping that may contain the two boolean flags above and,
	            when either is set, ``"num_groups"``.

	    Returns:
	        Array of probabilities normalised along axis 1.
	    """

	    def _sum_over_groups(values):
	        # Split the columns into equal per-group blocks and add them up.
	        blocks = np.split(values, config["num_groups"], axis=1)
	        return np.sum(blocks, axis=0)

	    if config.get("domain_independent_loss", False):
	        return softmax(_sum_over_groups(logits), axis=1)

	    if config.get("domain_discriminative_loss", False):
	        # Prior shift inference; per the original note these ratios come
	        # from the train distribution.
	        ratios = [
	            1088/1072,
	            1088/16,
	            17746/17515,
	            17746/231,
	            6454/6273,
	            6454/181,
	            850/834,
	            850/16,
	        ]
	        prior_shift_weight = np.array(ratios) / 100

	        weighted = softmax(logits, axis=1) * prior_shift_weight

	        # The weighting breaks normalisation, so softmax is applied once more.
	        return softmax(_sum_over_groups(weighted), axis=1)

	    return softmax(logits, axis=1)


	# Fairlearn docs
	def compute_error_metric(metric_value, sample_size):
	    """Compute an error bar for a proportion under a normal approximation.

	    ``metric_value`` is first divided by ``sample_size`` to obtain a
	    proportion ``p``; the result is ``1.96 * sqrt(p * (1 - p)) /
	    sqrt(sample_size)``, i.e. 1.96 times the standard error of ``p``.

	    Parameters:
	        metric_value: Numerator of the proportion (it is divided by
	            ``sample_size`` before use).
	        sample_size: Number of data points associated with the metric.

	    Returns:
	        The scaled standard error of the proportion.
	    """
	    proportion = metric_value / sample_size
	    spread = np.sqrt(proportion * (1.0 - proportion))
	    return 1.96 * spread / np.sqrt(sample_size)


	def false_positive_error(y_true, y_pred):
	    """Compute the error bar for the false positive rate estimate.

	    Parameters:
	        y_true: Ground-truth binary labels (assumed to be 0/1).
	        y_pred: Predicted binary labels (assumed to be 0/1).

	    Returns:
	        ``compute_error_metric(fp, tn + fp)``: the error of the false
	        positive proportion among the actual negatives.
	    """
	    # Pin the label set so the matrix is always 2x2. Without it, a subgroup
	    # containing a single class yields a 1x1 matrix and the four-way
	    # unpacking below raises ValueError.
	    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
	    return compute_error_metric(fp, tn + fp)


	def false_negative_error(y_true, y_pred):
	    """Compute the error bar for the false negative rate estimate.

	    Parameters:
	        y_true: Ground-truth binary labels (assumed to be 0/1).
	        y_pred: Predicted binary labels (assumed to be 0/1).

	    Returns:
	        ``compute_error_metric(fn, fn + tp)``: the error of the false
	        negative proportion among the actual positives.
	    """
	    # Pin the label set so the matrix is always 2x2. Without it, a subgroup
	    # containing a single class yields a 1x1 matrix and the four-way
	    # unpacking below raises ValueError.
	    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
	    return compute_error_metric(fn, fn + tp)


	def balanced_accuracy_error(y_true, y_pred):
	    """Compute the error bar for the balanced accuracy estimate.

	    The false positive and false negative error bars are combined in
	    quadrature and halved: ``sqrt(fnr_error**2 + fpr_error**2) / 2``.

	    Parameters:
	        y_true: Ground-truth binary labels.
	        y_pred: Predicted binary labels.

	    Returns:
	        The combined error for balanced accuracy.
	    """
	    fpr_error = false_positive_error(y_true, y_pred)
	    fnr_error = false_negative_error(y_true, y_pred)
	    # The exponents were missing in the original expression, which then
	    # referenced undefined names (fnr_error2 / fpr_error2).
	    return np.sqrt(fnr_error**2 + fpr_error**2) / 2


	if __name__ == "__main__":
	    # Script entry point: for every experiment folder, load the saved
	    # evaluation arrays, derive predictions, compute overall and per-group
	    # fairness metrics, append one summary row to a shared CSV and write
	    # the disaggregated metrics next to the checkpoint.
	    root_dir = "C:\\Users\\Duje\\Desktop\\fer\\8. semestar\\lumen\\rezultati\\02 eksperimenti\\"
	    common_csv = "rezultati.csv"
	    disagg_csv = "disaggregated.csv"
	    experiments = [
	        "01 baseline 0304",
	        "02 recall ce 0304",
	        "04 cielab re based",
	        "05 cielab ohem",
	        "08 optim params large",
	        "10 transformer\\normal",
	        "11 transformer ohem",
	        "12 domain discriminative\\new",
	        "13 oversampler",
	        "15 focal loss\\new",
	        "14 domain independent\\new",
	        "16 efficient m\\new",
	        "17 masked\\new",
	        "18 efficient l\\new",
	        "19 oversampler trio\\1 base",
	        "19 oversampler trio\\2 ifw, recall_ce",
	        "19 oversampler trio\\3 ifw, ohem",
	        "20 dino\\new",
	        "21 dino oversample",
	        "22 dino undersample",
	        # The comma after the next entry was missing, which silently merged
	        # it with the following one via implicit string concatenation.
	        "23 long train 04",
	        "24 dd transformer",
	    ]

	    logging.info(f"Collecting metrics for {len(experiments)} experiments")
	    for exp in experiments:

	        eval_dir = os.path.join(root_dir, exp, "eval")
	        # First sub-directory reported by os.walk is taken as the checkpoint.
	        chkpt = next(os.walk(eval_dir))[1][0]  # checkpoint folder
	        logging.info(f"Evaluating checkpoint {chkpt} for experiment {exp}")

	        with open(os.path.join(root_dir, exp, "config.json")) as f:
	            config = json.load(f)

	        y_true = np.load(os.path.join(eval_dir, chkpt, "y_true.npy"))
	        logits = np.load(os.path.join(eval_dir, chkpt, "logits.npy"))
	        groups = np.load(os.path.join(eval_dir, chkpt, "groups.npy"))
	        y_prob = logits_to_probs(logits, config)
	        y_pred = np.argmax(y_prob, axis=1)

	        # Persist the probabilities alongside the other evaluation arrays.
	        prob_path = os.path.join(eval_dir, chkpt, "probs.npy")
	        np.save(prob_path, y_prob)
	        logging.info(f"Saved posteriror probabilities to {prob_path}")

	        metrics = dict(
	            count=count,
	            f1=f1_score,
	            recall=recall_score,
	            accuracy=accuracy_score,
	            selection_rate=selection_rate,
	            balanced_accuracy=balanced_accuracy_score,
	            balanced_acc_error=balanced_accuracy_error,
	            false_positive_rate=false_positive_rate,
	            false_positive_error=false_positive_error,
	            false_negative_rate=false_negative_rate,
	            false_negative_error=false_negative_error,
	        )
	        mf = MetricFrame(
	            metrics=metrics,
	            y_true=y_true,
	            y_pred=y_pred,
	            sensitive_features=groups,
	        )

	        dpd = demographic_parity_difference(
	            y_true, y_pred, sensitive_features=groups
	        ).item()
	        eq_odds = equalized_odds_difference(y_true, y_pred, sensitive_features=groups)
	        eq_opp = equal_opportunity_difference(
	            y_true, y_pred, sensitive_features=groups
	        ).item()

	        # Between-group differences for the rate-like metrics only.
	        diffs = mf.difference()[
	            [
	                "f1",
	                "recall",
	                "accuracy",
	                "balanced_accuracy",
	                "false_positive_rate",
	                "false_negative_rate",
	            ]
	        ]
	        diffs = diffs.rename(
	            dict(
	                f1="f1_diff",
	                recall="recall_diff",
	                accuracy="accuracy_diff",
	                balanced_accuracy="balanced_acc_diff",
	                false_positive_rate="fpr_diff",
	                false_negative_rate="fnr_diff",
	            )
	        )

	        # One row for each experiment
	        fair = pd.Series([dpd, eq_odds, eq_opp], index=["dpd", "eq_odds", "eq_opp"])
	        # The indexes are disjoint, so add(..., fill_value=0) acts as a union.
	        fair = fair.add(diffs, fill_value=0)
	        result = mf.overall.add(fair, fill_value=0)

	        # Write the header only when the summary file does not exist yet.
	        header = ["experiment"] + result.keys().to_list()
	        if not os.path.isfile(common_csv):
	            with open(common_csv, "a", newline="") as f:
	                writer = csv.writer(f)
	                writer.writerow(header)

	        row = [exp] + result.to_list()
	        with open(common_csv, "a", newline="") as f:
	            writer = csv.writer(f)
	            writer.writerow(row)
	        logging.info(f"Added row to {common_csv}")

	        # Disaggregated metrics
	        group = mf.by_group
	        # NOTE(review): if the group labels live in the index rather than in
	        # a column, this rename has no effect -- confirm against the CSV.
	        group = group.rename(columns={"sensitive_feature_0": "group"})
	        group.to_csv(os.path.join(eval_dir, chkpt, disagg_csv))
	        logging.info(f"Saved disaggregated metrics to {disagg_csv}")

	    logging.info("Done")