Spaces:

gregorkrzmanc
/

jetclustering

Sleeping

jetclustering / scripts /plot_mass_resolution.py

e75a247 7 months ago

5.67 kB

	import os
	from tqdm import tqdm
	import argparse
	import pickle
	from src.plotting.eval_matrix import matrix_plot, scatter_plot
	from src.utils.paths import get_path
	import matplotlib.pyplot as plt
	import numpy as np


	# Command-line interface: one optional ``--input`` argument naming the
	# evaluation directory, which get_path() resolves with the "results" tag.
	parser = argparse.ArgumentParser()
	parser.add_argument(
	    "--input",
	    type=str,
	    required=False,
	    default="scouting_PFNano_signals2/SVJ_hadronic_std/all_models_eval_FT_R",
	)

	args = parser.parse_args()
	path = get_path(args.input, "results")

	# Every entry under `path` that is not a regular file is treated as one
	# model's output directory; the names are kept in sorted order.
	models = sorted(
	    entry
	    for entry in os.listdir(path)
	    if not os.path.isfile(os.path.join(path, entry))
	)

	# Lookup table: model directory name -> radius value R.
	# get_color() reads it to pick a shade, and the 2-jet comparison plots use
	# membership in this table to decide which models get drawn.
	radius = {
	    "LGATr_R10": 1.0,
	    "LGATr_R09": 0.9,
	    # NOTE(review): the name does not encode a radius; 0.8 is taken as given.
	    "LGATr_rinv_03_m_900": 0.8,
	    "LGATr_R08": 0.8,
	    "LGATr_R06": 0.6,
	    "LGATr_R07": 0.7,
	    "LGATr_R11": 1.1,
	    "LGATr_R12": 1.2,
	    "LGATr_R13": 1.3,
	    "LGATr_R14": 1.4,
	    "LGATr_R20": 2.0,
	    "LGATr_R25": 2.5,
	}

	out_file = {}  # NOTE(review): never read anywhere in the visible script

	# Figure 1: one row per model directory. Left column shows m_pred / m_true,
	# right column shows mt_pred / mt_true, each split by the number of jets
	# when the result provides "n_jets".
	sz = 5
	# squeeze=False keeps `ax` two-dimensional even when only one model
	# directory exists; otherwise ax[i, 0] below would raise.
	fig, ax = plt.subplots(
	    len(models), 2, figsize=(sz * 2, sz / 2 * len(models)), squeeze=False
	)

	bins = np.linspace(0, 2, 100)
	# Wrapping `models` (not the enumerate object) lets tqdm know the total.
	for i, model in enumerate(tqdm(models)):
	    output_path = os.path.join(path, model, "count_matched_quarks")
	    f = os.path.join(output_path, "result_m.pkl")
	    if not os.path.isfile(f):
	        # Models without a result file keep an empty row in the figure.
	        continue
	    # NOTE: unpickling can execute arbitrary code; only point --input at
	    # result directories you trust.
	    with open(f, "rb") as fh:
	        result = pickle.load(fh)
	    # Fixed benchmark point. The later plots index the same structure as
	    # result[m_med][20][r_inv]; the meaning of the middle key (20) is not
	    # visible here -- TODO confirm.
	    r = result[900][20][0.3]
	    m_pred_over_true = r["m_pred"] / r["m_true"]
	    mt_pred_over_true = r["mt_pred"] / r["mt_true"]
	    ax[i, 0].hist(m_pred_over_true, bins=bins, histtype="step", label="all")
	    ax[i, 1].hist(mt_pred_over_true, bins=bins, histtype="step", label="all")
	    if "n_jets" in r:
	        n_jets = r["n_jets"]
	        # Same ratios, restricted to entries with 0, 1 and 2+ jets.
	        for mask, label in (
	            (n_jets == 0, "0 jets"),
	            (n_jets == 1, "1 jet"),
	            (n_jets > 1, "2+ jets"),
	        ):
	            ax[i, 0].hist(m_pred_over_true[mask], bins=bins, histtype="step", label=label)
	            ax[i, 1].hist(mt_pred_over_true[mask], bins=bins, histtype="step", label=label)
	    ax[i, 0].legend()
	    ax[i, 1].legend()
	    ax[i, 0].set_title(model)
	    ax[i, 1].set_title(model)
	    ax[i, 0].set_xlabel("m_pred / m_true")
	    ax[i, 1].set_xlabel("mt_pred / mt_true")
	    ax[i, 0].set_yscale("log")
	    ax[i, 1].set_yscale("log")
	fig.tight_layout()
	fig.savefig(os.path.join(path, "mass_histograms.pdf"))
	# Release the figure; the following sections create their own.
	plt.close(fig)



	#######

	# Figure set 2: for every r_inv value, one figure with a row per mediator
	# mass and all models overlaid in each panel.
	sz = 5
	r_invs = {"03": 0.3, "07": 0.7, "05": 0.5}
	bins = np.linspace(0, 2, 100)

	# Load each model's result file once, in model order, instead of relying on
	# a `result` variable left over from earlier code.
	c = {}
	for model in models:
	    f = os.path.join(path, model, "count_matched_quarks", "result_m.pkl")
	    if not os.path.isfile(f):
	        continue
	    # NOTE: unpickling can execute arbitrary code; only use trusted results.
	    with open(f, "rb") as fh:
	        c[model] = pickle.load(fh)
	    # The last model that has results defines the set of mass points,
	    # matching what the script effectively used before.
	    result = c[model]
	if not c:
	    raise FileNotFoundError(
	        "no count_matched_quarks/result_m.pkl found for any model under " + str(path)
	    )
	mmeds = sorted(result.keys())

	for r_inv, r_inv_value in r_invs.items():
	    # Rows are mass points, so the figure height scales with their count.
	    # squeeze=False keeps `ax` two-dimensional for a single mass point.
	    fig, ax = plt.subplots(
	        len(mmeds), 2, figsize=(sz * 2, sz / 2 * len(mmeds)), squeeze=False
	    )
	    for i, mmed in enumerate(tqdm(mmeds)):
	        for model, model_result in c.items():
	            # Middle key 20 is a fixed selection whose meaning is not
	            # visible in this script -- TODO confirm.
	            r = model_result[mmed][20][r_inv_value]
	            ax[i, 0].hist(r["m_pred"] / r["m_true"], bins=bins, histtype="step", label=model)
	            ax[i, 1].hist(r["mt_pred"] / r["mt_true"], bins=bins, histtype="step", label=model)
	        ax[i, 0].set_title("m_med = " + str(mmed))
	        ax[i, 1].set_title("m_med = " + str(mmed))
	        ax[i, 0].set_xlabel("m_pred / m_true")
	        ax[i, 1].set_xlabel("mt_pred / mt_true")
	        ax[i, 0].set_yscale("log")
	        ax[i, 1].set_yscale("log")
	        ax[i, 0].legend()
	        ax[i, 1].legend()
	    fig.tight_layout()
	    fig.savefig(os.path.join(path, "mass_histograms_model_comparison_{}.pdf".format(r_inv)))
	    # One figure per r_inv: close it so figures do not accumulate.
	    plt.close(fig)

	##########
	blues = plt.get_cmap("Blues")


	def get_color(model):
	    """Return the colour used to draw *model* in the comparison plots.

	    "AK8" maps to "gray" and "AK8_GenJets" to "black". Any other name is
	    looked up in the module-level ``radius`` table; its value is rescaled
	    linearly from the interval [0.3, 2.5] onto [0, 1] and used to sample
	    the "Blues" colormap.

	    Raises KeyError when *model* is neither of the two fixed names nor a
	    key of ``radius``.
	    """
	    fixed_colors = {"AK8": "gray", "AK8_GenJets": "black"}
	    if model in fixed_colors:
	        return fixed_colors[model]
	    # Bounds of the rescaling applied to the radius value.
	    r_low, r_high = 0.3, 2.5
	    return blues((radius[model] - r_low) / (r_high - r_low))


	# Figure set 3: same layout as the model comparison (one figure per r_inv,
	# one row per mediator mass), restricted to entries with exactly 2 jets and
	# coloured through get_color().
	sz = 5
	r_invs = {"03": 0.3, "07": 0.7, "05": 0.5}
	bins2 = np.linspace(0, 2, 50)

	# Load each model's result file once, in model order, instead of relying on
	# a `result` variable left over from earlier code.
	c = {}
	for model in models:
	    f = os.path.join(path, model, "count_matched_quarks", "result_m.pkl")
	    if not os.path.isfile(f):
	        continue
	    # NOTE: unpickling can execute arbitrary code; only use trusted results.
	    with open(f, "rb") as fh:
	        c[model] = pickle.load(fh)
	    # The last model that has results defines the set of mass points,
	    # matching what the script effectively used before.
	    result = c[model]
	if not c:
	    raise FileNotFoundError(
	        "no count_matched_quarks/result_m.pkl found for any model under " + str(path)
	    )
	mmeds = sorted(result.keys())

	for r_inv, r_inv_value in r_invs.items():
	    # Rows are mass points, so the figure height scales with their count.
	    # squeeze=False keeps `ax` two-dimensional for a single mass point.
	    fig, ax = plt.subplots(
	        len(mmeds), 2, figsize=(sz * 2, sz / 2 * len(mmeds)), squeeze=False
	    )

	    for i, mmed in enumerate(tqdm(mmeds)):
	        for model, model_result in c.items():
	            # Middle key 20 is a fixed selection whose meaning is not
	            # visible in this script -- TODO confirm.
	            r = model_result[mmed][20][r_inv_value]
	            # Only models get_color() can handle are drawn, and only when
	            # the result carries a per-entry jet count.
	            if "n_jets" in r and (model in radius or model in ["AK8", "AK8_GenJets"]):
	                two_jets = r["n_jets"] == 2
	                color = get_color(model)
	                ax[i, 0].hist((r["m_pred"] / r["m_true"])[two_jets], bins=bins2, histtype="step", label=model, color=color)
	                ax[i, 1].hist((r["mt_pred"] / r["mt_true"])[two_jets], bins=bins2, histtype="step", label=model, color=color)
	        ax[i, 0].set_title("m_med = " + str(mmed))
	        ax[i, 1].set_title("m_med = " + str(mmed))
	        ax[i, 0].set_xlabel("m_pred / m_true")
	        ax[i, 1].set_xlabel("mt_pred / mt_true")
	        ax[i, 0].set_yscale("log")
	        ax[i, 1].set_yscale("log")
	        ax[i, 0].legend()
	        ax[i, 1].legend()
	    fig.tight_layout()
	    fig.savefig(os.path.join(path, "mass_histograms_model_comparison_2jets_{}.pdf".format(r_inv)))
	    # One figure per r_inv: close it so figures do not accumulate.
	    plt.close(fig)