GNN4Colliders / root_gnn_dgl /scripts /inference.py

fixing inference

d129ca0 4 days ago

11.6 kB

	import sys
	file_path = "/global/cfs/projectdirs/atlas/joshua/root_gnn/root_gnn_dgl"
	sys.path.append(file_path)
	import os
	import argparse
	import yaml
	import gc

	import torch
	import dgl
	from dgl.data import DGLDataset
	from dgl.dataloading import GraphDataLoader
	from torch.utils.data import SubsetRandomSampler, SequentialSampler

	class CustomPreBatchedDataset(DGLDataset):
	def __init__(self, start_dataset, batch_size, chunkno=0, chunks=1, mask_fn=None, drop_last=False, shuffle=False, **kwargs):
	self.start_dataset = start_dataset
	self.batch_size = batch_size
	self.mask_fn = mask_fn or (lambda x: torch.ones(len(x), dtype=torch.bool))
	self.drop_last = drop_last
	self.shuffle = shuffle
	self.chunkno = chunkno
	self.chunks = chunks
	super().__init__(name=start_dataset.name + '_custom_prebatched', save_dir=start_dataset.save_dir)

	def process(self):
	mask = self.mask_fn(self.start_dataset)
	indices = torch.arange(len(self.start_dataset))[mask]
	print(f"Number of elements after masking: {len(indices)}") # Debugging print

	# --- CHUNK SPLITTING ---
	total = len(indices)
	if self.chunks == 1:
	chunk_indices = indices
	print(f"Chunks=1, using all {total} indices.")
	else:
	chunk_size = (total + self.chunks - 1) // self.chunks
	start = self.chunkno * chunk_size
	end = min((self.chunkno + 1) * chunk_size, total)
	chunk_indices = indices[start:end]
	print(f"Working on chunk {self.chunkno}/{self.chunks}: indices {start}:{end} (total {len(chunk_indices)})")

	if self.shuffle:
	sampler = SubsetRandomSampler(chunk_indices)
	else:
	sampler = SequentialSampler(chunk_indices)

	self.dataloader = GraphDataLoader(
	self.start_dataset,
	sampler=sampler,
	batch_size=self.batch_size,
	drop_last=self.drop_last
	)

	def __getitem__(self, idx):
	if isinstance(idx, int):
	idx = [idx]
	sampler = SequentialSampler(idx)
	dloader = GraphDataLoader(self.start_dataset, sampler=sampler, batch_size=self.batch_size, drop_last=False)
	return next(iter(dloader))

	def __len__(self):
	mask = self.mask_fn(self.start_dataset)
	indices = torch.arange(len(self.start_dataset))[mask]
	total = len(indices)
	if self.chunks == 1:
	return total
	chunk_size = (total + self.chunks - 1) // self.chunks
	start = self.chunkno * chunk_size
	end = min((self.chunkno + 1) * chunk_size, total)
	return end - start

	def include_config(conf):
	if 'include' in conf:
	for i in conf['include']:
	with open(i) as f:
	conf.update(yaml.load(f, Loader=yaml.FullLoader))
	del conf['include']

	def load_config(config_file):
	with open(config_file) as f:
	conf = yaml.load(f, Loader=yaml.FullLoader)
	include_config(conf)
	return conf

	def main():

	parser = argparse.ArgumentParser()
	add_arg = parser.add_argument
	add_arg('--config', type=str, nargs='+', required=True, help="List of config files")
	add_arg('--target', type=str, required=True)
	add_arg('--destination', type=str, default='')
	add_arg('--chunkno', type=int, default=0)
	add_arg('--chunks', type=int, default=1)
	add_arg('--write', action='store_true')
	add_arg('--ckpt', type=int, default=-1)
	add_arg('--var', type=str, default='Test_AUC')
	add_arg('--mode', type=str, default='max')
	add_arg('--clobber', action='store_true')
	add_arg('--tree', type=str, default='')
	add_arg('--branch_name', type=str, nargs='+', required=True, help="List of branch names corresponding to configs")
	args = parser.parse_args()

	if(len(args.config) != len(args.branch_name)):
	print(f"configs and branch names do not match")
	return

	config = load_config(args.config[0])

	# --- OUTPUT DESTINATION LOGIC ---
	if args.destination == '':
	base_dest = os.path.join(config['Training_Directory'], 'inference/', os.path.split(args.target)[1])
	else:
	base_dest = args.destination

	base_dest = base_dest.replace('.root', '').replace('.npz', '')
	if args.chunks > 1:
	chunked_dest = f"{base_dest}_chunk{args.chunkno}"
	else:
	chunked_dest = base_dest
	chunked_dest += '.root' if args.write else '.npz'
	args.destination = chunked_dest

	# --- FILE EXISTENCE CHECK ---
	if os.path.exists(args.destination):
	print(f'File {args.destination} already exists.')
	if args.clobber:
	print('Clobbering.')
	else:
	print('Exiting.')
	return
	else:
	print(f'Writing to {args.destination}')

	import time
	start = time.time()
	import ROOT
	import torch
	from array import array
	import numpy as np
	from root_gnn_base import batched_dataset as dataset
	from root_gnn_base import utils
	end = time.time()
	print('Imports finished in {:.2f} seconds'.format(end - start))

	start = time.time()
	dset_config = config['Datasets'][list(config['Datasets'].keys())[0]]
	if dset_config['class'] == 'LazyDataset':
	dset_config['class'] = 'EdgeDataset'
	elif dset_config['class'] == 'LazyMultiLabelDataset':
	dset_config['class'] = 'MultiLabelDataset'
	elif dset_config['class'] == 'PhotonIDDataset':
	dset_config['class'] = 'UnlazyPhotonIDDataset'
	elif dset_config['class'] == 'kNNDataset':
	dset_config['class'] = 'UnlazyKNNDataset'
	dset_config['args']['raw_dir'] = os.path.split(args.target)[0]
	dset_config['args']['file_names'] = os.path.split(args.target)[1]
	dset_config['args']['save'] = False
	dset_config['args']['chunks'] = args.chunks
	dset_config['args']['process_chunks'] = [args.chunkno,]
	dset_config['args']['selections'] = []

	dset_config['args']['save_dir'] = os.path.dirname(args.destination)

	if args.tree != '':
	dset_config['args']['tree_name'] = args.tree

	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

	dstart = time.time()
	dset = utils.buildFromConfig(dset_config)
	dend = time.time()
	print('Dataset finished in {:.2f} seconds'.format(dend - dstart))

	print(dset)

	batch_size = config['Training']['batch_size']
	lstart = time.time()
	loader = CustomPreBatchedDataset(
	dset,
	batch_size,
	chunkno=args.chunkno,
	chunks=args.chunks
	)
	loader.process()
	lend = time.time()
	print('Loader finished in {:.2f} seconds'.format(lend - lstart))
	sample_graph, _, _, global_sample = loader[0]
	global_sample = []

	print('dset length =', len(dset))
	print('loader length =', len(loader))

	all_scores = {}
	all_labels = {}
	all_tracking = {}
	with torch.no_grad():
	for config_file, branch in zip(args.config, args.branch_name):
	config = load_config(config_file)
	model = utils.buildFromConfig(config['Model'], {'sample_graph' : sample_graph, 'sample_global': global_sample}).to(device)

	if args.ckpt < 0:
	ep, checkpoint = utils.get_best_epoch(config, var=args.var, mode='max', device=device)
	else:
	ep, checkpoint = utils.get_specific_epoch(config, args.ckpt, device=device)
	# Remove distributed/compiled prefixes if present
	mds_copy = {}
	for key in checkpoint['model_state_dict'].keys():
	newkey = key.replace('module.', '')
	newkey = newkey.replace('_orig_mod.', '')
	mds_copy[newkey] = checkpoint['model_state_dict'][key]
	model.load_state_dict(mds_copy)
	model.eval()

	end = time.time()
	print('Model and dataset finished in {:.2f} seconds'.format(end - start))
	print('Starting inference')
	start = time.time()

	finish_fn = torch.nn.Sigmoid()
	if 'Loss' in config:
	finish_fn = utils.buildFromConfig(config['Loss']['finish'])

	scores = []
	labels = []
	tracking_info = []
	ibatch = 0

	for batch, label, track, globals in loader.dataloader:
	batch = batch.to(device)
	pred = model(batch, globals.to(device))
	ibatch += 1
	if (finish_fn.__class__.__name__ == "ContrastiveClusterFinish"):
	scores.append(pred.detach().cpu().numpy())
	else:
	scores.append(finish_fn(pred).detach().cpu().numpy())
	labels.append(label.detach().cpu().numpy())
	tracking_info.append(track.detach().cpu().numpy())

	score_size = scores[0].shape[1] if len(scores[0].shape) > 1 else 1
	scores = np.concatenate(scores)
	labels = np.concatenate(labels)
	tracking_info = np.concatenate(tracking_info)
	end = time.time()

	print('Inference finished in {:.2f} seconds'.format(end - start))
	all_scores[branch] = scores
	all_labels[branch] = labels
	all_tracking[branch] = tracking_info

	if args.write:
	from ROOT import std
	# Open the original ROOT file
	infile = ROOT.TFile.Open(args.target)
	tree = infile.Get(dset_config['args']['tree_name'])

	# Create the destination directory if it doesn't exist
	os.makedirs(os.path.split(args.destination)[0], exist_ok=True)

	# Create a new ROOT file to write the modified tree
	outfile = ROOT.TFile.Open(args.destination, 'RECREATE')

	# Clone the original tree structure
	outtree = tree.CloneTree(0)

	# Create branches for all scores
	branch_vectors = {}
	for branch, scores in all_scores.items():
	if isinstance(scores[0], (list, tuple, np.ndarray)) and len(scores[0]) > 1:
	# Create a new branch for vectors
	branch_vectors[branch] = std.vector('float')()
	outtree.Branch(branch, branch_vectors[branch])
	else:
	# Create a new branch for single floats
	branch_vectors[branch] = array('f', [0])
	outtree.Branch(branch, branch_vectors[branch], f'{branch}/F')

	# Fill the tree
	for i in range(tree.GetEntries()):
	tree.GetEntry(i)

	for branch, scores in all_scores.items():
	branch_data = branch_vectors[branch]
	if isinstance(branch_data, array): # Check if it's a single float array
	branch_data[0] = float(scores[i])
	else: # Assume it's a std::vector<float>
	branch_data.clear()
	for value in scores[i]:
	branch_data.push_back(float(value))

	outtree.Fill()

	# Write the modified tree to the new file
	print(f'Writing to file {args.destination}')
	print(f'Input entries: {tree.GetEntries()}, Output entries: {outtree.GetEntries()}')
	print(f'Wrote scores to {args.branch_name}')
	outtree.Write()
	outfile.Close()
	infile.Close()
	else:
	os.makedirs(os.path.split(args.destination)[0], exist_ok=True)
	np.savez(args.destination, scores=all_scores, labels=all_labels, tracking_info=all_tracking)

	if __name__ == '__main__':
	main()