Upload folder using huggingface_hub

714cf46 verified 21 days ago

5.42 kB

	import argparse
	import concurrent.futures
	import subprocess
	from pathlib import Path

	from tqdm import tqdm

	OST_COMPARE_STRUCTURE = r"""
	#!/bin/bash
	# https://openstructure.org/docs/2.7/actions/#ost-compare-structures

	IMAGE_NAME=openstructure-0.2.8

	command="compare-structures \
	-m {model_file} \
	-r {reference_file} \
	--fault-tolerant \
	--min-pep-length 4 \
	--min-nuc-length 4 \
	-o {output_path} \
	--lddt --bb-lddt --qs-score --dockq \
	--ics --ips --rigid-scores --patch-scores --tm-score"

	sudo docker run -u $(id -u):$(id -g) --rm --volume {mount}:{mount} $IMAGE_NAME $command
	"""


	OST_COMPARE_LIGAND = r"""
	#!/bin/bash
	# https://openstructure.org/docs/2.7/actions/#ost-compare-structures

	IMAGE_NAME=openstructure-0.2.8

	command="compare-ligand-structures \
	-m {model_file} \
	-r {reference_file} \
	--fault-tolerant \
	--lddt-pli --rmsd \
	--substructure-match \
	-o {output_path}"

	sudo docker run -u $(id -u):$(id -g) --rm --volume {mount}:{mount} $IMAGE_NAME $command
	"""


	def evaluate_structure(
	name: str,
	pred: Path,
	reference: Path,
	outdir: str,
	mount: str,
	executable: str = "/bin/bash",
	) -> None:
	"""Evaluate the structure."""
	# Evaluate polymer metrics
	out_path = Path(outdir) / f"{name}.json"

	if out_path.exists():
	print( # noqa: T201
	f"Skipping recomputation of {name} as protein json file already exists"
	)
	else:
	subprocess.run(
	OST_COMPARE_STRUCTURE.format(
	model_file=str(pred),
	reference_file=str(reference),
	output_path=str(out_path),
	mount=mount,
	),
	shell=True, # noqa: S602
	check=False,
	executable=executable,
	capture_output=True,
	)

	# Evaluate ligand metrics
	out_path = Path(outdir) / f"{name}_ligand.json"
	if out_path.exists():
	print(f"Skipping recomputation of {name} as ligand json file already exists") # noqa: T201
	else:
	subprocess.run(
	OST_COMPARE_LIGAND.format(
	model_file=str(pred),
	reference_file=str(reference),
	output_path=str(out_path),
	mount=mount,
	),
	shell=True, # noqa: S602
	check=False,
	executable=executable,
	capture_output=True,
	)


	def main(args):
	# Aggregate the predictions and references
	files = list(args.data.iterdir())
	names = {f.stem.lower(): f for f in files}

	# Create the output directory
	args.outdir.mkdir(parents=True, exist_ok=True)

	first_item = True
	with concurrent.futures.ThreadPoolExecutor(args.max_workers) as executor:
	futures = []
	for name, folder in names.items():
	for model_id in range(5):
	# Split the input data
	if args.format == "af3":
	pred_path = folder / f"seed-1_sample-{model_id}" / "model.cif"
	elif args.format == "chai":
	pred_path = folder / f"pred.model_idx_{model_id}.cif"
	elif args.format == "boltz":
	name_file = (
	f"{name[0].upper()}{name[1:]}"
	if args.testset == "casp"
	else name.lower()
	)
	pred_path = folder / f"{name_file}_model_{model_id}.cif"

	if args.testset == "casp":
	ref_path = args.pdb / f"{name[0].upper()}{name[1:]}.cif"
	elif args.testset == "test":
	ref_path = args.pdb / f"{name.lower()}.cif.gz"

	if first_item:
	# Evaluate the first item in the first prediction
	# Ensures that the docker image is downloaded
	evaluate_structure(
	name=f"{name}_model_{model_id}",
	pred=str(pred_path),
	reference=str(ref_path),
	outdir=str(args.outdir),
	mount=args.mount,
	executable=args.executable,
	)
	first_item = False
	else:
	future = executor.submit(
	evaluate_structure,
	name=f"{name}_model_{model_id}",
	pred=str(pred_path),
	reference=str(ref_path),
	outdir=str(args.outdir),
	mount=args.mount,
	executable=args.executable,
	)
	futures.append(future)

	# Wait for all tasks to complete
	with tqdm(total=len(futures)) as pbar:
	for _ in concurrent.futures.as_completed(futures):
	pbar.update(1)


	if __name__ == "__main__":
	parser = argparse.ArgumentParser()
	parser.add_argument("data", type=Path)
	parser.add_argument("pdb", type=Path)
	parser.add_argument("outdir", type=Path)
	parser.add_argument("--format", type=str, default="af3")
	parser.add_argument("--testset", type=str, default="casp")
	parser.add_argument("--mount", type=str)
	parser.add_argument("--executable", type=str, default="/bin/bash")
	parser.add_argument("--max-workers", type=int, default=32)
	args = parser.parse_args()
	main(args)