Spaces:

atomind
/

mlip-arena

Running

github-actions[ci]

Clean sync from main branch - 2025-10-16 23:00:12

afe68b4 2 months ago

5.39 kB

	from itertools import product
	from pathlib import Path

	import numpy as np
	import pandas as pd
	from dask.distributed import Client
	from dask_jobqueue import SLURMCluster
	from mlip_arena.models import MLIPEnum
	from mlip_arena.tasks import ELASTICITY, OPT, PHONON
	from mlip_arena.tasks.optimize import run as OPT
	from mlip_arena.tasks.utils import get_calculator
	from numpy import linalg as LA
	from prefect import flow, task
	from prefect_dask import DaskTaskRunner
	from tqdm.auto import tqdm

	from ase.db import connect

	# Names of the models to benchmark. run_all() iterates over every MLIPEnum
	# member and skips those whose ``name`` is not listed here.
	select_models = [
	"ALIGNN",
	"CHGNet",
	"M3GNet",
	"MACE-MP(M)",
	"MACE-MPA",
	"MatterSim",
	"ORBv2",
	"SevenNet",
	]


	def elastic_tensor_to_voigt(C):
	    """
	    Reduce a rank-4 (3x3x3x3) elastic tensor to a 3x3 in-plane matrix.

	    Despite the function name, the result is NOT the full 6x6 Voigt matrix.
	    Only three index pairs are kept: (0, 0) -> 0, (1, 1) -> 1 and the shear
	    pair (0, 1)/(1, 0) -> 2. Every component that involves the third
	    Cartesian axis is discarded (presumably because the structures are
	    two-dimensional -- confirm against the caller).

	    Each shear index contributes a factor of sqrt(2), which matches the
	    Mandel (Kelvin) normalisation rather than plain Voigt notation. As a
	    result ``out[2, 2]`` equals ``sqrt(2) * sqrt(2) * C[1, 0, 1, 0]``.

	    Parameters:
	    C (array-like): Elastic tensor of shape (3, 3, 3, 3).

	    Returns:
	    numpy.ndarray: The 3x3 reduced in-plane elastic matrix (float).

	    Raises:
	    ValueError: If ``C`` does not have shape (3, 3, 3, 3).
	    """
	    C = np.asarray(C)
	    if C.shape != (3, 3, 3, 3):
	        raise ValueError(
	            f"expected an elastic tensor of shape (3, 3, 3, 3), got {C.shape}"
	        )

	    shear = 2**0.5

	    # One (i, j, weight) entry per reduced index. The shear pair is read from
	    # (1, 0) rather than (0, 1): visiting all (i, j, k, l) in ascending order
	    # and letting the last write win -- which is what the nested-loop form of
	    # this routine did -- ends on (1, 0), so a tensor without exact minor
	    # symmetry still yields identical entries.
	    components = (
	        (0, 0, 1.0),
	        (1, 1, 1.0),
	        (1, 0, shear),
	    )

	    reduced = np.zeros((3, 3))
	    for alpha, (p, q, w_row) in enumerate(components):
	        for beta, (r, s, w_col) in enumerate(components):
	            # Combine the weights first so the rounding matches a single
	            # multiplication by the accumulated factor.
	            reduced[alpha, beta] = C[p, q, r, s] * (w_row * w_col)

	    return reduced


	# -


	@task
	def run_one(model, row):
	    """
	    Relax one database structure with one model and record its elastic and
	    phonon results.

	    The task runs, in order:

	    1. a FIRE structure optimisation (fmax=0.05, at most 500 steps, symmetry
	       enabled);
	    2. an elastic-tensor calculation on the relaxed structure;
	    3. an eigen-decomposition of the reduced 3x3 elastic matrix, saved with
	       the matrix to ``<model.name>/<uid>/elastic.npz``;
	    4. a phonon calculation with a (2, 2, 1) supercell, from which the
	       frequencies at q = (0, 0, 0) are taken;
	    5. an insert-or-replace of the summary row in ``<model.name>.pkl``.

	    Parameters:
	    model: Model entry exposing ``.name`` and accepted by ``get_calculator``
	        (run_all passes members of ``MLIPEnum``).
	    row: Database row exposing ``toatoms()`` and ``key_value_pairs``
	        (run_all passes rows selected from the ASE database).

	    Returns:
	    None. Results are written to disk only.
	    """
	    atoms = row.toatoms()
	    calc = get_calculator(model)

	    result_opt = OPT(
	        atoms,
	        calc,
	        optimizer="FIRE",
	        criterion=dict(fmax=0.05, steps=500),
	        symmetry=True,
	    )

	    # Continue with the relaxed structure.
	    atoms = result_opt["atoms"]

	    result_elastic = ELASTICITY(
	        atoms,
	        calc,
	        optimizer="FIRE",
	        criterion=dict(fmax=0.05, steps=500),
	        pre_relax=False,  # already relaxed by the OPT step
	    )

	    elastic_tensor = elastic_tensor_to_voigt(result_elastic["elastic_tensor"])
	    # Only the eigenvalues are stored; the eigenvectors are not needed.
	    eigenvalues, _ = LA.eig(elastic_tensor)

	    # Per-structure output directory; falls back to the chemical formula when
	    # the row carries no "uid" key.
	    outdir = Path(f"{model.name}") / row.key_value_pairs.get(
	        "uid", atoms.get_chemical_formula()
	    )
	    outdir.mkdir(parents=True, exist_ok=True)

	    np.savez(outdir / "elastic.npz", tensor=elastic_tensor, eigenvalues=eigenvalues)

	    result_phonon = PHONON(
	        atoms,
	        calc,
	        supercell_matrix=(2, 2, 1),
	        outdir=outdir,
	    )

	    frequencies = result_phonon["phonon"].get_frequencies(q=(0, 0, 0))

	    new_row = pd.DataFrame(
	        [
	            {
	                "model": model.name,
	                "uid": row.key_value_pairs.get("uid", None),
	                "eigenvalues": eigenvalues,
	                "frequencies": frequencies,
	            }
	        ]
	    )

	    # Load the accumulated results only now, right before appending. Many
	    # tasks for the same model run concurrently; reading the file before the
	    # long calculations above would write back a stale snapshot and drop
	    # every row that other tasks stored in the meantime.
	    # NOTE(review): this narrows the race window but is still not atomic --
	    # two tasks finishing at the same instant can still overwrite each other.
	    # A per-task output file or a file lock would close it completely.
	    results_path = Path(f"{model.name}.pkl")
	    if results_path.exists():
	        df = pd.concat([pd.read_pickle(results_path), new_row], ignore_index=True)
	    else:
	        df = new_row

	    # Keep only the most recent entry for each (model, uid) pair.
	    df.drop_duplicates(subset=["model", "uid"], keep="last", inplace=True)

	    df.to_pickle(results_path)


	@flow
	def run_all():
	    """
	    Submit run_one for a reproducible random sample of database rows and
	    every selected model.

	    Up to 1000 row ids are drawn from ``c2db.db`` with a fixed seed (0). For
	    each sampled row and each ``MLIPEnum`` member whose name is listed in
	    ``select_models``, one ``run_one`` task is submitted.

	    Returns:
	    list: One entry per submitted task, obtained with
	        ``result(raise_on_failure=False)`` so that a failed task does not
	        abort the collection of the others.
	    """
	    import random

	    random.seed(0)

	    futures = []
	    with connect("c2db.db") as db:
	        n_rows = len(db)
	        # random.sample raises ValueError when asked for more items than the
	        # population holds, so cap the sample at the database size.
	        # A set gives O(1) membership tests in the row filter below.
	        # NOTE(review): assumes row ids are contiguous in 1..len(db) -- confirm.
	        sampled_ids = set(random.sample(range(1, n_rows + 1), min(1000, n_rows)))

	        for row, model in tqdm(
	            product(db.select(filter=lambda r: r["id"] in sampled_ids), MLIPEnum)
	        ):
	            if model.name not in select_models:
	                continue
	            futures.append(run_one.submit(model, row))

	    return [f.result(raise_on_failure=False) for f in futures]


	# +


	if __name__ == "__main__":
	    # Per-allocation resource counts; the first two are interpolated into the
	    # SLURM directives below. ``ntasks`` is not referenced again in this block.
	    nodes_per_alloc = 1
	    gpus_per_alloc = 1
	    ntasks = 1

	    # Commands run at the start of every generated job script.
	    job_prologue = [
	        "source ~/.bashrc",
	        "module load python",
	        "source activate /pscratch/sd/c/cyrusyc/.conda/dev",
	    ]

	    # Extra #SBATCH directives appended to the generated job script.
	    extra_directives = [
	        "-J c2db",
	        "-q regular",
	        f"-N {nodes_per_alloc}",
	        "-C gpu",
	        f"-G {gpus_per_alloc}",
	    ]

	    cluster_kwargs = {
	        "cores": 1,
	        "memory": "64 GB",
	        "processes": 1,
	        "shebang": "#!/bin/bash",
	        "account": "matgen",
	        "walltime": "00:30:00",
	        # job_cpu=128 is intentionally left disabled.
	        "job_mem": "0",
	        "job_script_prologue": job_prologue,
	        # Directives listed here are skipped; "-J" is supplied again through
	        # the extra directives above.
	        "job_directives_skip": ["-n", "--cpus-per-task", "-J"],
	        "job_extra_directives": extra_directives,
	    }

	    cluster = SLURMCluster(**cluster_kwargs)
	    # Print the generated submission script for inspection.
	    print(cluster.job_script())
	    # Adaptive scaling between 25 and 50 jobs.
	    cluster.adapt(minimum_jobs=25, maximum_jobs=50)
	    client = Client(cluster)
	    # -

	    # Run the flow with its tasks dispatched to the Dask scheduler created above.
	    dask_runner = DaskTaskRunner(address=client.scheduler.address)
	    configured_flow = run_all.with_options(task_runner=dask_runner, log_prints=True)
	    configured_flow()