# pydantic validation of x1, x2, ... syntax (commit f618644)
import numpy as np
import gradio as gr
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from surrogate import CrabNetSurrogateModel, PARAM_BOUNDS
from pydantic import (
BaseModel,
ValidationError,
ValidationInfo,
field_validator,
model_validator,
)
# Surrogate model instance shared by every evaluation below.
model = CrabNetSurrogateModel()

# A known-good default parameterization, used both to seed the UI widgets
# and to produce an example output row for the results table.
example_parameterization = {
    "N": 3,
    "alpha": 0.5,
    "d_model": 512,
    "dim_feedforward": 2048,
    "dropout": 0.1,
    "emb_scaler": 0.5,
    "epochs_step": 10,
    "eps": 0.000001,
    "fudge": 0.02,
    "heads": 4,
    "k": 6,
    "lr": 0.001,
    "pe_resolution": 5000,
    "ple_resolution": 5000,
    "pos_scaler": 0.5,
    "weight_decay": 0,
    "batch_size": 32,
    "out_hidden4": 128,
    "betas1": 0.9,
    "betas2": 0.999,
    "bias": False,
    "criterion": "RobustL1",
    "elem_prop": "mat2vec",
    "train_frac": 0.5,
}

# Evaluate once up front so the interface can show an example output.
example_result = model.surrogate_evaluate([example_parameterization])[0]
example_results = [example_result]

# One MinMaxScaler per continuous ("range") parameter; fitted later in
# get_interface() so sliders can operate on a blinded [0, 1] scale.
scalers = {}
for param_info in PARAM_BOUNDS:
    if param_info["type"] == "range":
        scalers[param_info["name"]] = MinMaxScaler()
class BlindedParameterization(BaseModel):
    """Blinded view of a CrabNet parameterization.

    Continuous parameters are exposed as ``x1`` .. ``x20`` (several are
    integer-valued in the underlying model — see the inline ``# int``
    markers), categorical parameters as ``c1`` .. ``c3``, and the fidelity
    parameter as ``f1``.
    """

    x1: float  # int
    x2: float
    x3: float  # int
    x4: float  # int
    x5: float
    x6: float
    x7: float  # int
    x8: float
    x9: float
    x10: float  # int
    x11: float  # int
    x12: float
    x13: float  # int
    x14: float  # int
    x15: float
    x16: float  # int
    x17: float  # int
    x18: float  # int
    x19: float
    x20: float
    c1: bool
    c2: str
    c3: str
    f1: float

    @field_validator("*")
    def check_bounds(cls, v: float, info: ValidationInfo) -> float:
        """Validate a field against its PARAM_BOUNDS entry, if one exists.

        NOTE(review): PARAM_BOUNDS entries appear to be keyed by the
        *unblinded* parameter names (e.g. "N", "alpha" — see
        example_parameterization), while this model's fields use the blinded
        names (x1, c1, ...), so this lookup may never match and values would
        pass through unchecked — confirm intended.
        """
        param = next(
            (item for item in PARAM_BOUNDS if item["name"] == info.field_name),
            None,
        )
        if param is None:
            # No bounds metadata for this field name; accept the value as-is.
            return v
        if param["type"] == "range":
            min_val, max_val = param["bounds"]
            if not min_val <= v <= max_val:
                raise ValueError(
                    f"{info.field_name} must be between {min_val} and {max_val}"
                )
        elif param["type"] == "choice":
            if v not in param["values"]:
                raise ValueError(f"{info.field_name} must be one of {param['values']}")
        return v

    @model_validator(mode="after")
    def check_constraints(self) -> "BlindedParameterization":
        """Enforce cross-field constraints: x19 <= x20 and x6 + x15 <= 1.0."""
        if self.x19 > self.x20:
            raise ValueError(
                f"Received x19={self.x19} which should be less than x20={self.x20}"
            )
        if self.x6 + self.x15 > 1.0:
            raise ValueError(
                f"Received x6={self.x6} and x15={self.x15} which should sum to less than or equal to 1.0"  # noqa: E501
            )
        # BUG FIX: a mode="after" model validator must return the model
        # instance; the original returned None, which pydantic v2 treats as
        # the validated result.
        return self
def evaluate(*args):
    """Evaluate the CrabNet surrogate model for one blinded parameterization.

    Args:
        *args: Raw widget values from the Gradio interface, in the same order
            as PARAM_BOUNDS (scaled [0, 1] floats for "range" parameters,
            renamed strings such as "c1_0" for "choice" parameters).

    Returns:
        list[list]: One row of objective values per evaluated
        parameterization (here always a single row).
    """
    # Create a DataFrame with the parameter names and scaled values
    params_df = pd.DataFrame([args], columns=[param["name"] for param in PARAM_BOUNDS])
    # error checking
    # NOTE(review): the DataFrame columns are the *unblinded* PARAM_BOUNDS
    # names, but BlindedParameterization declares fields x1..x20/c1..c3/f1 —
    # unless PARAM_BOUNDS already uses the blinded names, this call would
    # raise a ValidationError for missing fields. Confirm against the
    # surrogate module's PARAM_BOUNDS.
    BlindedParameterization(**params_df.to_dict("records")[0])
    # Reverse the scaling for each parameter and reverse the renaming for choice parameters
    for param_info in PARAM_BOUNDS:
        key = param_info["name"]
        if param_info["type"] == "range":
            # Undo the [0, 1] blinding applied by the slider in get_interface.
            scaler = scalers[key]
            params_df[key] = scaler.inverse_transform(params_df[[key]])
        elif param_info["type"] == "choice":
            # Extract the index from the renamed choice and use it to get the original choice
            # (e.g. "c2_1" -> param_info["values"][1]).
            choice_index = int(params_df[key].str.split("_").str[-1].iloc[0])
            params_df[key] = param_info["values"][choice_index]
    # Convert the DataFrame to a list of dictionaries
    params_list = params_df.to_dict("records")
    # Evaluate the model with the unscaled parameters
    results = model.surrogate_evaluate(params_list)
    # Convert list of dictionaries to list of lists
    results_list = [list(result.values()) for result in results]
    return results_list
def get_interface(param_info, numeric_index, choice_index):
    """Build the blinded Gradio input component for one surrogate parameter.

    Range parameters become sliders on a MinMaxScaler-blinded [0, 1] scale
    labeled "x<numeric_index>" (except train_frac, which is the fidelity
    parameter "f1"); choice parameters become radio groups labeled
    "c<choice_index>" with options renamed "c<i>_<j>".

    Args:
        param_info: One PARAM_BOUNDS entry (dict with "name", "type", and
            either "bounds" or "values").
        numeric_index: 1-based counter for the next "x" label.
        choice_index: 1-based counter for the next "c" label.

    Returns:
        tuple: (gradio component, next numeric_index, next choice_index).

    Raises:
        ValueError: If param_info["type"] is neither "range" nor "choice".
    """
    key = param_info["name"]
    default_value = example_parameterization[key]
    if param_info["type"] == "range":
        # Fit the scaler on this parameter's true bounds, then present the
        # default value and bounds on the blinded [0, 1] scale.
        scaler = scalers[key]
        scaler.fit([[bound] for bound in param_info["bounds"]])
        scaled_value = scaler.transform([[default_value]])[0][0]
        scaled_bounds = scaler.transform([[bound] for bound in param_info["bounds"]])
        # train_frac is the fidelity parameter, so it gets the "f1" label
        # instead of consuming a numeric "x" index.
        label = "f1" if key == "train_frac" else f"x{numeric_index}"
        return (
            gr.Slider(
                value=scaled_value,
                minimum=scaled_bounds[0][0],
                maximum=scaled_bounds[1][0],
                label=label,
                step=(scaled_bounds[1][0] - scaled_bounds[0][0]) / 100,
            ),
            numeric_index + 1,
            choice_index,
        )
    elif param_info["type"] == "choice":
        # Blind the categorical options as "c<k>_<j>"; the default is the
        # option whose original value matches the example parameterization.
        return (
            gr.Radio(
                choices=[
                    f"c{choice_index}_{i}" for i in range(len(param_info["values"]))
                ],
                label=f"c{choice_index}",
                value=f"c{choice_index}_{param_info['values'].index(default_value)}",
            ),
            numeric_index,
            choice_index + 1,
        )
    # BUG FIX: previously an unknown type fell through and returned None,
    # causing an opaque unpack error at the call site.
    raise ValueError(f"Unknown parameter type: {param_info['type']!r}")
# Build one Gradio input component per surrogate parameter, assigning the
# blinded labels (x1.., c1.., f1) in PARAM_BOUNDS order.
numeric_index = 1
choice_index = 1
inputs = []
for param in PARAM_BOUNDS:
    # Renamed from `input`, which shadowed the builtin of the same name.
    component, numeric_index, choice_index = get_interface(
        param, numeric_index, choice_index
    )
    inputs.append(component)
iface = gr.Interface(
title="CrabNetSurrogateModel",
fn=evaluate,
inputs=inputs,
outputs=gr.Numpy(
value=np.array([list(example_result.values())]),
headers=[f"y{i+1}" for i in range(len(example_result))],
col_count=(len(example_result), "fixed"),
datatype=["number"] * len(example_result),
),
description="""
`y1`, `y2`, `y3`, and `y4`, should all be minimized. `y1` and `y2` are
correlated, whereas `y1` and `y2` are both anticorrelated with `y3`. `y1`,
`y2`, and `y3` are stochastic (heteroskedastic, parameter-free noise),
whereas `y4` is deterministic, but still considered 'black-box'. In other
words, repeat calls with the same input arguments will result in different
values for `y1`, `y2`, and `y3`, but the same value for `y4`.
If `y1` is less than 0.2, the result is considered "bad" no matter how good
the other values are. If `y2` is less than 0.7, the result is considered
"bad" no matter how good the other values are. If `y3` is greater than 1800,
the result is considered "bad" no matter how good the other values are. If `y4`
is greater than 40e6, the result is considered "bad" no matter how good the
other values are.
`fidelity1` is a fidelity parameter. 0 is the lowest fidelity, and 1 is the
highest fidelity. The higher the fidelity, typically the more expensive the
evaluation. However, this also typically means higher quality and relevance
to the optimization campaign goals. `fidelity1` and `y3` are
correlated.
""",
)
iface.launch()