Spaces:

AccelerationConsortium
/

crabnet-hyperparameter

Running

App Files Files Community

sgbaird committed on Mar 27, 2024

Commit

b0c702c

1 Parent(s): f618644

Refactor evaluate function in app.py to include parameter scaling and unscaled evaluation

Browse files

Files changed (1) hide show

app.py +95 -34

app.py CHANGED Viewed

@@ -44,11 +44,42 @@ example_parameterization = {
 example_results = model.surrogate_evaluate([example_parameterization])
 example_result = example_results[0]
-scalers = {
-    param_info["name"]: MinMaxScaler()
-    for param_info in PARAM_BOUNDS
-    if param_info["type"] == "range"
-}
 class BlindedParameterization(BaseModel):
@@ -72,15 +103,15 @@ class BlindedParameterization(BaseModel):
     x18: float  # int
     x19: float
     x20: float
-    c1: bool
     c2: str
     c3: str
-    f1: float
     @field_validator("*")
     def check_bounds(cls, v: int, info: ValidationInfo) -> int:
         param = next(
-            (item for item in PARAM_BOUNDS if item["name"] == info.field_name),
             None,
         )
         if param is None:
@@ -110,31 +141,54 @@ class BlindedParameterization(BaseModel):
             )
 def evaluate(*args):
-    # Create a DataFrame with the parameter names and scaled values
-    params_df = pd.DataFrame([args], columns=[param["name"] for param in PARAM_BOUNDS])
-    # error checking
-    BlindedParameterization(**params_df.to_dict("records")[0])
-    # Reverse the scaling for each parameter and reverse the renaming for choice parameters
-    for param_info in PARAM_BOUNDS:
-        key = param_info["name"]
-        if param_info["type"] == "range":
-            scaler = scalers[key]
-            params_df[key] = scaler.inverse_transform(params_df[[key]])
-        elif param_info["type"] == "choice":
-            # Extract the index from the renamed choice and use it to get the original choice
-            choice_index = int(params_df[key].str.split("_").str[-1].iloc[0])
-            params_df[key] = param_info["values"][choice_index]
-    # Convert the DataFrame to a list of dictionaries
-    params_list = params_df.to_dict("records")
-    # Evaluate the model with the unscaled parameters
-    results = model.surrogate_evaluate(params_list)
-    # Convert list of dictionaries to list of lists
     results_list = [list(result.values()) for result in results]
     return results_list
@@ -148,7 +202,7 @@ def get_interface(param_info, numeric_index, choice_index):
         scaler.fit([[bound] for bound in param_info["bounds"]])
         scaled_value = scaler.transform([[default_value]])[0][0]
         scaled_bounds = scaler.transform([[bound] for bound in param_info["bounds"]])
-        label = f"f1" if key == "train_frac" else f"x{numeric_index}"
         return (
             gr.Slider(  # Change this line
                 value=scaled_value,
@@ -174,6 +228,9 @@ def get_interface(param_info, numeric_index, choice_index):
         )
 numeric_index = 1
 choice_index = 1
 inputs = []
@@ -201,8 +258,8 @@ iface = gr.Interface(
     words, repeat calls with the same input arguments will result in different
     values for `y1`, `y2`, and `y3`, but the same value for `y4`.
-    If `y1` is less than 0.2, the result is considered "bad" no matter how good
-    the other values are. If `y2` is less than 0.7, the result is considered
     "bad" no matter how good the other values are. If `y3` is greater than 1800,
     the result is considered "bad" no matter how good the other values are. If `y4`
     is greater than 40e6, the result is considered "bad" no matter how good the
@@ -213,6 +270,10 @@ iface = gr.Interface(
     evaluation. However, this also typically means higher quality and relevance
     to the optimization campaign goals. `fidelity1` and `y3` are
     correlated.
     """,
 )
 iface.launch()

 example_results = model.surrogate_evaluate([example_parameterization])
 example_result = example_results[0]
+# Build one fitted scaler per "range" parameter, keyed by the parameter's original name
+scalers = {}
+for param_info in PARAM_BOUNDS:
+    if param_info["type"] == "range":
+        scaler = MinMaxScaler()
+        # Fit on the bounds themselves, passed as one single-feature sample per bound
+        scaler.fit([[bound] for bound in param_info["bounds"]])
+        scalers[param_info["name"]] = scaler
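+# HACK: The blinded names and their [0, 1] bounds / choice labels are hardcoded
+# here rather than derived from PARAM_BOUNDS. evaluate() zips its positional
+# arguments against these names in this order.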
+BLINDED_PARAM_BOUNDS = [
+    {"name": "x1", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x2", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x3", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x4", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x5", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x6", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x7", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x8", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x9", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x10", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x11", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x12", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x13", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x14", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x15", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x16", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x17", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x18", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x19", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "x20", "type": "range", "bounds": [0.0, 1.0]},
+    {"name": "c1", "type": "choice", "values": ["c1_0", "c1_1"]},
+    {"name": "c2", "type": "choice", "values": ["c2_0", "c2_1"]},
+    {"name": "c3", "type": "choice", "values": ["c3_0", "c3_1"]},
+    {"name": "fidelity1", "type": "range", "bounds": [0.0, 1.0]},
+]
 class BlindedParameterization(BaseModel):
     x18: float  # int
     x19: float
     x20: float
+    c1: str  # bool
     c2: str
     c3: str
+    fidelity1: float
     @field_validator("*")
     def check_bounds(cls, v: int, info: ValidationInfo) -> int:
         param = next(
+            (item for item in BLINDED_PARAM_BOUNDS if item["name"] == info.field_name),
             None,
         )
         if param is None:
             )
+# Conversion from original to blinded representation
+def convert_to_blinded(params):
+    blinded_params = {}
+    numeric_index = 1
+    choice_index = 1
+    for param in PARAM_BOUNDS:
+        if param["type"] == "range":
+            key = f"x{numeric_index}" if param["name"] != "train_frac" else "fidelity1"
+            blinded_params[key] = scalers[param["name"]].transform(
+                [[params[param["name"]]]]
+            )[0][0]
+            numeric_index += 1 if param["name"] != "train_frac" else 0
+        elif param["type"] == "choice":
+            key = f"c{choice_index}"
+            choice_index = param["values"].index(params[param["name"]])
+            blinded_params[key] = f"{key}_{choice_index}"
+            choice_index += 1
+    return blinded_params
+# Conversion from blinded to original representation
+def convert_from_blinded(blinded_params):
+    original_params = {}
+    numeric_index = 1
+    choice_index = 1
+    for param in PARAM_BOUNDS:
+        if param["type"] == "range":
+            key = f"x{numeric_index}" if param["name"] != "train_frac" else "fidelity1"
+            original_params[param["name"]] = scalers[param["name"]].inverse_transform(
+                [[blinded_params[key]]]
+            )[0][0]
+            numeric_index += 1 if param["name"] != "train_frac" else 0
+        elif param["type"] == "choice":
+            key = f"c{choice_index}"
+            choice_value = blinded_params[key].split("_")[-1]
+            original_params[param["name"]] = param["values"][int(choice_value)]
+            choice_index += 1
+    return original_params
 def evaluate(*args):
+    # Assume args are in the order of BLINDED_PARAM_BOUNDS
+    blinded_params = dict(zip([param["name"] for param in BLINDED_PARAM_BOUNDS], args))
+    original_params = convert_from_blinded(blinded_params)
+    BlindedParameterization(**blinded_params)  # Validation
+    params_list = [original_params]
+    results = model.surrogate_evaluate(params_list)
     results_list = [list(result.values()) for result in results]
     return results_list
         scaler.fit([[bound] for bound in param_info["bounds"]])
         scaled_value = scaler.transform([[default_value]])[0][0]
         scaled_bounds = scaler.transform([[bound] for bound in param_info["bounds"]])
+        label = f"fidelity1" if key == "train_frac" else f"x{numeric_index}"
         return (
             gr.Slider(  # Change this line
                 value=scaled_value,
         )
+# test the evaluate function
+blinded_results = evaluate(*[0.5] * 20, "c1_0", "c2_0", "c3_0", 0.5)
 numeric_index = 1
 choice_index = 1
 inputs = []
     words, repeat calls with the same input arguments will result in different
     values for `y1`, `y2`, and `y3`, but the same value for `y4`.
+    If `y1` is greater than 0.2, the result is considered "bad" no matter how good
+    the other values are. If `y2` is greater than 0.7, the result is considered
     "bad" no matter how good the other values are. If `y3` is greater than 1800,
     the result is considered "bad" no matter how good the other values are. If `y4`
     is greater than 40e6, the result is considered "bad" no matter how good the
     evaluation. However, this also typically means higher quality and relevance
     to the optimization campaign goals. `fidelity1` and `y3` are
     correlated.
+    Constraints:
+    - `x19` should be less than `x20`.
+    - `x6` and `x15` should sum to no more than 1.0.
     """,
 )
 iface.launch()