File size: 13,019 Bytes
3961ee7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
import pandas as pd
import numpy as np
import re
from .constants import ATMOSPHERE_CONFIG, MIXING_METHOD_CONFIG
def _extract_numerical_value_from_op_condition(condition_entry, target_keys=['value', 'max_value', 'values']):
if isinstance(condition_entry, list) and condition_entry:
if isinstance(condition_entry[0], dict):
for key in target_keys:
val = condition_entry[0].get(key)
if val is not None:
if isinstance(val, list) and val:
try: return float(val[0])
except: continue
try: return float(val)
except: continue
elif isinstance(condition_entry[0], (int, float, np.number)):
try: return float(condition_entry[0])
except: pass
elif isinstance(condition_entry, dict):
for key in target_keys:
val = condition_entry.get(key)
if val is not None:
if isinstance(val, list) and val:
try: return float(val[0])
except: continue
try: return float(val)
except: continue
elif isinstance(condition_entry, (int, float, np.number)):
try: return float(condition_entry)
except: pass
return np.nan
def _extract_atmosphere_from_op(op_conditions_dict, op_string, atm_config_local, entry_identifier):
atm_specific = atm_config_local["default_specific"]
atm_category = atm_config_local["default_category"]
found_atm = False
if isinstance(op_conditions_dict, dict):
atm_source_key_val = op_conditions_dict.get('atmosphere')
if not atm_source_key_val and 'text' in op_conditions_dict and isinstance(op_conditions_dict['text'], str) :
atm_source_key_val = op_conditions_dict['text']
if atm_source_key_val:
atm_str_to_parse = None
if isinstance(atm_source_key_val, list) and atm_source_key_val: atm_str_to_parse = str(atm_source_key_val[0])
elif isinstance(atm_source_key_val, str): atm_str_to_parse = atm_source_key_val
elif isinstance(atm_source_key_val, dict): atm_str_to_parse = str(atm_source_key_val.get('gas', atm_source_key_val.get('value', '')))
if atm_str_to_parse:
atm_str_lower = atm_str_to_parse.lower()
for pattern_regex, specific, category in atm_config_local["patterns"]:
if specific.lower() == atm_str_lower or re.search(pattern_regex, atm_str_to_parse, re.IGNORECASE):
atm_specific, atm_category, found_atm = specific, category, True; break
if not found_atm and '/' in atm_str_lower: atm_specific, atm_category, found_atm = atm_str_to_parse, "Mixed", True
if not found_atm and isinstance(op_string, str) and op_string:
for pattern_regex, specific, category in atm_config_local["patterns"]:
if re.search(pattern_regex, op_string, re.IGNORECASE):
atm_specific, atm_category, found_atm = specific, category, True; break
return atm_specific, atm_category
def _extract_mixing_method_from_op(op_dict, op_string, mix_config_local, entry_identifier):
mix_method = mix_config_local["default_method"]
op_type = str(op_dict.get('type', '')).lower()
if isinstance(op_string, str) and op_string:
for pattern_regex, method_name in mix_config_local["patterns"]:
if re.search(pattern_regex, op_string, re.IGNORECASE): return method_name
if 'mix' in op_type or 'grind' in op_type or 'mill' in op_type:
for pattern_regex, method_name in mix_config_local["patterns"]:
if re.search(pattern_regex, op_type, re.IGNORECASE): return method_name
if op_type.strip() and op_type not in ["mixing", "liquidgrinding", "solutionmixing", "grinding"]: return op_type
return mix_method
def _extract_thermal_conditions(conditions_dict, op_string, entry_identifier):
temps, durs = [], []
if isinstance(conditions_dict, dict):
temp_data = conditions_dict.get('heating_temperature')
if temp_data:
extracted_temp = _extract_numerical_value_from_op_condition(temp_data)
if pd.notna(extracted_temp): temps.append(extracted_temp)
dur_data = conditions_dict.get('heating_time')
if dur_data:
extracted_dur = _extract_numerical_value_from_op_condition(dur_data)
if pd.notna(extracted_dur): durs.append(extracted_dur)
return temps, durs
def parse_single_operation_detailed_for_input(op_dict_raw, entry_identifier="predict_op"):
    """Turn one raw operation mapping into a flat dict of per-operation features.

    The operation's ``'type'`` and ``'string'`` entries are lower-cased and
    its ``'conditions'`` entry is handed to the thermal, atmosphere and
    mixing-method extractors. Boolean ``op_is_*`` flags are then derived from
    substring checks on the type, on the description, and on the detected
    mixing method.

    Args:
        op_dict_raw: Raw operation mapping with optional ``'type'``,
            ``'string'`` and ``'conditions'`` keys.
        entry_identifier: Label forwarded to the extractor helpers.

    Returns:
        Feature dict with the ``op_*`` keys, or ``{}`` when ``op_dict_raw``
        is not a dict.
    """
    if not isinstance(op_dict_raw, dict):
        return {}

    type_lc = str(op_dict_raw.get('type', 'UnknownType')).lower()
    text_lc = str(op_dict_raw.get('string', '')).lower()
    cond = op_dict_raw.get('conditions', {})

    def type_has(*needles):
        """True when any needle occurs in the lower-cased operation type."""
        return any(needle in type_lc for needle in needles)

    temp_list, duration_list = _extract_thermal_conditions(cond, text_lc, entry_identifier)
    atm_specific, atm_category = _extract_atmosphere_from_op(
        cond, text_lc, ATMOSPHERE_CONFIG, entry_identifier)
    mixing_method = _extract_mixing_method_from_op(
        op_dict_raw, text_lc, MIXING_METHOD_CONFIG, entry_identifier)

    # Mixing methods that imply a grinding/milling step.
    grinding_methods = ['grinding', 'ball_milling', 'planetary_milling',
                        'attritor_milling', 'shaker_milling', 'mortar_pestle']

    return {
        'op_temp_C_list': temp_list,
        'op_duration_h_list': duration_list,
        'op_atmosphere_specific': atm_specific,
        'op_atmosphere_category': atm_category,
        'op_mixing_method': mixing_method,
        'op_is_heating': type_has('heat', 'anneal', 'sinter', 'calcination'),
        'op_is_mixing': type_has('mix') or mixing_method != MIXING_METHOD_CONFIG["default_method"],
        'op_is_grinding': (type_has('grind', 'mill')
                           or 'pulverize' in text_lc
                           or mixing_method in grinding_methods),
        'op_is_shaping': type_has('shap'),
        # 'dry' already covers 'drying', so a single substring check suffices.
        'op_is_drying': type_has('dry'),
        'op_is_quenching': type_has('quench'),
        'op_is_annealing': type_has('anneal') or 'anneal' in text_lc,
        'op_is_sintering': type_has('sinter') or 'sinter' in text_lc,
        'op_is_calcination': type_has('calcine', 'calcination') or 'calcination' in text_lc,
    }
def generate_process_features_for_input(operations_simplified_list, all_possible_atm_categories, all_possible_mix_methods):
    """Aggregate per-operation features into one flat process-level feature dict.

    Every entry of ``operations_simplified_list`` is parsed with
    ``parse_single_operation_detailed_for_input`` and folded into:

    * step counters (``proc_num_*``) and boolean ``proc_has_*`` flags;
    * temperature statistics over all extracted temperatures, plus
      ``proc_primary_heating_temp_C`` taken from the heating step with the
      highest temperature (ties broken by longer duration);
    * ``proc_total_heating_duration_h`` summed over heating steps (NaN when
      there is no heating step or the sum is zero);
    * one-hot columns ``ops_atm_cat_<category>`` / ``ops_mix_meth_<method>``
      for the first non-default atmosphere category and the first
      non-default mixing method found (the defaults otherwise). A column is
      only set when it exists among the supplied possibilities.

    Args:
        operations_simplified_list: List of raw operation mappings. Anything
            that is not a list is treated as an empty list. Entries that are
            not dicts still count towards ``proc_num_total_steps`` but
            contribute no other features.
        all_possible_atm_categories: Iterable of atmosphere categories for
            which one-hot columns are created.
        all_possible_mix_methods: Iterable of mixing methods for which
            one-hot columns are created.

    Returns:
        Dict mapping feature names to values.
    """
    default_atm_category = ATMOSPHERE_CONFIG["default_category"]
    default_mix_method = MIXING_METHOD_CONFIG["default_method"]

    aggregated_ops_features = {
        'proc_total_heating_duration_h': 0.0, 'proc_max_temperature_C': np.nan,
        'proc_min_temperature_C': np.nan, 'proc_avg_temperature_C': np.nan,
        'proc_primary_heating_temp_C': np.nan,
        'proc_num_total_steps': 0, 'proc_num_heating_steps': 0,
        'proc_num_mixing_steps': 0, 'proc_num_grinding_steps': 0,
        'proc_has_annealing': False, 'proc_has_sintering': False,
        'proc_has_calcination': False, 'proc_has_quenching': False,
        'proc_has_shaping': False, 'proc_has_drying': False,
    }
    for cat in all_possible_atm_categories:
        aggregated_ops_features[f"ops_atm_cat_{cat}"] = 0
    for meth in all_possible_mix_methods:
        aggregated_ops_features[f"ops_mix_meth_{meth}"] = 0

    if not isinstance(operations_simplified_list, list):
        operations_simplified_list = []
    aggregated_ops_features['proc_num_total_steps'] = len(operations_simplified_list)

    # Per-operation flag -> process-level flag it switches on.
    flag_map = (
        ('op_is_shaping', 'proc_has_shaping'),
        ('op_is_sintering', 'proc_has_sintering'),
        ('op_is_drying', 'proc_has_drying'),
        ('op_is_quenching', 'proc_has_quenching'),
        ('op_is_annealing', 'proc_has_annealing'),
        ('op_is_calcination', 'proc_has_calcination'),
    )

    all_temps_in_reaction = []
    heating_steps = []          # (peak temperature, total duration) per heating step
    mixing_methods_found = []
    selected_atm_category = default_atm_category
    atm_category_found = False

    for op_idx, op_dict_raw in enumerate(operations_simplified_list):
        op_features = parse_single_operation_detailed_for_input(op_dict_raw, f"predict_op_{op_idx}")
        if not op_features:
            # The parser returns {} for non-dict entries. Skip them: indexing
            # 'op_atmosphere_category' on the empty dict would raise KeyError.
            continue

        temps = op_features.get('op_temp_C_list') or []
        durations = op_features.get('op_duration_h_list') or []
        all_temps_in_reaction.extend(temps)

        if op_features.get('op_is_heating'):
            aggregated_ops_features['proc_num_heating_steps'] += 1
            if durations:
                aggregated_ops_features['proc_total_heating_duration_h'] += np.nansum(durations)
            heating_steps.append((np.nanmax(temps) if temps else np.nan, np.nansum(durations)))

        if op_features.get('op_is_mixing'):
            aggregated_ops_features['proc_num_mixing_steps'] += 1
            current_mix_method = op_features.get('op_mixing_method', default_mix_method)
            if current_mix_method != default_mix_method:
                mixing_methods_found.append(current_mix_method)

        if op_features.get('op_is_grinding'):
            aggregated_ops_features['proc_num_grinding_steps'] += 1

        for op_flag, proc_flag in flag_map:
            if op_features.get(op_flag):
                aggregated_ops_features[proc_flag] = True

        # The first operation with a non-default atmosphere decides the category.
        op_atm_category = op_features.get('op_atmosphere_category')
        if not atm_category_found and op_atm_category != default_atm_category:
            selected_atm_category = op_atm_category
            atm_category_found = True

    if heating_steps:
        # Steps without a temperature sort below every step that has one.
        primary_temp, _ = max(
            heating_steps,
            key=lambda step: (step[0] if pd.notna(step[0]) else -float('inf'), step[1]),
        )
        if pd.notna(primary_temp):
            aggregated_ops_features['proc_primary_heating_temp_C'] = primary_temp
        # No atmosphere fallback is needed here: if the loop found no
        # non-default category, every heating step carries the default too.

    selected_mix_method = mixing_methods_found[0] if mixing_methods_found else default_mix_method

    atm_ohe_col = f"ops_atm_cat_{selected_atm_category}"
    if atm_ohe_col in aggregated_ops_features:
        aggregated_ops_features[atm_ohe_col] = 1
    mix_ohe_col = f"ops_mix_meth_{selected_mix_method}"
    if mix_ohe_col in aggregated_ops_features:
        aggregated_ops_features[mix_ohe_col] = 1

    if all_temps_in_reaction:
        aggregated_ops_features['proc_max_temperature_C'] = np.nanmax(all_temps_in_reaction)
        aggregated_ops_features['proc_min_temperature_C'] = np.nanmin(all_temps_in_reaction)
        aggregated_ops_features['proc_avg_temperature_C'] = np.nanmean(all_temps_in_reaction)

    # A zero or missing total is reported as "unknown", not as 0 hours.
    total_duration = aggregated_ops_features['proc_total_heating_duration_h']
    if (aggregated_ops_features['proc_num_heating_steps'] == 0
            or pd.isna(total_duration) or total_duration == 0):
        aggregated_ops_features['proc_total_heating_duration_h'] = np.nan

    return aggregated_ops_features
def _coerce_stoich_amount(amount):
    """Convert a raw stoichiometric amount to float, or NaN when it is not numeric."""
    try:
        return float(amount)
    except (TypeError, ValueError, OverflowError):
        # None, symbolic amounts such as "x" or "1-x", lists, etc.
        return np.nan


def generate_stoichiometry_features_for_input(reactants_simplified, products_simplified, standardize_fn_local):
    """Build fixed-width stoichiometric coefficient features for a reaction.

    The ``'amount'`` entries of the first three reactants and the first two
    products are exposed as ``reactant{i}_coeff`` / ``product{i}_coeff``.
    Slots without a usable numeric amount (missing item, non-dict item,
    missing or non-numeric ``'amount'``) are NaN. The total numbers of
    reactants and products are reported as well.

    Args:
        reactants_simplified: Sequence of reactant mappings, or a falsy value.
        products_simplified: Sequence of product mappings, or a falsy value.
        standardize_fn_local: Not used by this function; kept for interface
            compatibility.

    Returns:
        Dict with keys ``reactant1_coeff``..``reactant3_coeff``,
        ``product1_coeff``..``product2_coeff``, ``num_reactants_in_reaction``
        and ``num_products_in_reaction`` (in that order).
    """
    max_r, max_p = 3, 2
    species_groups = (
        ('reactant', reactants_simplified, max_r),
        ('product', products_simplified, max_p),
    )

    stoich_features = {}
    # Create every coefficient slot first so the key order is stable.
    for prefix, _, slots in species_groups:
        for slot in range(1, slots + 1):
            stoich_features[f'{prefix}{slot}_coeff'] = np.nan

    for prefix, items, slots in species_groups:
        stoich_features[f'num_{prefix}s_in_reaction'] = len(items) if items else 0
        if not items:
            continue
        for slot, item in enumerate(items[:slots], start=1):
            if isinstance(item, dict):
                stoich_features[f'{prefix}{slot}_coeff'] = _coerce_stoich_amount(item.get('amount'))

    return stoich_features
|