legend1234 committed on
Commit
46411f2
·
1 Parent(s): 87c739b

Fix data shape mismatch

Browse files
Files changed (1) hide show
  1. utils.py +8 -4
utils.py CHANGED
@@ -59,7 +59,7 @@ def predict_permeability(
59
 
60
  if type(mol_features) == pd.DataFrame:
61
  if mol_features.index.tolist() != info_df.index.tolist():
62
- raise ValueError("Features_df and Info_df do not have the same index.")
63
 
64
  # get predicted probabilities
65
  info_df.loc[:, "B3clf_predicted_probability"] = pred_model.predict_proba(
@@ -99,7 +99,7 @@ def generate_predictions(
99
  if mol_features is None and info_df is None:
100
  if input_fname is None:
101
  raise ValueError("Either input_fname or mol_features/info_df must be provided")
102
-
103
  mol_tag = os.path.basename(input_fname).split(".")[0]
104
  file_ext = os.path.splitext(input_fname)[1].lower()
105
  internal_sdf = f"{mol_tag}_optimized_3d.sdf"
@@ -136,7 +136,11 @@ def generate_predictions(
136
  mol_features = mol_features.apply(pd.to_numeric, errors='coerce')
137
  if mol_features.isnull().any().any():
138
  st.warning("Some descriptors contained invalid values and were removed")
139
- mol_features = mol_features.dropna()
 
 
 
 
140
  if len(mol_features) == 0:
141
  raise ValueError("No valid data remains after cleaning")
142
 
@@ -174,7 +178,7 @@ def generate_predictions(
174
  ]
175
 
176
  return mol_features, info_df, result_df
177
-
178
  except Exception as e:
179
  import traceback
180
  st.error(f"Error in generate_predictions: {str(e)}\n{traceback.format_exc()}")
 
59
 
60
  if type(mol_features) == pd.DataFrame:
61
  if mol_features.index.tolist() != info_df.index.tolist():
62
+ raise ValueError("mol_features and Info_df do not have the same index.")
63
 
64
  # get predicted probabilities
65
  info_df.loc[:, "B3clf_predicted_probability"] = pred_model.predict_proba(
 
99
  if mol_features is None and info_df is None:
100
  if input_fname is None:
101
  raise ValueError("Either input_fname or mol_features/info_df must be provided")
102
+
103
  mol_tag = os.path.basename(input_fname).split(".")[0]
104
  file_ext = os.path.splitext(input_fname)[1].lower()
105
  internal_sdf = f"{mol_tag}_optimized_3d.sdf"
 
136
  mol_features = mol_features.apply(pd.to_numeric, errors='coerce')
137
  if mol_features.isnull().any().any():
138
  st.warning("Some descriptors contained invalid values and were removed")
139
+ # Get indices of valid rows
140
+ valid_indices = ~mol_features.isnull().any(axis=1)
141
+ # Update both dataframes to keep only valid rows
142
+ mol_features = mol_features[valid_indices]
143
+ info_df = info_df[valid_indices]
144
  if len(mol_features) == 0:
145
  raise ValueError("No valid data remains after cleaning")
146
 
 
178
  ]
179
 
180
  return mol_features, info_df, result_df
181
+
182
  except Exception as e:
183
  import traceback
184
  st.error(f"Error in generate_predictions: {str(e)}\n{traceback.format_exc()}")