Bachstelze committed on
Commit
305c75a
·
1 Parent(s): e982008

create and import CorrelationFilter class

Browse files
Files changed (2) hide show
  1. A5/CorrelationFilter.py +21 -0
  2. app.py +10 -8
A5/CorrelationFilter.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError


class CorrelationFilter(BaseEstimator, TransformerMixin):
    """Drop features that are highly correlated with an earlier feature.

    During ``fit`` the absolute pairwise correlation between the numeric
    columns is computed. A column is marked for removal when its absolute
    correlation with any column that precedes it reaches ``threshold``;
    the earlier column of such a pair is kept. Non-numeric columns are
    ignored by the correlation step and are therefore always kept.

    Parameters
    ----------
    threshold : float, default=0.99
        Absolute correlation at or above which the later column of a
        pair is dropped.

    Attributes
    ----------
    keep_cols_ : list or None
        Column labels retained by ``transform``; ``None`` until ``fit``
        has been called.
    """

    def __init__(self, threshold=0.99):
        self.threshold = threshold
        # Populated by fit(); None means the filter has not been fitted yet.
        self.keep_cols_ = None

    def fit(self, X, y=None):
        """Determine which columns of ``X`` to keep.

        Parameters
        ----------
        X : pandas.DataFrame or array-like
            Training data; anything that is not already a DataFrame is
            wrapped in one.
        y : ignored
            Accepted only so the signature matches other transformers.

        Returns
        -------
        CorrelationFilter
            The fitted instance (``self``).
        """
        frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)

        # Absolute values: a strong negative correlation is just as
        # redundant as a strong positive one.
        corr = frame.corr(numeric_only=True).abs()

        # Keep only the strict upper triangle so every pair is inspected
        # once and the diagonal (self-correlation of 1.0) is excluded.
        upper_mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)
        upper = corr.where(upper_mask)

        # Column-wise check; NaN entries compare as False and are ignored.
        exceeds = (upper >= self.threshold).any(axis=0).to_numpy()
        to_drop = set(upper.columns[exceeds])

        self.keep_cols_ = [col for col in frame.columns if col not in to_drop]
        return self

    def transform(self, X):
        """Return a copy of ``X`` restricted to the columns kept by ``fit``.

        Parameters
        ----------
        X : pandas.DataFrame or array-like
            Data with the same column labels that were seen in ``fit``.

        Returns
        -------
        pandas.DataFrame
            A new DataFrame containing only ``keep_cols_``.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """
        if self.keep_cols_ is None:
            raise NotFittedError(
                "This CorrelationFilter instance is not fitted yet. "
                "Call 'fit' before using 'transform'."
            )
        frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
        return frame[self.keep_cols_].copy()
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  import pandas as pd
3
  import pickle
4
  import os
 
 
5
 
6
  # Get directory where this script is located
7
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -9,11 +11,11 @@ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
9
  # Local paths - models loaded from A4/models/ directory
10
  MODEL_PATH = os.path.join(
11
  SCRIPT_DIR,
12
- "A4/models/gDriveVersion/champion_model_final_2.pkl"
13
  )
14
  CLASSIFICATION_MODEL_PATH = os.path.join(
15
  SCRIPT_DIR,
16
- "A4/models/gDriveVersion/final_champion_model_A3.pkl"
17
  )
18
  DATA_PATH = os.path.join(
19
  SCRIPT_DIR,
@@ -86,10 +88,6 @@ def load_classification_model():
86
  return False
87
 
88
 
89
- load_champion_model()
90
- load_classification_model()
91
-
92
-
93
  def predict_score(*feature_values):
94
  if model is None:
95
  return "Error", "Model not loaded", ""
@@ -415,7 +413,11 @@ def create_interface():
415
  return demo
416
 
417
 
418
- demo = create_interface()
419
-
420
  if __name__ == "__main__":
 
 
 
 
 
 
421
  demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
 
2
  import pandas as pd
3
  import pickle
4
  import os
5
+ from A5.CorrelationFilter import CorrelationFilter
6
+
7
 
8
  # Get directory where this script is located
9
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 
11
  # Local paths - models loaded from A4/models/ directory
12
  MODEL_PATH = os.path.join(
13
  SCRIPT_DIR,
14
+ "A5/models/aimoscores_improved_A4.pkl"
15
  )
16
  CLASSIFICATION_MODEL_PATH = os.path.join(
17
  SCRIPT_DIR,
18
+ "A5/models/weaklink_classifier_rfc_A4.pkl"
19
  )
20
  DATA_PATH = os.path.join(
21
  SCRIPT_DIR,
 
88
  return False
89
 
90
 
 
 
 
 
91
  def predict_score(*feature_values):
92
  if model is None:
93
  return "Error", "Model not loaded", ""
 
413
  return demo
414
 
415
 
 
 
416
  if __name__ == "__main__":
417
+ # load the pickled models
418
+ load_champion_model()
419
+ load_classification_model()
420
+
421
+ # create the interface
422
+ demo = create_interface()
423
  demo.launch(share=False, server_name="0.0.0.0", server_port=7860)