Spaces:
Sleeping
Sleeping
Bachstelze commited on
Commit ·
305c75a
1
Parent(s): e982008
create and import CorrelationFilter class
Browse files- A5/CorrelationFilter.py +21 -0
- app.py +10 -8
A5/CorrelationFilter.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
|
| 2 |
+
|
| 3 |
+
# Finds features that are highly correlated with another feature and removes them
|
| 4 |
+
class CorrelationFilter(BaseEstimator, TransformerMixin):
    """Drop numeric features that are highly correlated with an earlier one.

    During ``fit`` the absolute pairwise correlation matrix of the numeric
    columns is computed. A column is dropped when its absolute correlation
    with any column that precedes it (in column order) is greater than or
    equal to ``threshold``. Columns that do not take part in the correlation
    matrix (non-numeric ones) are always kept.

    Parameters
    ----------
    threshold : float, default=0.99
        Absolute correlation at or above which the later column of a pair
        is dropped.

    Attributes
    ----------
    keep_cols_ : list or None
        Column labels retained by ``fit``; ``None`` until ``fit`` is called.
    """

    def __init__(self, threshold=0.99):
        self.threshold = threshold
        # Kept as None (rather than left unset) so objects created or
        # pickled with the previous version of this class behave the same.
        self.keep_cols_ = None

    def fit(self, X, y=None):
        """Determine which columns of ``X`` to keep.

        Parameters
        ----------
        X : pandas.DataFrame or array-like
            Training data; non-DataFrame input is wrapped in a DataFrame.
        y : ignored
            Present only for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        Xdf = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
        # Absolute values: a strong negative correlation is just as
        # redundant as a strong positive one.
        corr = Xdf.corr(numeric_only=True).abs()
        # Keep only the strict upper triangle so each pair is inspected once
        # and a column is never compared with itself.
        upper_mask = np.triu(np.ones(corr.shape, dtype=bool), k=1)
        upper = corr.where(upper_mask)
        # NaN entries (masked cells, constant columns) compare as False.
        exceeds = (upper >= self.threshold).any(axis=0).to_numpy()
        to_drop = set(upper.columns[exceeds])
        self.keep_cols_ = [c for c in Xdf.columns if c not in to_drop]
        return self

    def transform(self, X):
        """Return a copy of ``X`` restricted to the columns kept by ``fit``.

        Parameters
        ----------
        X : pandas.DataFrame or array-like
            Data with the same column labels as the data passed to ``fit``.

        Returns
        -------
        pandas.DataFrame
            A copy containing only the columns in ``keep_cols_``.

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """
        if getattr(self, "keep_cols_", None) is None:
            raise NotFittedError(
                "This CorrelationFilter instance is not fitted yet. "
                "Call 'fit' before using 'transform'."
            )
        Xdf = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
        return Xdf[self.keep_cols_].copy()
|
app.py
CHANGED
|
@@ -2,6 +2,8 @@ import gradio as gr
|
|
| 2 |
import pandas as pd
|
| 3 |
import pickle
|
| 4 |
import os
|
|
|
|
|
|
|
| 5 |
|
| 6 |
# Get directory where this script is located
|
| 7 |
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
@@ -9,11 +11,11 @@ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
| 9 |
# Local paths - models loaded from A4/models/ directory
|
| 10 |
MODEL_PATH = os.path.join(
|
| 11 |
SCRIPT_DIR,
|
| 12 |
-
"
|
| 13 |
)
|
| 14 |
CLASSIFICATION_MODEL_PATH = os.path.join(
|
| 15 |
SCRIPT_DIR,
|
| 16 |
-
"
|
| 17 |
)
|
| 18 |
DATA_PATH = os.path.join(
|
| 19 |
SCRIPT_DIR,
|
|
@@ -86,10 +88,6 @@ def load_classification_model():
|
|
| 86 |
return False
|
| 87 |
|
| 88 |
|
| 89 |
-
load_champion_model()
|
| 90 |
-
load_classification_model()
|
| 91 |
-
|
| 92 |
-
|
| 93 |
def predict_score(*feature_values):
|
| 94 |
if model is None:
|
| 95 |
return "Error", "Model not loaded", ""
|
|
@@ -415,7 +413,11 @@ def create_interface():
|
|
| 415 |
return demo
|
| 416 |
|
| 417 |
|
| 418 |
-
demo = create_interface()
|
| 419 |
-
|
| 420 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import pickle
|
| 4 |
import os
|
| 5 |
+
from A5.CorrelationFilter import CorrelationFilter
|
| 6 |
+
|
| 7 |
|
| 8 |
# Get directory where this script is located
|
| 9 |
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
|
|
| 11 |
# Local paths - models loaded from A5/models/ directory
|
| 12 |
MODEL_PATH = os.path.join(
|
| 13 |
SCRIPT_DIR,
|
| 14 |
+
"A5/models/aimoscores_improved_A4.pkl"
|
| 15 |
)
|
| 16 |
CLASSIFICATION_MODEL_PATH = os.path.join(
|
| 17 |
SCRIPT_DIR,
|
| 18 |
+
"A5/models/weaklink_classifier_rfc_A4.pkl"
|
| 19 |
)
|
| 20 |
DATA_PATH = os.path.join(
|
| 21 |
SCRIPT_DIR,
|
|
|
|
| 88 |
return False
|
| 89 |
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
def predict_score(*feature_values):
|
| 92 |
if model is None:
|
| 93 |
return "Error", "Model not loaded", ""
|
|
|
|
| 413 |
return demo
|
| 414 |
|
| 415 |
|
|
|
|
|
|
|
| 416 |
if __name__ == "__main__":
    # Load the pickled models first, only when run as a script, so that
    # importing this module does not trigger any unpickling.
    for load_model in (load_champion_model, load_classification_model):
        load_model()

    # Build the interface and serve it on all network interfaces, port 7860.
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
|