| | import sys |
| | import json |
| | import pandas as pd |
| | import numpy as np |
| | import pickle |
| |
|
| |
|
def extract_year(df, eoy_date):
    """
    Select the rows belonging to a single year
    --------
    :param df: dataframe with an 'eoy' column
    :param eoy_date: user-specified end of year date
    :return: rows of df whose 'eoy' value equals eoy_date
    """
    # Boolean mask marking the rows of the requested year
    in_year = df['eoy'] == eoy_date
    return df[in_year]
| |
|
| |
|
def read_yearly_data(data_path, data_type, eoy_date):
    """
    Load the min-max pickle for a data type and keep one year of it
    --------
    :param data_path: path prefix of the generated data
    :param data_type: type of data to read in (part of the file name)
    :param eoy_date: user-specified end of year date
    :return: tuple of (data for the chosen year without the 'SafeHavenID'
        and 'eoy' columns, list of the 'SafeHavenID' values)
    """
    pickle_file = data_path + 'min_max_' + data_type + '.pkl'
    yearly = extract_year(pd.read_pickle(pickle_file), eoy_date)

    # pop() removes the id column from the frame and hands it back
    ids = yearly.pop('SafeHavenID').to_list()

    return yearly.drop('eoy', axis=1), ids
| |
|
| |
|
def main():
    """
    Predict a cluster for every row of the chosen year and save the result
    --------
    Reads 'date' and 'model_data_path' from '../../../config.json' and takes
    two command-line arguments: the data type and the run name. The saved
    classifier for the run is applied to the scaled data (restricted to the
    run's saved columns) and the predicted labels are stored in a 'cluster'
    column of the unscaled data, which is pickled to
    '<model_data_path><run_name>_<data_type>_clusters.pkl'.

    :return: None
    :raises SystemExit: if fewer than two command-line arguments are given
    """
    # Load in config items
    with open('../../../config.json') as json_config_file:
        config = json.load(json_config_file)

    eoy_date = config['date']
    data_path = config['model_data_path']

    # Fail with a usage message instead of a bare IndexError
    if len(sys.argv) < 3:
        sys.exit('Usage: python <script> <data_type> <run_name>')
    data_type = sys.argv[1]
    run_name = sys.argv[2]

    print('Loading data')
    columns = np.load(data_path + run_name + '_cols.npy', allow_pickle=True)
    # The ids returned alongside the scaled data are not needed here
    df_scaled, _ = read_yearly_data(data_path, data_type, eoy_date)
    df_scaled_reduced = df_scaled[columns]
    df_unscaled_full = pd.read_pickle(data_path + 'filled_' + data_type + '.pkl')
    # Copy so that adding the 'cluster' column below writes to an independent
    # frame rather than to a filtered slice of df_unscaled_full
    df_unscaled = extract_year(df_unscaled_full, eoy_date).copy()

    print('Loading model')
    clf_model_file = data_path + run_name + '_dtc_model.pkl'
    # NOTE: unpickling can execute arbitrary code; only load trusted models
    with open(clf_model_file, 'rb') as model_file:
        clf = pickle.load(model_file)

    print('Predicting clusters')
    labels = clf.predict(df_scaled_reduced.to_numpy())
    # NOTE(review): labels are assigned by position, which presumes the
    # 'min_max_' and 'filled_' pickles hold the year's rows in the same
    # order - confirm against the code that generates them
    df_unscaled['cluster'] = labels
    df_unscaled.to_pickle(data_path + '_'.join((run_name, data_type, 'clusters.pkl')))
| |
|
| |
|
# Run the pipeline only when executed as a script, so importing this module
# (e.g. to reuse its helper functions) does not trigger file I/O
if __name__ == '__main__':
    main()
| |
|