import numpy as np
import torch
import pandas as pd
import os
import sys
import json
from eval.mle.mle import get_evaluator
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import warnings
warnings.filterwarnings("ignore")
import argparse
def eval_mle(syn_data, test_data, info):
    """Run the machine-learning-efficiency evaluation on synthetic data.

    Trains the task-appropriate downstream models (via ``get_evaluator``)
    on ``syn_data`` and scores them against ``test_data``.

    Args:
        syn_data: Synthetic training data as a 2-D array-like
            (features + target column) — assumed to match the layout
            described in ``info``; TODO confirm against get_evaluator.
        test_data: Held-out real test data in the same layout.
        info: Dataset metadata dict; only ``info['task_type']`` is read
            here ('regression' or a classification type).

    Returns:
        dict: ``{score_group_name: {method_name: metrics_dict}}`` where
        each per-method dict is the evaluator's output with its ``'name'``
        key popped out and used as the inner key. Note: the per-method
        dicts returned by the evaluator are mutated in place (``pop``).
    """
    task_type = info['task_type']
    evaluator = get_evaluator(task_type)

    # Collect the evaluator's score lists into an explicit mapping instead
    # of reconstructing locals with eval(score_name), which is fragile and
    # an eval-on-string anti-pattern.
    if task_type == 'regression':
        best_r2_scores, best_rmse_scores = evaluator(
            syn_data,
            test_data,
            info
        )
        score_groups = {
            'best_r2_scores': best_r2_scores,
            'best_rmse_scores': best_rmse_scores,
        }
    else:
        best_f1_scores, best_weighted_scores, best_auroc_scores, best_acc_scores, best_avg_scores = evaluator(
            syn_data,
            test_data,
            info
        )
        score_groups = {
            'best_f1_scores': best_f1_scores,
            'best_weighted_scores': best_weighted_scores,
            'best_auroc_scores': best_auroc_scores,
            'best_acc_scores': best_acc_scores,
            'best_avg_scores': best_avg_scores,
        }

    overall_scores = {}
    for score_name, scores in score_groups.items():
        # Re-key each method's metrics dict by its 'name' field
        # (pop removes 'name' from the stored metrics, as before).
        overall_scores[score_name] = {
            method.pop('name'): method for method in scores
        }
    return overall_scores
# def preprocess(train, test, info)
# def norm_data(data, )
if __name__ == '__main__':
    # CLI entry point: score a synthetic dataset with the MLE protocol and
    # dump the results to eval/mle/<dataname>/<model>.json.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataname', type=str, default='adult')
    parser.add_argument('--model', type=str, default='real')
    parser.add_argument('--path', type=str, default=None, help='The file path of the synthetic data')

    args = parser.parse_args()

    dataname = args.dataname
    model = args.model

    # Default to the conventional synthetic-data location unless an
    # explicit path was given.
    train_path = args.path if args.path else f'synthetic/{dataname}/{model}.csv'
    test_path = f'synthetic/{dataname}/test.csv'

    train = pd.read_csv(train_path).to_numpy()
    test = pd.read_csv(test_path).to_numpy()

    with open(f'data/{dataname}/info.json', 'r') as f:
        info = json.load(f)

    # Delegate to eval_mle instead of duplicating its scoring loop inline
    # (the previous copy also repeated the eval(score_name) anti-pattern).
    overall_scores = eval_mle(train, test, info)

    # exist_ok=True avoids the check-then-create race of
    # `if not os.path.exists(...): os.makedirs(...)`.
    os.makedirs(f'eval/mle/{dataname}', exist_ok=True)
    save_path = f'eval/mle/{dataname}/{model}.json'

    print('Saving scores to ', save_path)
    with open(save_path, "w") as json_file:
        json.dump(overall_scores, json_file, indent=4, separators=(", ", ": "))