# File size: 8,147 Bytes
# 093b0a5
import os
from pprint import pprint
from typing import Any
import pandas as pd
import numpy as np
from utils.stock_metrics import (
LogPctProfitDirection,
LogPctProfitTanhV1,
LogPctProfitTanhV2,
apply_threshold_metric,
pct_direction,
)
from utils.tools import dotdict
def open_results(
log_dir: str, args: dotdict, df: pd.DataFrame
) -> dict[str, dict[str, Any]]:
"""Function to open an experiment and return its tpd_dict"""
tpd_dict_tuple: dict[str, tuple[Any, Any, Any]] = {}
for data_group in ["train", "val", "test"]:
device = 0
while True: # Device Loop
preds_path = os.path.join(
log_dir, f"results/pred_{data_group}_{device}.npy"
)
trues_path = os.path.join(
log_dir, f"results/true_{data_group}_{device}.npy"
)
dates_path = os.path.join(
log_dir, f"results/date_{data_group}_{device}.npy"
)
if (
os.path.exists(preds_path)
and os.path.exists(trues_path)
and os.path.exists(dates_path)
):
dp = [
np.load(trues_path)[:, 0, 0],
np.load(preds_path)[:, 0, 0],
np.load(dates_path)[:, 0],
]
tpd_dict_tuple[data_group] = (
dp
if data_group not in tpd_dict_tuple
else [
np.append(tpdfi, dpi, axis=0)
for tpdfi, dpi in zip(tpd_dict_tuple[data_group], dp)
]
)
s = np.argsort(tpd_dict_tuple[data_group][2], axis=None)
tpd_dict_tuple[data_group] = list(
map(lambda x: x[s], tpd_dict_tuple[data_group])
)
tpd_dict_tuple[data_group][2] = pd.DatetimeIndex(
tpd_dict_tuple[data_group][2], tz="UTC"
)
# Override trues with df target data to get original numerical precision
if (
not ("mse" in args.loss and not args.inverse_output)
and df is not None
):
print("OVERRIDING trues with df target")
df_data_group = df.loc[tpd_dict_tuple[data_group][2]]
t = args.target.split("_")
df_target = df_data_group[t[0]][t[1]].to_numpy()
tpd_dict_tuple[data_group][0] = df_target
else:
# Done searching for devices
break
device += 1
tpd_dict: dict[str, dict[str, Any]] = {}
for data_group in tpd_dict_tuple:
tpd_dict[data_group] = {
"trues": tpd_dict_tuple[data_group][0],
"preds": tpd_dict_tuple[data_group][1],
"dates": tpd_dict_tuple[data_group][2],
}
return tpd_dict
def get_metrics(
    args: dotdict | None, pred: np.ndarray, true: np.ndarray, thresh: float = 0.0
) -> dict:
    """Compute direction and profit metrics for thresholded predictions.

    ``pred`` and ``true`` are first passed through ``apply_threshold_metric``
    with ``thresh``; every metric except the ``pct_excluded*`` ratios is
    computed on the filtered pair.  ``args`` is accepted but not read here.

    NOTE(review): the ``avg_*`` entries use ``1 / len(filtered preds)`` as an
    exponent, so an input where nothing survives the threshold raises
    ZeroDivisionError -- confirm callers never reach that case.

    Returns:
        A dict mapping metric name to value.
    """
    # Filter by a threshold
    kept_pred, kept_true = apply_threshold_metric(pred, true, thresh)

    def per_sample(total):
        # len(kept_pred)-th root of an accumulated value.
        return np.power(total, (1 / len(kept_pred)))

    def excluded_fraction(n_remaining):
        # Share of the unfiltered predictions that are not in n_remaining.
        return (len(pred) - n_remaining) / len(pred)

    # Percent direction correct, ie up or down
    direction_hit_rate = pct_direction(kept_pred, kept_true)

    # (result key, metric class, short_filter), evaluated top to bottom:
    # all-in direction profit first, then the two tanh partial-purchase variants.
    profit_specs = (
        ("pct_profit_dir", LogPctProfitDirection, None),
        ("pct_profit_dir_nshort", LogPctProfitDirection, "ns"),
        ("pct_profit_dir_oshort", LogPctProfitDirection, "os"),
        ("pct_profit_tanhv1", LogPctProfitTanhV1, None),
        ("pct_profit_tanhv1_nshort", LogPctProfitTanhV1, "ns"),
        ("pct_profit_tanhv1_oshort", LogPctProfitTanhV1, "os"),
        ("pct_profit_tanhv2", LogPctProfitTanhV2, None),
        ("pct_profit_tanhv2_nshort", LogPctProfitTanhV2, "ns"),
        ("pct_profit_tanhv2_oshort", LogPctProfitTanhV2, "os"),
    )
    profits = {}
    for key, strategy, mode in profit_specs:
        profits[key] = strategy.metric(kept_pred, kept_true, short_filter=mode)

    # Optimal percent profit: the true values are used as the predictions.
    optimal_profit = LogPctProfitDirection.metric(kept_true, kept_true)

    # Always 1 direction: constant -1 / +1 predictions.
    always_short = per_sample(
        LogPctProfitDirection.metric(
            -np.ones(kept_pred.shape), kept_true, short_filter=None
        )
    )
    always_buy = per_sample(
        LogPctProfitDirection.metric(
            np.ones(kept_pred.shape), kept_true, short_filter=None
        )
    )

    # Return metrics
    metrics = {"avg_pct_profit_tanhv1": per_sample(profits["pct_profit_tanhv1"])}
    metrics.update(profits)
    metrics["pct_excluded"] = excluded_fraction(len(kept_pred))
    metrics["pct_excluded_nshort"] = excluded_fraction(len(kept_pred[kept_pred > 0]))
    metrics["pct_excluded_oshort"] = excluded_fraction(len(kept_pred[kept_pred < 0]))
    metrics["pct_dir_correct"] = direction_hit_rate
    metrics["pct_profit_dir_opt"] = optimal_profit
    metrics["avg_pct_profit_always_short"] = always_short
    metrics["avg_pct_profit_always_buy"] = always_buy
    return metrics
def get_tuned_metrics(args: dotdict, tpd_dict: dict):
    """Sweep prediction thresholds and report metrics at the best one.

    501 evenly spaced thresholds between 0 and the 50th percentile of the
    absolute train predictions are evaluated with ``get_metrics`` for every
    data group in ``tpd_dict``.  The threshold whose train value of the tuning
    metric (chosen from ``args.loss``) is largest -- and greater than 0 -- is
    selected; otherwise the threshold stays 0.  Metrics for threshold 0 and
    for the selected threshold are printed.

    NOTE(review): the "stock_tanh" loss selects "pct_profit_tanh"; confirm
    ``get_metrics`` produces that key, otherwise the lookup raises KeyError.

    Returns:
        ``(best_thresh, metrics_at_best_thresh, metrics_at_zero_thresh)``,
        the two metric entries being dicts keyed by data group.
    """
    # Get the percentile to check thresh until (taken from train only)
    what_percentile = 50
    percentile_value = 0.0 + np.percentile(
        np.abs(tpd_dict["train"]["preds"]), what_percentile
    )
    print(f"{what_percentile}'th percentile: {percentile_value}")

    # Safety check: target must split into exactly two "_"-separated parts
    _ticker, field = args.target.split("_")
    assert field in ("pctchange", "logpctchange")

    # Tune threshhold based off of train's metric we care about
    loss_to_metric = (
        ("stock_tanhv1", "pct_profit_tanhv1"),
        ("stock_tanhv2", "pct_profit_tanhv2"),
        ("stock_tanhv4", "pct_profit_tanhv1"),  # this is on purpose
        ("stock_tanh", "pct_profit_tanh"),
    )
    tune_metric = next(
        (metric for loss_name, metric in loss_to_metric if args.loss == loss_name),
        "pct_profit_dir",
    )

    best = (0, 0)  # (best train tune value so far, threshold that produced it)
    # Tracks results: threshold -> data group -> metrics
    tracker = {}

    # Try all of the thresholds
    for thresh in np.linspace(0, percentile_value, 501):
        per_group = {}
        tracker[thresh] = per_group
        for data_group in tpd_dict:
            group = tpd_dict[data_group]
            # "dates" is looked up like the other entries but not used below.
            true, pred, _dates = group["trues"], group["preds"], group["dates"]
            per_group[data_group] = get_metrics(args, pred, true, thresh)

            # Only the train group may move the best threshold.
            tune_value = per_group[data_group][tune_metric]
            if tune_value > best[0] and data_group == "train":
                best = (tune_value, thresh)

    best_thresh = best[1]

    print("zeros thresh:")
    for data_group in tracker[best_thresh]:
        print(data_group, end="\t")
        pprint(tracker[0.0][data_group], indent=3)
    print("\n\n")
    print("best thresh:", best_thresh)
    for data_group, group_metrics in tracker[best_thresh].items():
        print(data_group, end="\t")
        pprint(group_metrics, indent=3)

    return best_thresh, tracker[best_thresh], tracker[0.0]
|