# Source path: ostock-backend / model / src / optimization / grid_search.py
# Page header: johnaness's picture
# Commit message: Deploy OStock FastAPI backend to HF Space (Docker SDK, port 7860)
# Commit: 4be2d4d
"""
Model optimization module based on grid search.
"""
import numpy as np
import itertools
import tensorflow as tf
from pathlib import Path
import matplotlib.pyplot as plt
from ..models.contime import build_contime_lstm_model
from ..data.processors import prepare_data
from ..data.normalize import clean_numeric_data
from ..evaluation.backtest import backtest_by_ticker, get_risk_free_rate
from ..evaluation.model_evaluation import evaluate_model, calculate_combined_score
from ..data.hierarchical_embedding import create_sector_industry_mapping, apply_sector_industry_mapping
from .utils import TqdmProgressCallback, save_model, save_results, save_metadata, get_project_root
from ..visualization.plots import plot_training_history, plot_performance_grid, plot_signal_distribution, plot_price_predictions, plot_graph_embeddings
def evaluate_config(config, data_dict, ticker_encoder, risk_free_rate, sector_industry_df=None, selection_method='combined_score'):
    """
    Train one model for ``config`` and evaluate it on the validation split.

    Args:
        config: Hyper-parameter dict. Keys read here: ``hidden_dim``,
            ``dropout_rate``, ``dt``, ``ode_steps``, ``value_weight``,
            ``derivative_weight``, ``patience``, ``factor``, ``min_lr``,
            ``epochs`` and ``batch_size``.
        data_dict: Must contain ``x_*``, ``y_*``, ``ticker_*``, ``y_*_dt`` and
            ``time_diffs_*`` for both ``train`` and ``val``. If it also holds
            ``sector_train`` and ``industry_train``, the pre-computed
            sector/industry arrays (train and val) are used.
        ticker_encoder: Encoder passed to ``apply_sector_industry_mapping``;
            its ``mapping`` attribute is read when ``sector_industry_df`` is
            given.
        risk_free_rate: Forwarded to the threshold search / backtest.
        sector_industry_df: Optional table used to derive sector/industry ids
            per ticker. When ``None`` all ids default to zero.
        selection_method: ``'combined_score'`` adds a ``combined_score`` metric
            and makes the threshold search use the combined score; any other
            value uses the Sharpe-based weighting.

    Returns:
        A dict with ``config``, ``metrics``, ``model``, ``history``,
        ``best_threshold``, ``ticker_metrics``, ``total_opportunities``,
        ``min_expected_trades`` and ``all_thresholds``; or ``None`` when
        anything fails (the error and its traceback are printed).
    """
    try:
        # Pull the training / validation arrays out of the data dictionary.
        x_train = data_dict['x_train']
        y_train = data_dict['y_train']
        ticker_train = data_dict['ticker_train']
        y_train_dt = data_dict['y_train_dt']
        x_val = data_dict['x_val']
        y_val = data_dict['y_val']
        ticker_val = data_dict['ticker_val']
        y_val_dt = data_dict['y_val_dt']

        # Sanitize the feature arrays before they reach the model.
        x_train = clean_numeric_data(x_train, verbose=False)
        x_val = clean_numeric_data(x_val, verbose=False)

        # Sector / industry ids default to a single id (all zeros).
        sector_train = np.zeros_like(ticker_train)
        industry_train = np.zeros_like(ticker_train)
        sector_val = np.zeros_like(ticker_val)
        industry_val = np.zeros_like(ticker_val)

        if sector_industry_df is not None:
            id_to_ticker = {v: k for k, v in ticker_encoder.mapping.items()}
            ticker_list = list(id_to_ticker.values())
            sector_mapping, industry_mapping = create_sector_industry_mapping(
                ticker_list, sector_industry_df
            )
            sector_train, industry_train = apply_sector_industry_mapping(
                ticker_train, ticker_encoder, sector_mapping, industry_mapping
            )
            sector_val, industry_val = apply_sector_industry_mapping(
                ticker_val, ticker_encoder, sector_mapping, industry_mapping
            )

        # Pre-computed sector / industry arrays take precedence when present.
        if 'sector_train' in data_dict and 'industry_train' in data_dict:
            sector_train = data_dict['sector_train']
            industry_train = data_dict['industry_train']
            sector_val = data_dict['sector_val']
            industry_val = data_dict['industry_val']

        # Cast everything to the dtypes fed to the model.
        inputs_train = [
            np.asarray(x_train, dtype=np.float32),
            np.asarray(ticker_train, dtype=np.int32),
            np.asarray(sector_train, dtype=np.int32),
            np.asarray(industry_train, dtype=np.int32),
            np.asarray(data_dict['time_diffs_train'], dtype=np.float32),
        ]
        inputs_val = [
            np.asarray(x_val, dtype=np.float32),
            np.asarray(ticker_val, dtype=np.int32),
            np.asarray(sector_val, dtype=np.int32),
            np.asarray(industry_val, dtype=np.int32),
            np.asarray(data_dict['time_diffs_val'], dtype=np.float32),
        ]
        targets_train = {
            'value_output': np.asarray(y_train, dtype=np.float32),
            'derivative_output': np.asarray(y_train_dt, dtype=np.float32),
        }
        targets_val = {
            'value_output': np.asarray(y_val, dtype=np.float32),
            'derivative_output': np.asarray(y_val_dt, dtype=np.float32),
        }

        # Build the model.
        # NOTE(review): the vocabulary sizes are the number of distinct ids in
        # the training split; presumably ids are contiguous from 0 and every
        # validation id also occurs in training - confirm against
        # build_contime_lstm_model.
        model = build_contime_lstm_model(
            seq_len=x_train.shape[1],
            num_features=x_train.shape[2],
            hidden_dim=config['hidden_dim'],
            dropout_rate=config['dropout_rate'],
            num_tickers=len(np.unique(ticker_train)),
            dt=config['dt'],
            ode_steps=config['ode_steps'],
            value_weight=config['value_weight'],
            derivative_weight=config['derivative_weight'],
            num_sectors=len(np.unique(sector_train)),
            num_industries=len(np.unique(industry_train)),
        )

        # Early stopping, learning-rate decay and a progress callback.
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=config['patience'],
                restore_best_weights=True,
                verbose=0
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=config['factor'],
                patience=config['patience'] // 2,
                min_lr=config['min_lr'],
                verbose=0
            ),
            TqdmProgressCallback(epochs=config['epochs']),
        ]

        # Train.
        history = model.fit(
            inputs_train,
            targets_train,
            validation_data=(inputs_val, targets_val),
            epochs=config['epochs'],
            batch_size=config['batch_size'],
            callbacks=callbacks,
            verbose=2
        )

        # Validation metrics from the project evaluator.
        metrics = evaluate_model(
            model, x_val, y_val, ticker_val, y_val_dt,
            sector_test=sector_val,
            industry_test=industry_val,
            time_diffs_test=inputs_val[4],
            verbose=False
        )

        # Predict on the validation split and pick the value head.
        pred_val = model.predict(inputs_val, verbose=0)
        if isinstance(pred_val, dict):
            y_pred_val = pred_val['value_output']
        elif isinstance(pred_val, (list, tuple)):
            y_pred_val = pred_val[0]
        else:
            y_pred_val = pred_val
        y_pred_val = np.asarray(y_pred_val)

        # For sequence outputs keep only the last time step.
        if y_pred_val.ndim == 3:
            y_pred_val = y_pred_val[:, -1, 0]
        y_pred_val = y_pred_val.flatten()
        y_val_flat = np.asarray(y_val).flatten()
        ticker_val_flat = np.asarray(ticker_val).flatten()

        # Align the three arrays on their common length.
        min_len = min(len(y_pred_val), len(y_val_flat), len(ticker_val_flat))
        y_pred_val = y_pred_val[:min_len]
        y_val_flat = y_val_flat[:min_len]
        ticker_val_flat = ticker_val_flat[:min_len]

        # Number of trading opportunities on the validation split.
        num_tickers = len(np.unique(ticker_val_flat))
        if num_tickers == 0:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError("validation split is empty; cannot evaluate configuration")
        trading_days = len(y_val_flat) // num_tickers
        total_opportunities = trading_days * num_tickers
        min_expected_trades = max(10, int(total_opportunities * 0.05))

        # Search for the best trading threshold.
        use_combined_score = (selection_method == 'combined_score')
        best_threshold, best_backtest, all_thresholds = find_optimal_threshold(
            y_pred_val, y_val_flat, ticker_val_flat, risk_free_rate, min_expected_trades, use_combined_score
        )
        if best_backtest is None:
            raise ValueError("threshold search did not produce a backtest result")

        portfolio = best_backtest['portfolio']
        trade_count = len(portfolio.get('trades', []))

        # The combined score is only computed for the matching selection method.
        combined_metric = None
        if use_combined_score:
            combined_metric = calculate_combined_score(
                best_backtest,
                min_trades=50,
                max_trades=150
            )
            metrics['combined_score'] = combined_metric

        # Backtest metrics shared by both selection methods.
        metrics.update({
            'best_threshold': best_threshold,
            'total_return': portfolio['total_return'],
            'sharpe_ratio': portfolio['sharpe_ratio'],
            'max_drawdown': portfolio['max_drawdown'],
            'trade_count': trade_count,
            'win_rate': portfolio.get('win_rate', 0),
            'avg_ticker_sharpe': best_backtest['avg_ticker_sharpe'],
        })

        print(f"์ž„๊ณ„๊ฐ’: {best_threshold:.4f}, ์ˆ˜์ต๋ฅ : {portfolio['total_return']:.4f}")
        print(f"๊ฑฐ๋ž˜: {trade_count}/{total_opportunities} "
              f"({trade_count / total_opportunities:.1%})")
        if use_combined_score:
            print(f"๋ณตํ•ฉ ์ ์ˆ˜: {combined_metric:.4f}")

        return {
            'config': config,
            'metrics': metrics,
            'model': model,
            'history': history.history,
            'best_threshold': best_threshold,
            'ticker_metrics': best_backtest['by_ticker'],
            'total_opportunities': total_opportunities,
            'min_expected_trades': min_expected_trades,
            'all_thresholds': all_thresholds,
        }
    except Exception as e:
        # Best-effort by design: one failing configuration must not abort the
        # whole grid search, but keep the traceback so the cause is visible.
        import traceback
        print(f"๋ชจ๋ธ ํ‰๊ฐ€ ์‹คํŒจ: {str(e)}")
        traceback.print_exc()
        return None
def find_optimal_threshold(y_pred_val, y_val_flat, ticker_val_flat, risk_free_rate, min_expected_trades, use_combined_score=True):
    """
    Scan trading thresholds and return the one with the best weighted score.

    Thresholds ``0.000, 0.001, ...`` (``np.arange(0.00, 0.05, 0.001)``) are
    each backtested with ``backtest_by_ticker`` at a commission of 0.0025.

    Args:
        y_pred_val: Predictions passed to the backtest.
        y_val_flat: Actual returns passed to the backtest.
        ticker_val_flat: Ticker ids passed to the backtest.
        risk_free_rate: Risk-free rate passed to the backtest.
        min_expected_trades: Target number of trades; sets the trade-count
            bounds (combined score) or the trade-ratio penalty (Sharpe mode).
        use_combined_score: When true, score each backtest with
            ``calculate_combined_score``; otherwise use
            ``avg_ticker_sharpe`` scaled by a trade-ratio factor.

    Returns:
        ``(best_threshold, best_backtest, all_thresholds)`` where
        ``all_thresholds`` maps each threshold (as ``float``) to its portfolio
        return, Sharpe ratio, max drawdown and trades. ``best_backtest`` is
        never ``None`` as long as at least one threshold was evaluated: a NaN
        score is treated as the worst possible score, and the first backtest
        is used as the fallback.
    """
    thresholds = np.arange(0.00, 0.05, 0.001)

    # These bounds depend only on min_expected_trades, so compute them once.
    min_trades = max(10, min_expected_trades // 2)
    max_trades = min_expected_trades * 2
    # Denominator guard for the trade-ratio penalty (avoids ZeroDivisionError).
    expected_trades = min_expected_trades if min_expected_trades > 0 else 1

    best_weighted_score = -np.inf
    best_threshold = 0
    best_backtest = None
    # Summary of every threshold that was tried.
    all_thresholds = {}

    for threshold in thresholds:
        result = backtest_by_ticker(
            predictions=y_pred_val,
            actual_returns=y_val_flat,
            ticker_ids=ticker_val_flat,
            threshold=threshold,
            commission=0.0025,
            risk_free_rate=risk_free_rate
        )
        portfolio = result['portfolio']
        trades = portfolio.get('trades', [])
        all_thresholds[float(threshold)] = {
            'total_return': portfolio['total_return'],
            'sharpe_ratio': portfolio['sharpe_ratio'],
            'max_drawdown': portfolio['max_drawdown'],
            'trades': trades,
        }

        if use_combined_score:
            weighted_score = calculate_combined_score(
                result,
                min_trades=min_trades,
                max_trades=max_trades
            )
        else:
            trade_count = len(trades)
            ratio = trade_count / expected_trades
            # At least half the expected trades: linear credit capped at 1;
            # fewer than that: quadratic penalty.
            if trade_count >= min_expected_trades * 0.5:
                trade_ratio_score = min(1.0, ratio)
            else:
                trade_ratio_score = ratio ** 2
            weighted_score = result['avg_ticker_sharpe'] * trade_ratio_score

        # NaN never compares greater than anything, which would otherwise
        # leave best_backtest unset; rank it as the worst score instead.
        if np.isnan(weighted_score):
            weighted_score = -np.inf

        if best_backtest is None or weighted_score > best_weighted_score:
            best_weighted_score = weighted_score
            best_threshold = float(threshold)
            best_backtest = result

    return best_threshold, best_backtest, all_thresholds
def _collapse_value_predictions(raw_predictions):
    """Return the value-head predictions as a 1-D array (last step for 3-D output)."""
    if isinstance(raw_predictions, dict):
        values = raw_predictions['value_output']
    elif isinstance(raw_predictions, (list, tuple)):
        values = raw_predictions[0]
    else:
        values = raw_predictions
    values = np.asarray(values)
    # Same rule as the validation scoring in evaluate_config: sequence outputs
    # contribute only their final time step.
    if values.ndim == 3:
        return values[:, -1, 0]
    return values.flatten()


def _resolve_sector_industry_ids(data_dict, split, ticker_ids, ticker_encoder, sector_industry_df):
    """Return (sector_ids, industry_ids) for ``split``, mirroring how training derived them."""
    has_precomputed = 'sector_train' in data_dict and 'industry_train' in data_dict
    if sector_industry_df is not None and not has_precomputed:
        # Training mapped tickers through sector_industry_df in this case, so
        # inference must use the same mapping rather than all-zero ids.
        id_to_ticker = {v: k for k, v in ticker_encoder.mapping.items()}
        sector_mapping, industry_mapping = create_sector_industry_mapping(
            list(id_to_ticker.values()), sector_industry_df
        )
        return apply_sector_industry_mapping(
            ticker_ids, ticker_encoder, sector_mapping, industry_mapping
        )
    zeros = np.zeros_like(ticker_ids)
    return (
        data_dict.get(f'sector_{split}', zeros),
        data_dict.get(f'industry_{split}', zeros),
    )


def _predict_split_values(model, data_dict, split, ticker_encoder, sector_industry_df):
    """Run ``model`` on one split of ``data_dict`` and return 1-D value predictions."""
    features = clean_numeric_data(data_dict[f'x_{split}'], replace_nan=0.0, replace_inf=0.0, verbose=False)
    raw_tickers = data_dict[f'ticker_{split}']
    sector_ids, industry_ids = _resolve_sector_industry_ids(
        data_dict, split, raw_tickers, ticker_encoder, sector_industry_df
    )
    raw_predictions = model.predict([
        tf.cast(features, tf.float32),
        tf.cast(np.asarray(raw_tickers, dtype=np.int32), tf.int32),
        tf.cast(np.asarray(sector_ids, dtype=np.int32), tf.int32),
        tf.cast(np.asarray(industry_ids, dtype=np.int32), tf.int32),
        tf.cast(np.asarray(data_dict[f'time_diffs_{split}'], dtype=np.float32), tf.float32),
    ], verbose=0)
    return _collapse_value_predictions(raw_predictions)


def _save_figure(fig, plots_dir, filename):
    """Save ``fig`` under ``plots_dir`` and close it; does nothing for a falsy figure."""
    if fig:
        fig.savefig(plots_dir / filename, dpi=300, bbox_inches='tight')
        plt.close(fig)
        print(f" - {filename} ์ €์žฅ ์™„๋ฃŒ")


def _save_visualizations(best_result, best_model, best_threshold, data_dict,
                         ticker_encoder, sector_industry_df, plots_dir):
    """Render and save all diagnostic plots; any failure is reported and skipped."""
    import traceback
    try:
        # 1. Training history.
        _save_figure(plot_training_history(best_result['history']), plots_dir, "training_history.png")

        # 2. Performance grid, keyed by the commission used in the backtests.
        _save_figure(
            plot_performance_grid({0.0025: best_result.get('all_thresholds', {})}),
            plots_dir, "performance_grid.png"
        )

        # 3. Signal distribution on the validation split.
        y_pred_val = _predict_split_values(best_model, data_dict, 'val', ticker_encoder, sector_industry_df)
        _save_figure(plot_signal_distribution(y_pred_val, best_threshold), plots_dir, "signal_distribution.png")

        # 4. Price predictions.
        _save_figure(
            plot_price_predictions(best_model, data_dict, best_threshold, ticker_encoder),
            plots_dir, "price_predictions.png"
        )

        # 5. Graph embeddings. Prefer the table the caller passed in and only
        # fall back to the one stored in data_dict.
        embedding_df = sector_industry_df
        if embedding_df is None:
            embedding_df = data_dict.get('sector_industry_df')

        # No table available: try to build one from the encoder's tickers.
        if embedding_df is None:
            try:
                if hasattr(ticker_encoder, 'classes_'):
                    tickers = ticker_encoder.classes_.tolist()
                elif hasattr(ticker_encoder, 'mapping'):
                    tickers = list(ticker_encoder.mapping.keys())
                else:
                    tickers = None
                if tickers:
                    from ..data.hierarchical_embedding import get_industry_data
                    embedding_df = get_industry_data(tickers)
                    print(f"์„นํ„ฐ-์‚ฐ์—… ๋ฐ์ดํ„ฐ ๋™์  ์ƒ์„ฑ: {len(embedding_df) if embedding_df is not None else 0}๊ฐœ ์ข…๋ชฉ")
            except Exception as e:
                print(f"์„นํ„ฐ-์‚ฐ์—… ๋ฐ์ดํ„ฐ ์ƒ์„ฑ ์‹คํŒจ: {e}")
                embedding_df = None

        if embedding_df is not None and len(embedding_df) > 0:
            try:
                plot_graph_embeddings(
                    embedding_df,
                    save_path_tsne=str(plots_dir / 'graph_embedding_tsne.png'),
                    save_path_pca=str(plots_dir / 'graph_embedding_pca.png')
                )
                print(" - graph_embedding_tsne.png ์ €์žฅ ์™„๋ฃŒ")
                print(" - graph_embedding_pca.png ์ €์žฅ ์™„๋ฃŒ")
            except Exception as e:
                print(f"๊ทธ๋ž˜ํ”„ ์ž„๋ฒ ๋”ฉ ์‹œ๊ฐํ™” ์˜ค๋ฅ˜: {e}")
                traceback.print_exc()
        else:
            print(" - ์„นํ„ฐ-์‚ฐ์—… ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์–ด ๊ทธ๋ž˜ํ”„ ์ž„๋ฒ ๋”ฉ ์‹œ๊ฐํ™”๋ฅผ ๊ฑด๋„ˆ๋œ๋‹ˆ๋‹ค.")
    except Exception as e:
        # Plots are optional; report the problem and carry on.
        print(f"์‹œ๊ฐํ™” ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        traceback.print_exc()
        print("์‹œ๊ฐํ™”๋ฅผ ๊ฑด๋„ˆ๋›ฐ๊ณ  ๊ณ„์† ์ง„ํ–‰ํ•ฉ๋‹ˆ๋‹ค.")


def _evaluate_test_split(best_model, data_dict, best_threshold, risk_free_rate,
                         ticker_encoder, sector_industry_df):
    """Backtest the best model on the test split; return the backtest or ``None``."""
    test_backtest = None
    try:
        y_pred_test = _predict_split_values(best_model, data_dict, 'test', ticker_encoder, sector_industry_df)

        test_backtest = backtest_by_ticker(
            predictions=y_pred_test,
            actual_returns=np.asarray(data_dict['y_test']).flatten(),
            ticker_ids=np.asarray(data_dict['ticker_test'], dtype=np.int32).flatten(),
            threshold=best_threshold,
            commission=0.0025,
            risk_free_rate=risk_free_rate
        )

        # Per-ticker averages.
        per_ticker = list(test_backtest['by_ticker'].values())
        avg_ticker_return = np.mean([info['total_return'] for info in per_ticker])
        avg_ticker_sharpe = np.mean([info['sharpe_ratio'] for info in per_ticker])

        portfolio = test_backtest['portfolio']
        print("\n----- ํฌํŠธํด๋ฆฌ์˜ค ์„ฑ๋Šฅ -----")
        print(f"ํ…Œ์ŠคํŠธ ์„ธํŠธ ์ด ์ˆ˜์ต๋ฅ : {portfolio['total_return']:.4f}")
        print(f"ํ…Œ์ŠคํŠธ ์„ธํŠธ ์ƒคํ”„ ๋น„์œจ: {portfolio['sharpe_ratio']:.4f}")
        print(f"ํ…Œ์ŠคํŠธ ์„ธํŠธ ์ตœ๋Œ€ ๋‚™ํญ: {portfolio['max_drawdown']:.4f}")
        print(f"ํ…Œ์ŠคํŠธ ์„ธํŠธ ๊ฑฐ๋ž˜ ์ˆ˜: {len(portfolio.get('trades', []))}")
        print("\n----- ๊ฐœ๋ณ„ ์ข…๋ชฉ ํ‰๊ท  ์„ฑ๋Šฅ -----")
        print(f"ํ…Œ์ŠคํŠธ ์„ธํŠธ ํ‰๊ท  ์ข…๋ชฉ ์ˆ˜์ต๋ฅ : {avg_ticker_return:.4f}")
        print(f"ํ…Œ์ŠคํŠธ ์„ธํŠธ ํ‰๊ท  ์ข…๋ชฉ ์ƒคํ”„ ๋น„์œจ: {avg_ticker_sharpe:.4f}")
    except Exception as e:
        import traceback
        print(f"ํ…Œ์ŠคํŠธ ์„ธํŠธ ํ‰๊ฐ€ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        traceback.print_exc()
    return test_backtest


def _resolve_model_path(model_output, results_dir):
    """Return the ``.keras`` path the best model is saved to."""
    if model_output is None:
        return results_dir / "best_contime_model.keras"
    model_path = Path(model_output)
    if not model_path.is_absolute():
        model_path = results_dir / model_path.name
    if not model_path.name.endswith('.keras'):
        if model_path.suffix == '.h5':
            # Swap only the file extension; never touch ".h5" inside directory names.
            model_path = model_path.with_suffix('.keras')
        else:
            model_path = model_path.with_name(model_path.name + '.keras')
    return model_path


def _persist_best_model(best_model, best_result, best_config, best_threshold,
                        ticker_encoder, model_output, ticker_summary, test_backtest):
    """Save the best model together with its threshold / performance metadata."""
    # Encoder information stored next to the model.
    encoders = None
    if ticker_encoder:
        if hasattr(ticker_encoder, 'mapping'):
            encoders = {'ticker_encoder': ticker_encoder.mapping}
        elif hasattr(ticker_encoder, 'classes_'):
            encoders = {'ticker_encoder': dict(enumerate(ticker_encoder.classes_))}

    results_dir = Path(get_project_root()) / "models" / "results"
    results_dir.mkdir(parents=True, exist_ok=True)
    model_path = _resolve_model_path(model_output, results_dir)

    save_model(
        model=best_model,
        model_path=model_path,
        config=best_config,
        encoders=encoders
    )

    metrics = best_result['metrics']
    total_opportunities = best_result['total_opportunities']
    trade_ratio = metrics['trade_count'] / total_opportunities if total_opportunities else 0.0
    threshold_info = {
        'best_threshold': best_threshold,
        'config': best_config,
        'avg_ticker_sharpe': metrics.get('avg_ticker_sharpe', 0),
        'portfolio_sharpe': metrics['sharpe_ratio'],
        'total_return': metrics['total_return'],
        'avg_ticker_return': float(ticker_summary['avg_return']),
        'avg_ticker_win_rate': float(ticker_summary['avg_win_rate']),
        'avg_ticker_mdd': float(ticker_summary['avg_mdd']),
        'trade_count': metrics['trade_count'],
        'total_opportunities': total_opportunities,
        'trade_ratio': float(trade_ratio),
        'min_expected_trades': best_result['min_expected_trades'],
    }
    if test_backtest:
        test_portfolio = test_backtest['portfolio']
        threshold_info['test_metrics'] = {
            'total_return': test_portfolio['total_return'],
            'sharpe_ratio': test_portfolio['sharpe_ratio'],
            'max_drawdown': test_portfolio['max_drawdown'],
            'trade_count': len(test_portfolio.get('trades', [])),
        }

    save_metadata(threshold_info, results_dir / f"{model_path.stem}_meta.json")


def run_optimization_pipeline(data_dict, ticker_encoder, metric='combined_score',
                              output_path=None, save=True, model_output=None,
                              sector_industry_df=None, run_visualizations=False,
                              param_grid=None):
    """
    Grid-search the continuous-time model, then report, plot, test and save the best run.

    Args:
        data_dict: Prepared arrays (``x_*``, ``y_*``, ``ticker_*``,
            ``time_diffs_*`` for train/val, optionally test). If required keys
            are missing, ``prepare_data(data_dict.get('data'), window_size=60)``
            is called to build them. ``start_date`` / ``end_date`` are read for
            the risk-free rate.
        ticker_encoder: Ticker encoder forwarded to evaluation, plotting and
            model saving.
        metric: Name of the metric used to rank configurations.
        output_path: Optional path for the full results; relative paths are
            placed under ``<project root>/models/results``. Only used when
            ``save`` is true.
        save: Whether to save the best model, its metadata and the results.
        model_output: Optional model file name/path; saved as ``.keras``.
        sector_industry_df: Optional sector/industry table used for training,
            inference and the embedding plot.
        run_visualizations: Whether to render plots into
            ``<project root>/models/plots``.
        param_grid: Optional ``{name: [values, ...]}`` grid. Must provide the
            same keys as the built-in default, which is used when ``None``.

    Returns:
        ``{'best_config', 'best_result', 'results', 'solver'}``, or
        ``{'error', 'best_config': None, 'results': []}`` when every
        configuration failed.
    """
    print("===== ์—ฐ์† ์‹œ๊ฐ„ ๋ชจ๋ธ ์ตœ์ ํ™” =====")
    print(f"์„ ํƒ ๊ธฐ์ค€: {metric}")

    # Risk-free rate for the period covered by the data.
    start_date = data_dict.get('start_date')
    end_date = data_dict.get('end_date')
    risk_free_rate = get_risk_free_rate(start_date, end_date)
    print(f"๋ฌด์œ„ํ—˜ ์ˆ˜์ต๋ฅ : {risk_free_rate:.6f}")

    # Output directory for plots.
    plots_dir = Path(get_project_root()) / "models" / "plots"
    if run_visualizations:
        plots_dir.mkdir(parents=True, exist_ok=True)
        print(f"์‹œ๊ฐํ™” ๊ฒฐ๊ณผ๋Š” {plots_dir}์— ์ €์žฅ๋ฉ๋‹ˆ๋‹ค.")

    # Default grid: a single configuration with one epoch. A wider search that
    # was previously kept here as commented-out code used
    # hidden_dim=[96, 128, 256], value_weight=[0.5, 0.6, 0.7, 0.8, 0.9],
    # epochs=[50] and batch_size=[64, 96]; pass it via ``param_grid``.
    if param_grid is None:
        param_grid = {
            'hidden_dim': [128],
            'dropout_rate': [0.3],
            'dt': [0.1],
            'ode_steps': [5],
            'value_weight': [0.8],
            'factor': [0.5],
            'patience': [10],
            'min_lr': [1e-6],
            'epochs': [1],
            'batch_size': [64],
        }

    # Make sure the prepared arrays are available.
    required_keys = ['x_train', 'y_train', 'ticker_train', 'time_diffs_train',
                     'x_val', 'y_val', 'ticker_val', 'time_diffs_val']
    if not all(key in data_dict for key in required_keys):
        print("ํ•„์š”ํ•œ ๋ฐ์ดํ„ฐ ํ‚ค๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. ๋ฐ์ดํ„ฐ๋ฅผ ์ค€๋น„ํ•ฉ๋‹ˆ๋‹ค...")
        data_dict, _, _ = prepare_data(data_dict.get('data'), window_size=60)

    # Enumerate the grid.
    param_keys = list(param_grid.keys())
    param_values = list(param_grid.values())
    total_combinations = 1
    for values in param_values:
        total_combinations *= len(values)
    print(f"๊ทธ๋ฆฌ๋“œ ์„œ์น˜ ์‹คํ–‰: ์ด {total_combinations}๊ฐœ์˜ ํŒŒ๋ผ๋ฏธํ„ฐ ์กฐํ•ฉ์„ ํ…Œ์ŠคํŠธํ•ฉ๋‹ˆ๋‹ค.")

    results = []
    best_score = -float('inf')

    for iteration_counter, combination_values in enumerate(itertools.product(*param_values), start=1):
        config = dict(zip(param_keys, combination_values))
        # The two loss weights always sum to one.
        config['derivative_weight'] = 1.0 - config['value_weight']

        print("\n" + "=" * 60)
        print(f"์กฐํ•ฉ {iteration_counter}/{total_combinations}")
        print("ํ˜„์žฌ ํŒŒ๋ผ๋ฏธํ„ฐ:")
        for k, v in config.items():
            print(f" {k}: {v}")

        result = evaluate_config(
            config, data_dict, ticker_encoder, risk_free_rate,
            sector_industry_df, selection_method=metric
        )
        if result is None:
            print(f"์„ค์ • {iteration_counter}์— ๋Œ€ํ•œ ํ‰๊ฐ€ ๊ฒฐ๊ณผ๊ฐ€ None์ž…๋‹ˆ๋‹ค.")
            continue
        results.append(result)

        run_metrics = result['metrics']
        metric_value = run_metrics.get(metric, 0)
        current_return = run_metrics.get('total_return', 0)
        current_sharpe = run_metrics.get('sharpe_ratio', 0)
        current_trades = run_metrics.get('trade_count', 0)

        # Also show the combined score whenever it was computed.
        if 'combined_score' in run_metrics:
            combined_score = run_metrics['combined_score']
            print(f"๊ฒฐ๊ณผ - {metric}: {metric_value:.4f}, ๋ณตํ•ฉ์ ์ˆ˜: {combined_score:.4f}, "
                  f"์ˆ˜์ต๋ฅ : {current_return:.4f}, ์ƒคํ”„: {current_sharpe:.4f}, ๊ฑฐ๋ž˜: {current_trades}")
        else:
            print(f"๊ฒฐ๊ณผ - {metric}: {metric_value:.4f}, ์ˆ˜์ต๋ฅ : {current_return:.4f}, "
                  f"์ƒคํ”„: {current_sharpe:.4f}, ๊ฑฐ๋ž˜: {current_trades}")

        if metric_value > best_score:
            best_score = metric_value
            print(f"์ƒˆ๋กœ์šด ์ตœ๊ณ  ์„ฑ๋Šฅ ๋ฐœ๊ฒฌ! {metric}: {best_score:.4f}")
            print(f" ์ˆ˜์ต๋ฅ : {current_return:.4f}")
            print(f" ์ƒคํ”„ ๋น„์œจ: {current_sharpe:.4f}")
            print(f" ๊ฑฐ๋ž˜ ์ˆ˜: {current_trades}")
        else:
            print(f"ํ˜„์žฌ ์ตœ๊ณ  ์„ฑ๋Šฅ ({metric}): {best_score:.4f}")

    if not results:
        print("๋ชจ๋“  ์„ค์ •์—์„œ ํ‰๊ฐ€๊ฐ€ ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค.")
        return {'error': '๋ชจ๋“  ๋ชจ๋ธ ํ‰๊ฐ€ ์‹คํŒจ', 'best_config': None, 'results': []}

    best_result = max(results, key=lambda x: x['metrics'].get(metric, 0))
    best_config = best_result['config']
    best_threshold = best_result['best_threshold']
    best_model = best_result['model']
    best_metrics = best_result['metrics']

    print("\n๊ทธ๋ฆฌ๋“œ ์„œ์น˜ ์™„๋ฃŒ!")
    print(f" ์ด {len(results)}๊ฐœ ๊ฒฐ๊ณผ ์ค‘ ์ตœ๊ณ  ์„ฑ๋Šฅ:")
    print(f" {metric}: {best_metrics.get(metric, 0):.4f}")
    print(f" ์ˆ˜์ต๋ฅ : {best_metrics.get('total_return', 0):.4f}")
    print(f" ์ƒคํ”„ ๋น„์œจ: {best_metrics.get('sharpe_ratio', 0):.4f}")
    print(f" ๊ฑฐ๋ž˜ ์ˆ˜: {best_metrics.get('trade_count', 0)}")

    # Per-ticker averages of the best run. With no tickers the averages are
    # reported as 0.0 instead of NaN / ZeroDivisionError.
    per_ticker = list(best_result['ticker_metrics'].values())
    n_tickers = len(per_ticker)
    if n_tickers:
        avg_return = np.mean([info['total_return'] for info in per_ticker])
        avg_sharpe = np.mean([info['sharpe_ratio'] for info in per_ticker])
        avg_mdd = np.mean([info['max_drawdown'] for info in per_ticker])
        avg_win_rate = np.mean([info.get('win_rate', 0) for info in per_ticker])
        avg_trades = sum(len(info.get('trades', [])) for info in per_ticker) / n_tickers
    else:
        avg_return = avg_sharpe = avg_mdd = avg_win_rate = avg_trades = 0.0

    print("\n===== ์ตœ์  ์„ค์ • =====")
    print(best_config)
    print(f"์ตœ์  ์ž„๊ณ„๊ฐ’: {best_threshold:.4f}")
    print(f"\n----- ์ข…๋ชฉ๋ณ„ ํ‰๊ท  ์„ฑ๋Šฅ (ํ‹ฐ์ปค ์ˆ˜: {n_tickers}) -----")
    print(f"ํ‰๊ท  ์ข…๋ชฉ ์ˆ˜์ต๋ฅ : {avg_return:.4f}")
    print(f"ํ‰๊ท  ์ข…๋ชฉ ์ƒคํ”„ ๋น„์œจ: {avg_sharpe:.4f}")
    print(f"ํ‰๊ท  ์ข…๋ชฉ ์ตœ๋Œ€ ๋‚™ํญ: {avg_mdd:.4f}")
    print(f"ํ‰๊ท  ์ข…๋ชฉ ์Šน๋ฅ : {avg_win_rate:.2%}")
    print(f"ํ‰๊ท  ์ข…๋ชฉ ๊ฑฐ๋ž˜ ํšŸ์ˆ˜: {avg_trades:.1f}\n")

    # Plots.
    if run_visualizations:
        _save_visualizations(
            best_result, best_model, best_threshold, data_dict,
            ticker_encoder, sector_industry_df, plots_dir
        )

    # Held-out test evaluation, when a test split is available.
    test_backtest = None
    if all(key in data_dict for key in ['x_test', 'y_test', 'ticker_test', 'time_diffs_test']):
        print("\n===== ํ…Œ์ŠคํŠธ ์„ธํŠธ ์„ฑ๋Šฅ ํ‰๊ฐ€ =====")
        test_backtest = _evaluate_test_split(
            best_model, data_dict, best_threshold, risk_free_rate,
            ticker_encoder, sector_industry_df
        )

    optimization_results = {
        'best_config': best_config,
        'best_result': best_result,
        'results': results,
        'solver': 'rk4',
    }

    # Save the model, its metadata and the results.
    if save:
        if best_model is not None:
            _persist_best_model(
                best_model, best_result, best_config, best_threshold,
                ticker_encoder, model_output,
                {'avg_return': avg_return, 'avg_win_rate': avg_win_rate, 'avg_mdd': avg_mdd},
                test_backtest
            )
        if output_path:
            output_path = Path(output_path)
            if not output_path.is_absolute():
                output_path = Path(get_project_root()) / "models" / "results" / output_path
            save_results(optimization_results, output_path)

    # List the plot files that ended up on disk.
    if run_visualizations:
        print(f"\n์‹œ๊ฐํ™” ํŒŒ์ผ๋“ค์ด {plots_dir}์— ์ €์žฅ๋˜์—ˆ์Šต๋‹ˆ๋‹ค:")
        saved_plots = list(plots_dir.glob("*.png"))
        if saved_plots:
            for plot_file in saved_plots:
                print(f" - {plot_file.name}")
        else:
            print(" - ์ €์žฅ๋œ ์‹œ๊ฐํ™” ํŒŒ์ผ์ด ์—†์Šต๋‹ˆ๋‹ค.")

    return optimization_results