Spaces:
Sleeping
Sleeping
| """ | |
| Model optimization module based on grid search. | |
| """ | |
| import numpy as np | |
| import itertools | |
| import tensorflow as tf | |
| from pathlib import Path | |
| import matplotlib.pyplot as plt | |
| from ..models.contime import build_contime_lstm_model | |
| from ..data.processors import prepare_data | |
| from ..data.normalize import clean_numeric_data | |
| from ..evaluation.backtest import backtest_by_ticker, get_risk_free_rate | |
| from ..evaluation.model_evaluation import evaluate_model, calculate_combined_score | |
| from ..data.hierarchical_embedding import create_sector_industry_mapping, apply_sector_industry_mapping | |
| from .utils import TqdmProgressCallback, save_model, save_results, save_metadata, get_project_root | |
| from ..visualization.plots import plot_training_history, plot_performance_grid, plot_signal_distribution, plot_price_predictions, plot_graph_embeddings | |
def evaluate_config(config, data_dict, ticker_encoder, risk_free_rate, sector_industry_df=None, selection_method='combined_score'):
    """
    Train one model for ``config`` and evaluate it on the validation split.

    Args:
        config: Hyper-parameter dict. Keys read here: ``hidden_dim``,
            ``dropout_rate``, ``dt``, ``ode_steps``, ``value_weight``,
            ``derivative_weight``, ``patience``, ``factor``, ``min_lr``,
            ``epochs`` and ``batch_size``.
        data_dict: Must provide ``x_*``, ``y_*``, ``ticker_*``, ``y_*_dt`` and
            ``time_diffs_*`` for both the ``train`` and ``val`` splits.
            ``sector_*`` / ``industry_*`` are optional.
        ticker_encoder: Encoder forwarded to the sector/industry mapping; its
            ``mapping`` attribute is read when ``sector_industry_df`` is given.
        risk_free_rate: Forwarded to the threshold search / backtest.
        sector_industry_df: Optional table used to derive sector and industry
            ids when ``data_dict`` does not already carry them.
        selection_method: ``'combined_score'`` scores thresholds with
            ``calculate_combined_score``; any other value uses the
            Sharpe-weighted score of ``find_optimal_threshold``.

    Returns:
        A dict with the keys ``config``, ``metrics``, ``model``, ``history``,
        ``best_threshold``, ``ticker_metrics``, ``total_opportunities``,
        ``min_expected_trades`` and ``all_thresholds``; or ``None`` when any
        step raised (the error and its traceback are printed).
    """
    try:
        # Pull every required array up front so a missing key fails early.
        x_train = data_dict['x_train']
        y_train = data_dict['y_train']
        ticker_train = data_dict['ticker_train']
        y_train_dt = data_dict['y_train_dt']
        x_val = data_dict['x_val']
        y_val = data_dict['y_val']
        ticker_val = data_dict['ticker_val']
        y_val_dt = data_dict['y_val_dt']

        # Sanitise the feature tensors.
        x_train = clean_numeric_data(x_train, verbose=False)
        x_val = clean_numeric_data(x_val, verbose=False)

        # Sector / industry ids default to a single dummy category (0).
        sector_train = np.zeros_like(ticker_train)
        industry_train = np.zeros_like(ticker_train)
        sector_val = np.zeros_like(ticker_val)
        industry_val = np.zeros_like(ticker_val)

        if sector_industry_df is not None:
            id_to_ticker = {v: k for k, v in ticker_encoder.mapping.items()}
            ticker_list = list(id_to_ticker.values())
            sector_mapping, industry_mapping = create_sector_industry_mapping(
                ticker_list, sector_industry_df
            )
            sector_train, industry_train = apply_sector_industry_mapping(
                ticker_train, ticker_encoder, sector_mapping, industry_mapping
            )
            sector_val, industry_val = apply_sector_industry_mapping(
                ticker_val, ticker_encoder, sector_mapping, industry_mapping
            )

        # Pre-computed ids in data_dict take precedence, but only when all
        # four arrays are present; the previous check looked at the train
        # keys only and then raised KeyError on a missing val key.
        precomputed_keys = ('sector_train', 'industry_train', 'sector_val', 'industry_val')
        if all(key in data_dict for key in precomputed_keys):
            sector_train = data_dict['sector_train']
            industry_train = data_dict['industry_train']
            sector_val = data_dict['sector_val']
            industry_val = data_dict['industry_val']

        # Cast everything to the dtypes the model inputs are fed with.
        inputs_train = [
            np.asarray(x_train, dtype=np.float32),
            np.asarray(ticker_train, dtype=np.int32),
            np.asarray(sector_train, dtype=np.int32),
            np.asarray(industry_train, dtype=np.int32),
            np.asarray(data_dict['time_diffs_train'], dtype=np.float32)
        ]
        inputs_val = [
            np.asarray(x_val, dtype=np.float32),
            np.asarray(ticker_val, dtype=np.int32),
            np.asarray(sector_val, dtype=np.int32),
            np.asarray(industry_val, dtype=np.int32),
            np.asarray(data_dict['time_diffs_val'], dtype=np.float32)
        ]
        targets_train = {
            'value_output': np.asarray(y_train, dtype=np.float32),
            'derivative_output': np.asarray(y_train_dt, dtype=np.float32)
        }
        targets_val = {
            'value_output': np.asarray(y_val, dtype=np.float32),
            'derivative_output': np.asarray(y_val_dt, dtype=np.float32)
        }

        # Build the model. Shapes are read from the converted array so this
        # also works when clean_numeric_data returns a non-ndarray sequence.
        # NOTE(review): the category counts are the number of *distinct* ids
        # in the training split; if ids are not contiguous from 0 this may be
        # smaller than max(id) + 1 — confirm against build_contime_lstm_model.
        feature_shape = inputs_train[0].shape
        model = build_contime_lstm_model(
            seq_len=feature_shape[1],
            num_features=feature_shape[2],
            hidden_dim=config['hidden_dim'],
            dropout_rate=config['dropout_rate'],
            num_tickers=len(np.unique(ticker_train)),
            dt=config['dt'],
            ode_steps=config['ode_steps'],
            value_weight=config['value_weight'],
            derivative_weight=config['derivative_weight'],
            num_sectors=len(np.unique(sector_train)),
            num_industries=len(np.unique(industry_train)),
        )

        # Training callbacks.
        callbacks = [
            tf.keras.callbacks.EarlyStopping(
                monitor='val_loss',
                patience=config['patience'],
                restore_best_weights=True,
                verbose=0
            ),
            tf.keras.callbacks.ReduceLROnPlateau(
                monitor='val_loss',
                factor=config['factor'],
                patience=config['patience'] // 2,
                min_lr=config['min_lr'],
                verbose=0
            ),
            TqdmProgressCallback(epochs=config['epochs'])
        ]

        # Fit.
        history = model.fit(
            inputs_train,
            targets_train,
            validation_data=(inputs_val, targets_val),
            epochs=config['epochs'],
            batch_size=config['batch_size'],
            callbacks=callbacks,
            verbose=2
        )

        # Model-level metrics on the validation split.
        metrics = evaluate_model(
            model, x_val, y_val, ticker_val, y_val_dt,
            sector_test=sector_val,
            industry_test=industry_val,
            time_diffs_test=inputs_val[4],
            verbose=False
        )

        # Predict and keep only the first output when several are returned.
        pred_val = model.predict(inputs_val, verbose=0)
        y_pred_val = pred_val[0] if isinstance(pred_val, (list, tuple)) else pred_val
        y_pred_val = np.asarray(y_pred_val)

        # A 3-D output is reduced to its last timestep, first channel.
        if y_pred_val.ndim == 3:
            y_pred_val = y_pred_val[:, -1, 0]
        y_pred_val = y_pred_val.flatten()
        y_val_flat = np.asarray(y_val).flatten()
        ticker_val_flat = np.asarray(ticker_val).flatten()

        # Truncate to a common length so the three arrays stay aligned.
        min_len = min(len(y_pred_val), len(y_val_flat), len(ticker_val_flat))
        if min_len == 0:
            # Previously this surfaced as an opaque ZeroDivisionError below.
            raise ValueError("validation split is empty; cannot search for a threshold")
        y_pred_val = y_pred_val[:min_len]
        y_val_flat = y_val_flat[:min_len]
        ticker_val_flat = ticker_val_flat[:min_len]

        # Number of (day, ticker) slots a trade could have happened in.
        num_tickers = len(np.unique(ticker_val_flat))
        trading_days = len(y_val_flat) // num_tickers
        total_opportunities = trading_days * num_tickers
        min_expected_trades = max(10, int(total_opportunities * 0.05))

        # Threshold search.
        use_combined_score = (selection_method == 'combined_score')
        best_threshold, best_backtest, all_thresholds = find_optimal_threshold(
            y_pred_val, y_val_flat, ticker_val_flat, risk_free_rate, min_expected_trades, use_combined_score
        )
        if best_backtest is None:
            raise ValueError("threshold search did not produce a usable backtest")

        portfolio = best_backtest['portfolio']
        trade_count = len(portfolio.get('trades', []))

        # Both selection methods report the same backtest summary; the
        # combined score is added on top only when it is the criterion.
        if use_combined_score:
            combined_metric = calculate_combined_score(
                best_backtest,
                min_trades=50,
                max_trades=150
            )
            metrics['combined_score'] = combined_metric
        metrics.update({
            'best_threshold': best_threshold,
            'total_return': portfolio['total_return'],
            'sharpe_ratio': portfolio['sharpe_ratio'],
            'max_drawdown': portfolio['max_drawdown'],
            'trade_count': trade_count,
            'win_rate': portfolio.get('win_rate', 0),
            'avg_ticker_sharpe': best_backtest['avg_ticker_sharpe']
        })

        print(f"์๊ณ๊ฐ: {best_threshold:.4f}, ์์ต๋ฅ : {portfolio['total_return']:.4f}")
        print(f"๊ฑฐ๋: {trade_count}/{total_opportunities} "
              f"({trade_count / total_opportunities:.1%})")
        if use_combined_score:
            print(f"๋ณตํฉ ์ ์: {combined_metric:.4f}")

        return {
            'config': config,
            'metrics': metrics,
            'model': model,
            'history': history.history,
            'best_threshold': best_threshold,
            'ticker_metrics': best_backtest['by_ticker'],
            'total_opportunities': total_opportunities,
            'min_expected_trades': min_expected_trades,
            'all_thresholds': all_thresholds
        }
    except Exception as e:
        # Best-effort by design: a failing configuration must not abort the
        # whole grid search. The traceback is printed so the failure can be
        # diagnosed instead of being reduced to a one-line message.
        import traceback
        print(f"๋ชจ๋ธ ํ๊ฐ ์คํจ: {str(e)}")
        traceback.print_exc()
        return None
def find_optimal_threshold(y_pred_val, y_val_flat, ticker_val_flat, risk_free_rate, min_expected_trades, use_combined_score=True):
    """
    Scan signal thresholds and return the one with the best backtest score.

    Thresholds 0.000, 0.001, ... up to (but excluding) 0.05 are each
    backtested with ``backtest_by_ticker`` at a commission of 0.0025.

    Args:
        y_pred_val: Predictions passed to the backtest.
        y_val_flat: Realised returns passed to the backtest.
        ticker_val_flat: Ticker id per sample.
        risk_free_rate: Forwarded to the backtest.
        min_expected_trades: Trade-count target used to scale the score.
        use_combined_score: When true, score with ``calculate_combined_score``
            using trade bounds derived from ``min_expected_trades``; otherwise
            use ``avg_ticker_sharpe`` multiplied by a trade-count factor.

    Returns:
        ``(best_threshold, best_backtest, all_thresholds)`` where
        ``all_thresholds`` maps each threshold (as ``float``) to a summary of
        its portfolio result. If no threshold yields a comparable score
        (every score is NaN or ``-inf``) the first threshold's backtest is
        returned rather than ``None``.
    """
    thresholds = np.arange(0.00, 0.05, 0.001)

    # Loop-invariant trade bounds for the combined score.
    min_trades = max(10, min_expected_trades // 2)
    max_trades = min_expected_trades * 2

    best_weighted_score = -np.inf
    best_threshold = 0
    best_backtest = None
    fallback = None

    # Summary of every threshold, kept for later plotting.
    all_thresholds = {}

    for threshold in thresholds:
        threshold = float(threshold)
        result = backtest_by_ticker(
            predictions=y_pred_val,
            actual_returns=y_val_flat,
            ticker_ids=ticker_val_flat,
            threshold=threshold,
            commission=0.0025,
            risk_free_rate=risk_free_rate
        )
        portfolio = result['portfolio']
        trades = portfolio.get('trades', [])

        all_thresholds[threshold] = {
            'total_return': portfolio['total_return'],
            'sharpe_ratio': portfolio['sharpe_ratio'],
            'max_drawdown': portfolio['max_drawdown'],
            'trades': trades
        }

        if fallback is None:
            fallback = (threshold, result)

        if use_combined_score:
            weighted_score = calculate_combined_score(
                result,
                min_trades=min_trades,
                max_trades=max_trades
            )
        else:
            trade_count = len(trades)
            if min_expected_trades <= 0:
                # No trade-count target: nothing to penalise (and avoids a
                # division by zero).
                trade_ratio_score = 1.0
            else:
                ratio = trade_count / min_expected_trades
                # At least half the target: linear credit capped at 1.
                # Below that: quadratic penalty.
                trade_ratio_score = min(1.0, ratio) if trade_count >= (min_expected_trades * 0.5) else ratio ** 2
            weighted_score = result['avg_ticker_sharpe'] * trade_ratio_score

        if weighted_score > best_weighted_score:
            best_weighted_score = weighted_score
            best_threshold = threshold
            best_backtest = result

    # NaN / -inf scores never win the comparison above; fall back to the
    # first backtest so callers always get a subscriptable result.
    if best_backtest is None and fallback is not None:
        best_threshold, best_backtest = fallback

    return best_threshold, best_backtest, all_thresholds
def _flatten_value_predictions(raw_predictions):
    """Return the first model output as a 1-D array (last timestep, first channel if 3-D)."""
    values = raw_predictions[0] if isinstance(raw_predictions, (list, tuple)) else raw_predictions
    values = np.asarray(values)
    if values.ndim == 3:
        return values[:, -1, 0]
    return values.flatten()


def _resolve_sector_industry(data_dict, split, ticker_ids, ticker_encoder, sector_industry_df):
    """Return ``(sector_ids, industry_ids)`` for ``split``: data_dict first, then the mapping, then zeros."""
    sector_key = f'sector_{split}'
    industry_key = f'industry_{split}'
    has_sector = sector_key in data_dict
    has_industry = industry_key in data_dict
    if has_sector and has_industry:
        return data_dict[sector_key], data_dict[industry_key]

    if sector_industry_df is not None:
        # Same derivation evaluate_config uses for the train/val splits, so
        # the model sees the ids it was trained on.
        id_to_ticker = {v: k for k, v in ticker_encoder.mapping.items()}
        sector_mapping, industry_mapping = create_sector_industry_mapping(
            list(id_to_ticker.values()), sector_industry_df
        )
        fallback_sector, fallback_industry = apply_sector_industry_mapping(
            ticker_ids, ticker_encoder, sector_mapping, industry_mapping
        )
    else:
        fallback_sector = np.zeros_like(ticker_ids)
        fallback_industry = np.zeros_like(ticker_ids)

    sector_ids = data_dict[sector_key] if has_sector else fallback_sector
    industry_ids = data_dict[industry_key] if has_industry else fallback_industry
    return sector_ids, industry_ids


def run_optimization_pipeline(data_dict, ticker_encoder, metric='combined_score',
                              output_path=None, save=True, model_output=None,
                              sector_industry_df=None, run_visualizations=False):
    """
    Grid-search the continuous-time model, then report, plot and save the best run.

    Args:
        data_dict: Prepared splits (see ``evaluate_config``). If a required
            key is missing, ``prepare_data`` is run on ``data_dict['data']``.
            ``start_date`` / ``end_date`` are read for the risk-free rate and
            ``*_test`` keys trigger a test-set backtest.
        ticker_encoder: Encoder exposing ``mapping`` or ``classes_``.
        metric: Key of ``result['metrics']`` used to rank configurations.
        output_path: Optional file for the full results; relative paths are
            placed under ``<project root>/models/results``.
        save: Persist the best model and its metadata when true.
        model_output: Optional model file name / path (forced to ``.keras``).
        sector_industry_df: Optional sector/industry table forwarded to
            ``evaluate_config`` and used for the graph-embedding plot when
            ``data_dict`` carries no ``sector_industry_df`` of its own.
        run_visualizations: Write PNG plots under ``models/plots`` when true.

    Returns:
        ``{'best_config', 'best_result', 'results', 'solver'}``; or
        ``{'error', 'best_config': None, 'results': []}`` when every
        configuration failed.
    """
    print("===== ์ฐ์ ์๊ฐ ๋ชจ๋ธ ์ต์ ํ =====")
    print(f"์ ํ ๊ธฐ์ค: {metric}")

    # Risk-free rate for the evaluated period.
    start_date = data_dict.get('start_date')
    end_date = data_dict.get('end_date')
    risk_free_rate = get_risk_free_rate(start_date, end_date)
    print(f"๋ฌด์ํ ์์ต๋ฅ : {risk_free_rate:.6f}")

    # Plot output directory.
    plots_dir = Path(get_project_root()) / "models" / "plots"
    if run_visualizations:
        plots_dir.mkdir(parents=True, exist_ok=True)
        print(f"์๊ฐํ ๊ฒฐ๊ณผ๋ {plots_dir}์ ์ ์ฅ๋ฉ๋๋ค.")

    # Parameter grid currently in use (a single combination, one epoch).
    param_grid = {
        'hidden_dim': [128],
        'dropout_rate': [0.3],
        'dt': [0.1],
        'ode_steps': [5],
        'value_weight': [0.8],
        'factor': [0.5],
        'patience': [10],
        'min_lr': [1e-6],
        'epochs': [1],
        'batch_size': [64]
    }
    # Wider grid kept for reference:
    # param_grid = {
    #     'hidden_dim': [96, 128, 256],
    #     'dropout_rate': [0.3],
    #     'dt': [0.1],
    #     'ode_steps': [5],
    #     'value_weight': [0.5, 0.6, 0.7, 0.8, 0.9],
    #     'factor': [0.5],
    #     'patience': [10],
    #     'min_lr': [1e-6],
    #     'epochs': [50],
    #     'batch_size': [64, 96]
    # }

    # Make sure the prepared splits exist.
    required_keys = ['x_train', 'y_train', 'ticker_train', 'time_diffs_train',
                     'x_val', 'y_val', 'ticker_val', 'time_diffs_val']
    if not all(key in data_dict for key in required_keys):
        print("ํ์ํ ๋ฐ์ดํฐ ํค๊ฐ ์์ต๋๋ค. ๋ฐ์ดํฐ๋ฅผ ์ค๋นํฉ๋๋ค...")
        data_dict, _, _ = prepare_data(data_dict.get('data'), window_size=60)

    # Grid search.
    param_keys = list(param_grid.keys())
    param_values = list(param_grid.values())
    total_combinations = 1
    for values in param_values:
        total_combinations *= len(values)
    print(f"๊ทธ๋ฆฌ๋ ์์น ์คํ: ์ด {total_combinations}๊ฐ์ ํ๋ผ๋ฏธํฐ ์กฐํฉ์ ํ ์คํธํฉ๋๋ค.")

    results = []
    best_score = -float('inf')  # only drives the progress messages below

    for iteration_counter, combination_values in enumerate(itertools.product(*param_values), start=1):
        config = dict(zip(param_keys, combination_values))
        config['derivative_weight'] = 1.0 - config['value_weight']

        print("\n" + "=" * 60)
        print(f"์กฐํฉ {iteration_counter}/{total_combinations}")
        print(f"ํ์ฌ ํ๋ผ๋ฏธํฐ:")
        for k, v in config.items():
            print(f" {k}: {v}")

        result = evaluate_config(
            config, data_dict, ticker_encoder, risk_free_rate,
            sector_industry_df, selection_method=metric
        )
        if result is None:
            print(f"์ค์ {iteration_counter}์ ๋ํ ํ๊ฐ ๊ฒฐ๊ณผ๊ฐ None์ ๋๋ค.")
            continue
        results.append(result)

        run_metrics = result['metrics']
        metric_value = run_metrics.get(metric, 0)
        current_return = run_metrics.get('total_return', 0)
        current_sharpe = run_metrics.get('sharpe_ratio', 0)
        current_trades = run_metrics.get('trade_count', 0)

        # Also show the combined score when it was computed.
        if 'combined_score' in run_metrics:
            combined_score = run_metrics['combined_score']
            print(f"๊ฒฐ๊ณผ - {metric}: {metric_value:.4f}, ๋ณตํฉ์ ์: {combined_score:.4f}, "
                  f"์์ต๋ฅ : {current_return:.4f}, ์คํ: {current_sharpe:.4f}, ๊ฑฐ๋: {current_trades}")
        else:
            print(f"๊ฒฐ๊ณผ - {metric}: {metric_value:.4f}, ์์ต๋ฅ : {current_return:.4f}, "
                  f"์คํ: {current_sharpe:.4f}, ๊ฑฐ๋: {current_trades}")

        if metric_value > best_score:
            best_score = metric_value
            print(f"์๋ก์ด ์ต๊ณ ์ฑ๋ฅ ๋ฐ๊ฒฌ! {metric}: {best_score:.4f}")
            print(f" ์์ต๋ฅ : {current_return:.4f}")
            print(f" ์คํ ๋น์จ: {current_sharpe:.4f}")
            print(f" ๊ฑฐ๋ ์: {current_trades}")
        else:
            print(f"ํ์ฌ ์ต๊ณ ์ฑ๋ฅ ({metric}): {best_score:.4f}")

    if not results:
        print("๋ชจ๋ ์ค์ ์์ ํ๊ฐ๊ฐ ์คํจํ์ต๋๋ค.")
        return {'error': '๋ชจ๋ ๋ชจ๋ธ ํ๊ฐ ์คํจ', 'best_config': None, 'results': []}

    # The winner is chosen from the collected results (missing metric -> 0).
    best_result = max(results, key=lambda x: x['metrics'].get(metric, 0))
    best_config = best_result['config']
    best_threshold = best_result['best_threshold']
    best_model = best_result['model']

    print(f"\n๊ทธ๋ฆฌ๋ ์์น ์๋ฃ!")
    print(f" ์ด {len(results)}๊ฐ ๊ฒฐ๊ณผ ์ค ์ต๊ณ ์ฑ๋ฅ:")
    print(f" {metric}: {best_result['metrics'].get(metric, 0):.4f}")
    print(f" ์์ต๋ฅ : {best_result['metrics'].get('total_return', 0):.4f}")
    print(f" ์คํ ๋น์จ: {best_result['metrics'].get('sharpe_ratio', 0):.4f}")
    print(f" ๊ฑฐ๋ ์: {best_result['metrics'].get('trade_count', 0)}")

    # Per-ticker averages of the best run.
    ticker_metrics = best_result['ticker_metrics']
    ticker_ids = list(ticker_metrics.keys())
    n_tickers = len(ticker_ids)
    avg_return = np.mean([ticker_metrics[tid]['total_return'] for tid in ticker_ids])
    avg_sharpe = np.mean([ticker_metrics[tid]['sharpe_ratio'] for tid in ticker_ids])
    avg_mdd = np.mean([ticker_metrics[tid]['max_drawdown'] for tid in ticker_ids])
    avg_win_rate = np.mean([ticker_metrics[tid].get('win_rate', 0) for tid in ticker_ids])
    total_trades = sum(len(ticker_metrics[tid].get('trades', [])) for tid in ticker_ids)
    # Guard against an empty per-ticker table (previously ZeroDivisionError).
    avg_trades = total_trades / n_tickers if n_tickers else 0.0

    print("\n===== ์ต์ ์ค์ =====")
    print(best_config)
    print(f"์ต์ ์๊ณ๊ฐ: {best_threshold:.4f}")
    print(f"\n----- ์ข ๋ชฉ๋ณ ํ๊ท ์ฑ๋ฅ (ํฐ์ปค ์: {n_tickers}) -----")
    print(f"ํ๊ท ์ข ๋ชฉ ์์ต๋ฅ : {avg_return:.4f}")
    print(f"ํ๊ท ์ข ๋ชฉ ์คํ ๋น์จ: {avg_sharpe:.4f}")
    print(f"ํ๊ท ์ข ๋ชฉ ์ต๋ ๋ํญ: {avg_mdd:.4f}")
    print(f"ํ๊ท ์ข ๋ชฉ ์น๋ฅ : {avg_win_rate:.2%}")
    print(f"ํ๊ท ์ข ๋ชฉ ๊ฑฐ๋ ํ์: {avg_trades:.1f}\n")

    # Visualisations (best-effort: a plotting failure never aborts the run).
    if run_visualizations:
        try:
            # 1. Training history.
            fig1 = plot_training_history(best_result['history'])
            if fig1:
                fig1.savefig(plots_dir / "training_history.png", dpi=300, bbox_inches='tight')
                plt.close(fig1)
                print(" - training_history.png ์ ์ฅ ์๋ฃ")

            # 2. Performance grid, keyed by the commission used in the search.
            fig2 = plot_performance_grid({0.0025: best_result.get('all_thresholds', {})})
            if fig2:
                fig2.savefig(plots_dir / "performance_grid.png", dpi=300, bbox_inches='tight')
                plt.close(fig2)
                print(" - performance_grid.png ์ ์ฅ ์๋ฃ")

            # Validation inputs for the prediction-based plots.
            x_val_clean = clean_numeric_data(data_dict['x_val'], replace_nan=0.0, replace_inf=0.0, verbose=False)
            ticker_val = np.asarray(data_dict['ticker_val'], dtype=np.int32)
            sector_val, industry_val = _resolve_sector_industry(
                data_dict, 'val', ticker_val, ticker_encoder, sector_industry_df
            )
            time_diffs_val = np.asarray(data_dict['time_diffs_val'], dtype=np.float32)

            pred_val = best_model.predict([
                tf.cast(x_val_clean, tf.float32),
                tf.cast(ticker_val, tf.int32),
                tf.cast(sector_val, tf.int32),
                tf.cast(industry_val, tf.int32),
                tf.cast(time_diffs_val, tf.float32)
            ], verbose=0)
            # Same reduction evaluate_config applies (last step of 3-D output).
            y_pred_val = _flatten_value_predictions(pred_val)

            # 3. Signal distribution.
            fig3 = plot_signal_distribution(y_pred_val, best_threshold)
            if fig3:
                fig3.savefig(plots_dir / "signal_distribution.png", dpi=300, bbox_inches='tight')
                plt.close(fig3)
                print(" - signal_distribution.png ์ ์ฅ ์๋ฃ")

            # 4. Price predictions.
            fig5 = plot_price_predictions(best_model, data_dict, best_threshold, ticker_encoder)
            if fig5:
                fig5.savefig(plots_dir / "price_predictions.png", dpi=300, bbox_inches='tight')
                plt.close(fig5)
                print(" - price_predictions.png ์ ์ฅ ์๋ฃ")

            # 5. Graph embeddings. Prefer the table stored in data_dict, then
            # the caller-supplied one (previously discarded by reassigning
            # the parameter), and only then try to build one from tickers.
            graph_df = data_dict.get('sector_industry_df')
            if graph_df is None:
                graph_df = sector_industry_df
            if graph_df is None:
                try:
                    if hasattr(ticker_encoder, 'classes_'):
                        tickers = ticker_encoder.classes_.tolist()
                    elif hasattr(ticker_encoder, 'mapping'):
                        tickers = list(ticker_encoder.mapping.keys())
                    else:
                        tickers = None
                    if tickers:
                        from ..data.hierarchical_embedding import get_industry_data
                        graph_df = get_industry_data(tickers)
                        print(f"์นํฐ-์ฐ์ ๋ฐ์ดํฐ ๋์ ์์ฑ: {len(graph_df) if graph_df is not None else 0}๊ฐ ์ข ๋ชฉ")
                except Exception as e:
                    print(f"์นํฐ-์ฐ์ ๋ฐ์ดํฐ ์์ฑ ์คํจ: {e}")
                    graph_df = None

            if graph_df is not None and len(graph_df) > 0:
                try:
                    save_path_tsne = plots_dir / 'graph_embedding_tsne.png'
                    save_path_pca = plots_dir / 'graph_embedding_pca.png'
                    plot_graph_embeddings(
                        graph_df,
                        save_path_tsne=str(save_path_tsne),
                        save_path_pca=str(save_path_pca)
                    )
                    print(" - graph_embedding_tsne.png ์ ์ฅ ์๋ฃ")
                    print(" - graph_embedding_pca.png ์ ์ฅ ์๋ฃ")
                except Exception as e:
                    print(f"๊ทธ๋ํ ์๋ฒ ๋ฉ ์๊ฐํ ์ค๋ฅ: {e}")
                    import traceback
                    traceback.print_exc()
            else:
                print(" - ์นํฐ-์ฐ์ ๋ฐ์ดํฐ๊ฐ ์์ด ๊ทธ๋ํ ์๋ฒ ๋ฉ ์๊ฐํ๋ฅผ ๊ฑด๋๋๋๋ค.")
        except Exception as e:
            print(f"์๊ฐํ ์ค ์ค๋ฅ ๋ฐ์: {e}")
            import traceback
            traceback.print_exc()
            print("์๊ฐํ๋ฅผ ๊ฑด๋๋ฐ๊ณ ๊ณ์ ์งํํฉ๋๋ค.")

    # Test-set evaluation (only when the test split is available).
    test_backtest = None
    if all(key in data_dict for key in ['x_test', 'y_test', 'ticker_test', 'time_diffs_test']):
        print("\n===== ํ ์คํธ ์ธํธ ์ฑ๋ฅ ํ๊ฐ =====")
        try:
            x_test = data_dict['x_test']
            y_test = data_dict['y_test']
            ticker_test = np.asarray(data_dict['ticker_test'], dtype=np.int32)
            time_diffs_test = np.asarray(data_dict['time_diffs_test'], dtype=np.float32)

            sector_test, industry_test = _resolve_sector_industry(
                data_dict, 'test', ticker_test, ticker_encoder, sector_industry_df
            )
            sector_test = np.asarray(sector_test, dtype=np.int32)
            industry_test = np.asarray(industry_test, dtype=np.int32)

            x_test_clean = clean_numeric_data(x_test, replace_nan=0.0, replace_inf=0.0, verbose=False)

            test_preds = best_model.predict([
                tf.cast(x_test_clean, tf.float32),
                tf.cast(ticker_test, tf.int32),
                tf.cast(sector_test, tf.int32),
                tf.cast(industry_test, tf.int32),
                tf.cast(time_diffs_test, tf.float32)
            ], verbose=0)
            y_pred_test = _flatten_value_predictions(test_preds)

            # Backtest with the threshold selected on the validation split.
            test_backtest = backtest_by_ticker(
                predictions=y_pred_test,
                actual_returns=np.asarray(y_test).flatten(),
                ticker_ids=ticker_test.flatten(),
                threshold=best_threshold,
                commission=0.0025,
                risk_free_rate=risk_free_rate
            )

            per_ticker = list(test_backtest['by_ticker'].values())
            avg_ticker_return = np.mean([info['total_return'] for info in per_ticker])
            avg_ticker_sharpe = np.mean([info['sharpe_ratio'] for info in per_ticker])

            print(f"\n----- ํฌํธํด๋ฆฌ์ค ์ฑ๋ฅ -----")
            print(f"ํ ์คํธ ์ธํธ ์ด ์์ต๋ฅ : {test_backtest['portfolio']['total_return']:.4f}")
            print(f"ํ ์คํธ ์ธํธ ์คํ ๋น์จ: {test_backtest['portfolio']['sharpe_ratio']:.4f}")
            print(f"ํ ์คํธ ์ธํธ ์ต๋ ๋ํญ: {test_backtest['portfolio']['max_drawdown']:.4f}")
            print(f"ํ ์คํธ ์ธํธ ๊ฑฐ๋ ์: {len(test_backtest['portfolio'].get('trades', []))}")
            print(f"\n----- ๊ฐ๋ณ ์ข ๋ชฉ ํ๊ท ์ฑ๋ฅ -----")
            print(f"ํ ์คํธ ์ธํธ ํ๊ท ์ข ๋ชฉ ์์ต๋ฅ : {avg_ticker_return:.4f}")
            print(f"ํ ์คํธ ์ธํธ ํ๊ท ์ข ๋ชฉ ์คํ ๋น์จ: {avg_ticker_sharpe:.4f}")
        except Exception as e:
            print(f"ํ ์คํธ ์ธํธ ํ๊ฐ ์ค ์ค๋ฅ ๋ฐ์: {e}")
            import traceback
            traceback.print_exc()

    optimization_results = {
        'best_config': best_config,
        'best_result': best_result,
        'results': results,
        'solver': 'rk4'
    }

    # Persist the model, its metadata and (optionally) the full results.
    # NOTE(review): SOURCE lost its indentation; the nesting of this section
    # is reconstructed from the statement order — confirm against the repo.
    if save:
        if best_model is not None:
            # Serialisable view of the ticker encoder.
            encoders = None
            if ticker_encoder is not None:
                if hasattr(ticker_encoder, 'mapping'):
                    encoders = {
                        'ticker_encoder': ticker_encoder.mapping
                    }
                elif hasattr(ticker_encoder, 'classes_'):
                    encoders = {
                        'ticker_encoder': {i: tick for i, tick in enumerate(ticker_encoder.classes_)}
                    }

            models_dir = Path(get_project_root()) / "models"
            results_dir = models_dir / "results"
            results_dir.mkdir(parents=True, exist_ok=True)

            if model_output is None:
                model_output = results_dir / "best_contime_model.keras"
            else:
                model_output = Path(model_output)
                if not model_output.is_absolute():
                    model_output = results_dir / model_output.name

            # Force the .keras extension by touching only the file name; the
            # old str.replace('.h5', '') also rewrote matching directories.
            if model_output.suffix != '.keras':
                if model_output.suffix == '.h5':
                    model_output = model_output.with_suffix('.keras')
                else:
                    model_output = model_output.with_name(model_output.name + '.keras')

            save_model(
                model=best_model,
                model_path=model_output,
                config=best_config,
                encoders=encoders
            )

            # Threshold and performance summary stored next to the model.
            best_metrics = best_result['metrics']
            total_opportunities = best_result['total_opportunities']
            threshold_info = {
                'best_threshold': best_threshold,
                'config': best_config,
                'avg_ticker_sharpe': best_metrics.get('avg_ticker_sharpe', 0),
                'portfolio_sharpe': best_metrics['sharpe_ratio'],
                'total_return': best_metrics['total_return'],
                'avg_ticker_return': float(avg_return),
                'avg_ticker_win_rate': float(avg_win_rate),
                'avg_ticker_mdd': float(avg_mdd),
                'trade_count': best_metrics['trade_count'],
                'total_opportunities': total_opportunities,
                'trade_ratio': float(best_metrics['trade_count'] / total_opportunities) if total_opportunities else 0.0,
                'min_expected_trades': best_result['min_expected_trades']
            }
            if test_backtest:
                threshold_info['test_metrics'] = {
                    'total_return': test_backtest['portfolio']['total_return'],
                    'sharpe_ratio': test_backtest['portfolio']['sharpe_ratio'],
                    'max_drawdown': test_backtest['portfolio']['max_drawdown'],
                    'trade_count': len(test_backtest['portfolio'].get('trades', []))
                }

            meta_path = models_dir / "results" / f"{model_output.stem}_meta.json"
            save_metadata(threshold_info, meta_path)

        if output_path:
            output_path = Path(output_path)
            if not output_path.is_absolute():
                output_path = Path(get_project_root()) / "models" / "results" / output_path
            save_results(optimization_results, output_path)

    # List the plot files that ended up on disk.
    if run_visualizations:
        print(f"\n์๊ฐํ ํ์ผ๋ค์ด {plots_dir}์ ์ ์ฅ๋์์ต๋๋ค:")
        saved_plots = list(plots_dir.glob("*.png"))
        if saved_plots:
            for plot_file in saved_plots:
                print(f" - {plot_file.name}")
        else:
            print(" - ์ ์ฅ๋ ์๊ฐํ ํ์ผ์ด ์์ต๋๋ค.")

    return optimization_results