Spaces:
Sleeping
Sleeping
| """ | |
| Visualization function module | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import matplotlib.dates as mdates | |
| import networkx as nx | |
| import traceback | |
| from matplotlib.patches import Patch | |
| from sklearn.manifold import TSNE | |
| from sklearn.decomposition import PCA | |
| from node2vec import Node2Vec | |
def clean_for_visualization(X):
    """Return ``X`` as a float32 array with non-finite entries zeroed.

    ``None`` is passed straight through. Any other input is converted with
    ``np.asarray(..., dtype=np.float32)`` and every NaN, +inf and -inf is
    replaced by ``0.0``.
    """
    if X is not None:
        as_float = np.asarray(X, dtype=np.float32)
        # Zero out NaN and both infinities so downstream plotting/prediction
        # never sees non-finite values.
        return np.nan_to_num(as_float, nan=0.0, posinf=0.0, neginf=0.0)
    return X
def plot_training_history(history):
    """Plot training/validation loss and the learning-rate schedule.

    Args:
        history: Either an object exposing a ``history`` attribute (a mapping
            of metric name to per-epoch values) or such a mapping directly.
            ``'loss'`` is required. ``'val_loss'`` and the learning rate
            (``'learning_rate'``, falling back to ``'lr'``) are plotted only
            when present.

    Returns:
        The matplotlib figure containing the two panels.

    Raises:
        KeyError: If ``'loss'`` is missing from the history mapping.
    """
    history_dict = history.history if hasattr(history, 'history') else history

    # Read the mandatory series before any figure exists so that a missing
    # key cannot leave an orphaned figure open.
    train_loss = history_dict['loss']

    # NOTE(review): some Keras versions reportedly log the learning rate
    # under 'lr' rather than 'learning_rate' - confirm against the training
    # setup in use.
    lr_values = None
    for lr_key in ('learning_rate', 'lr'):
        if lr_key in history_dict:
            lr_values = history_dict[lr_key]
            break

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Loss panel
    ax1.plot(train_loss, label='Train Loss')
    if 'val_loss' in history_dict:
        ax1.plot(history_dict['val_loss'], label='Validation Loss')
    ax1.set_title('Model Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True)

    # Learning-rate panel
    if lr_values is not None:
        ax2.plot(lr_values, label='Learning Rate')
        # Only request a legend when something was drawn; an empty legend
        # triggers a matplotlib warning.
        ax2.legend()
    ax2.set_title('Learning Rate')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Learning Rate')
    ax2.grid(True)

    fig.tight_layout()
    return fig
def plot_performance_grid(grid_results):
    """Draw a 2x2 grid of backtest metrics as a function of threshold.

    Args:
        grid_results: Mapping ``commission -> {threshold -> result}`` where
            each ``result`` is a mapping providing ``'total_return'``,
            ``'sharpe_ratio'``, ``'max_drawdown'`` and optionally
            ``'trades'`` (a sized collection; treated as empty when absent).
            The commission is multiplied by 100 for the legend label.

    Returns:
        The matplotlib figure holding the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(9, 6))

    # (axes, title, y-label, how to pull the plotted value out of one result)
    panels = (
        (axes[0, 0], 'Total Return by Threshold', 'Total Return',
         lambda entry: entry['total_return']),
        (axes[0, 1], 'Sharpe Ratio by Threshold', 'Sharpe Ratio',
         lambda entry: entry['sharpe_ratio']),
        (axes[1, 0], 'Number of Trades', 'Trades',
         lambda entry: len(entry.get('trades', []))),
        (axes[1, 1], 'Max Drawdown by Threshold', 'Max Drawdown',
         lambda entry: entry['max_drawdown']),
    )

    for panel_ax, title, y_label, extract in panels:
        # One line per commission level.
        for commission, by_threshold in grid_results.items():
            thresholds = list(by_threshold.keys())
            values = [extract(by_threshold[th]) for th in thresholds]
            panel_ax.plot(thresholds, values, label=f'Comm {commission*100:.2f}%')
        panel_ax.set_title(title)
        panel_ax.set_xlabel('Threshold')
        panel_ax.set_ylabel(y_label)
        panel_ax.legend()
        panel_ax.grid(True)

    plt.tight_layout()
    return fig
def plot_signal_distribution(y_pred, best_threshold):
    """Histogram the predictions split into buy / sell / hold buckets.

    A prediction above ``best_threshold`` counts as a buy, one below
    ``-best_threshold`` as a sell, and anything in the closed interval
    between the two as a hold. Both thresholds are marked with dashed
    vertical lines.

    Args:
        y_pred: Predictions supporting element-wise comparison and boolean
            mask indexing (e.g. a numpy array).
        best_threshold: Symmetric signal threshold.

    Returns:
        The matplotlib figure.
    """
    fig, ax = plt.subplots(figsize=(12, 6))

    # Partition the predictions by the signal they would trigger.
    sell_level = -best_threshold
    is_buy = y_pred > best_threshold
    is_sell = y_pred < sell_level
    is_hold = (y_pred >= sell_level) & (y_pred <= best_threshold)
    buckets = [y_pred[is_buy], y_pred[is_sell], y_pred[is_hold]]

    ax.hist(buckets, bins=50, label=['Buy', 'Sell', 'Hold'], alpha=0.7)

    ax.axvline(x=best_threshold, color='r', linestyle='--',
               label=f'Buy Threshold ({best_threshold:.4f})')
    ax.axvline(x=sell_level, color='g', linestyle='--',
               label=f'Sell Threshold (-{best_threshold:.4f})')

    ax.set_xlabel('Predicted Returns')
    ax.set_ylabel('Frequency')
    ax.set_title('Distribution of Trading Signals')
    ax.legend()
    ax.grid(True)

    plt.tight_layout()
    return fig
def plot_price_predictions(model, data_dict, best_threshold, ticker_encoder, x_test_clean=None):
    """Plot actual vs. prediction-implied closing prices for every ticker.

    For each distinct ticker id in the evaluation set, one subplot shows the
    actual ``Close`` series, a one-step-ahead predicted price (previous
    actual close times ``exp(prediction)``, i.e. the prediction is treated
    as a log return), markers for the long / short / neutral position
    implied by ``best_threshold``, and shaded spans for long and short runs.

    Args:
        model: Object with ``predict(inputs, verbose=0)`` taking a list of
            five inputs (features, ticker ids, sector ids, industry ids,
            time gaps). If it returns a list, the first element is used.
        data_dict: Mapping read for ``'x_test'``, ``'ticker_test'`` and
            ``'data'`` (required) plus optional ``'time_diffs_test'``,
            ``'sector_test'`` and ``'industry_test'``. When ``'x_test'`` is
            empty, ``'x_val'``, ``'ticker_val'`` and ``'time_diffs_val'``
            are used instead. ``'data'`` must support filtering on a
            ``'ticker'`` column, ``sort_index()`` and a ``'Close'`` column.
        best_threshold: Symmetric threshold turning a prediction into a
            long (> threshold), short (< -threshold) or neutral signal.
        ticker_encoder: Object whose ``inverse_transform([id])`` maps an
            integer ticker id back to the ticker name used in ``'data'``.
        x_test_clean: Optional pre-cleaned feature array used in place of
            cleaning ``x_test`` here.

    Returns:
        The matplotlib figure, or ``None`` when there is nothing to plot or
        any step fails (the error and traceback are printed).
    """
    # NOTE(review): the non-English message literals below appear
    # mis-encoded in this file; they are kept exactly as found.
    fig = None
    try:
        x_test = data_dict['x_test']
        ticker_test = data_dict['ticker_test']
        data = data_dict['data']
        time_diffs_test = data_dict.get('time_diffs_test')

        # Fall back to the validation split when the test split is empty.
        if len(x_test) == 0:
            print("ν μ€νΈ λ°μ΄ν°κ° μμ΅λλ€. κ²μ¦ λ°μ΄ν°λ₯Ό μ¬μ©ν©λλ€.")
            x_test = data_dict['x_val']
            ticker_test = data_dict['ticker_val']
            time_diffs_test = data_dict.get('time_diffs_val')
            if len(x_test) == 0:
                print("μκ°νν λ°μ΄ν°κ° μμ΅λλ€.")
                return None

        # Prefer the caller's cleaned features; otherwise do a basic cleanup.
        if x_test_clean is not None:
            x_test_processed = x_test_clean
        else:
            x_test_processed = clean_for_visualization(x_test)
        ticker_test_clean = np.asarray(ticker_test, dtype=np.int32)

        sector_test = data_dict.get('sector_test')
        industry_test = data_dict.get('industry_test')

        # Feature columns presumed to carry encoded sector / industry ids
        # - TODO confirm against the feature pipeline.
        sector_feature_idx = 59
        industry_feature_idx = 60
        # NOTE(review): when only one of the two is missing, both are
        # replaced (here and in the dummy branch below); kept as in the
        # original.
        if (sector_test is None or industry_test is None) and \
                x_test_processed.shape[2] > max(sector_feature_idx, industry_feature_idx):
            # Use the value at the last time step of each sequence.
            sector_test = x_test_processed[:, -1, sector_feature_idx].astype(np.int32)
            industry_test = x_test_processed[:, -1, industry_feature_idx].astype(np.int32)

        if sector_test is None or industry_test is None:
            print("μΉν°/μ°μ μ λ³΄κ° μμ΅λλ€. λλ―Έ λ°μ΄ν°λ₯Ό μμ±ν©λλ€.")
            sector_test = np.zeros_like(ticker_test_clean)
            industry_test = np.zeros_like(ticker_test_clean)
        else:
            sector_test = np.asarray(sector_test, dtype=np.int32)
            industry_test = np.asarray(industry_test, dtype=np.int32)

        # Time-gap input: all ones (samples x sequence length) when absent.
        if time_diffs_test is None or len(time_diffs_test) == 0:
            print("μκ° κ°κ²© λ°μ΄ν°κ° μμ΅λλ€. λλ―Έ λ°μ΄ν°λ₯Ό μμ±ν©λλ€.")
            time_diffs_test = np.ones(
                (x_test_processed.shape[0], x_test_processed.shape[1]), dtype=np.float32
            )
        else:
            time_diffs_test = np.asarray(time_diffs_test, dtype=np.float32)

        test_preds = model.predict(
            [x_test_processed, ticker_test_clean, sector_test, industry_test, time_diffs_test],
            verbose=0
        )

        # Multi-output models return a list; the first output is used.
        if isinstance(test_preds, list):
            test_pred_values = test_preds[0].flatten()
        else:
            test_pred_values = test_preds.flatten()

        # Flatten the array copy rather than the raw input so that lists and
        # other sequences work, not only ndarrays.
        ticker_test_flat = ticker_test_clean.flatten()

        if len(test_pred_values) != len(ticker_test_flat):
            seq_len = x_test_processed.shape[1]
            if len(test_pred_values) == len(ticker_test_flat) * seq_len:
                # One prediction per time step: keep the last step only.
                test_pred_values = test_pred_values.reshape(-1, seq_len)[:, -1]
            else:
                print(f"μ°¨μ λΆμΌμΉ: μμΈ‘κ°={len(test_pred_values)}, ν°μ»€={len(ticker_test_flat)}")
                print("νμμ€ν λΉ μμΈ‘κ°μ μΆμΆνκΈ° μν΄ μνλ§ μν...")
                # Down-sample by the integer ratio of the two lengths.
                factor = len(test_pred_values) // len(ticker_test_flat)
                if factor > 1:
                    test_pred_values = test_pred_values[factor-1::factor]
                    print(f"μνλ§ ν μμΈ‘κ° νμ: {test_pred_values.shape}")

        unique_tickers = np.unique(ticker_test_flat)

        # squeeze=False always yields a 2-D axes array, so the single-ticker
        # case needs no special handling.
        fig, axes_grid = plt.subplots(
            len(unique_tickers), 1,
            figsize=(12, len(unique_tickers) * 3),
            squeeze=False,
        )
        axes = axes_grid[:, 0]

        long_color = 'green'
        short_color = 'red'
        neutral_color = 'gray'
        marker_styles = (
            (1, '^', long_color, 100, 'Long Position'),
            (-1, 'v', short_color, 100, 'Short Position'),
            (0, 'o', neutral_color, 50, 'Neutral Position'),
        )
        # Neutral runs are intentionally not shaded.
        span_colors = {1: long_color, -1: short_color}

        for i, ticker_id in enumerate(unique_tickers):
            ax = axes[i]
            ticker_mask = ticker_test_flat == ticker_id

            if len(ticker_mask) != len(test_pred_values):
                print(f"ν°μ»€ ID {ticker_id}μ λν λ§μ€ν¬ κΈΈμ΄({len(ticker_mask)})μ μμΈ‘κ° κΈΈμ΄({len(test_pred_values)})κ° μΌμΉνμ§ μμ΅λλ€.")
                continue

            ticker_indices = np.where(ticker_mask)[0]
            if len(ticker_indices) < 2:
                print(f"ν°μ»€ ID {ticker_id}μ λν μνμ΄ λΆμ‘±ν©λλ€.")
                continue

            ticker_name = ticker_encoder.inverse_transform([int(ticker_id)])[0]
            ticker_preds = test_pred_values[ticker_mask]

            # Actual prices for this ticker, ordered by index.
            ticker_data = data[data['ticker'] == ticker_name].copy()
            ticker_data = ticker_data.sort_index()

            if len(ticker_data) < len(ticker_preds) + 1:
                print(f"ν°μ»€ {ticker_name}μ λ°μ΄ν°κ° μΆ©λΆνμ§ μμ΅λλ€: {len(ticker_data)} < {len(ticker_preds) + 1}")
                continue

            # Take one extra leading row: it is the base price for the first
            # prediction and is not itself plotted.
            window = len(ticker_preds) + 1
            actual_prices = ticker_data['Close'].values[-window:]
            dates = ticker_data.index[-window:]
            plot_dates = dates[1:]
            plot_prices = actual_prices[1:]

            # One-step-ahead price: previous actual close grown by the
            # predicted log return (price * exp(log_return)).
            predicted_prices = actual_prices[:-1] * np.exp(ticker_preds)

            ax.plot(plot_dates, plot_prices, 'b-', label='Actual Price', linewidth=2)
            ax.plot(plot_dates, predicted_prices, 'r--', label='Predicted Price', linewidth=2)

            # Position held at each step is simply the current signal.
            positions = np.where(
                ticker_preds > best_threshold, 1,
                np.where(ticker_preds < -best_threshold, -1, 0),
            )

            for value, marker, color, size, label in marker_styles:
                selected = positions == value
                if selected.any():
                    ax.scatter(plot_dates[selected], plot_prices[selected],
                               marker=marker, color=color, s=size, label=label)

            # Shade each maximal run of identical long/short positions.
            # Runs are scanned from index 0 so the first sample is included.
            n_points = len(positions)
            run_start = 0
            while run_start < n_points:
                run_end = run_start
                while run_end < n_points and positions[run_end] == positions[run_start]:
                    run_end += 1
                shade = span_colors.get(int(positions[run_start]))
                if shade is not None:
                    # The span ends at the first sample of the next run, or
                    # at the last sample when the run reaches the end.
                    ax.axvspan(plot_dates[run_start],
                               plot_dates[min(run_end, n_points - 1)],
                               alpha=0.1, color=shade)
                run_start = run_end

            ax.set_title(f'{ticker_name} - Actual vs. Predicted Price with Position Changes')
            ax.set_xlabel('Date')
            ax.set_ylabel('Price')
            ax.legend()
            ax.grid(True, alpha=0.3)

            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=2))

        fig.tight_layout()
        return fig
    except Exception as e:
        # Best-effort visualization: report the failure and return None
        # instead of propagating.
        print(f"Price prediction visualization failed: {e}")
        traceback.print_exc()
        if fig is not None:
            # Do not leave a half-drawn figure registered with pyplot.
            plt.close(fig)
        return None
def _scatter_embedding(ax, coords, tickers, point_colors, title, title_size):
    """Draw one annotated point per ticker on ``ax`` and title the axes."""
    for (x_pos, y_pos), ticker, color in zip(coords, tickers, point_colors):
        ax.scatter(x_pos, y_pos, c=[color], s=100, alpha=0.7, edgecolors='black')
        ax.annotate(ticker, (x_pos, y_pos), xytext=(5, 5),
                    textcoords='offset points', fontweight='bold')
    ax.set_title(title, fontsize=title_size, fontweight='bold')
    ax.grid(True, alpha=0.3)


def plot_graph_embeddings(sector_industry_df, save_path_tsne=None, save_path_pca=None):
    """Build a ticker graph, embed it, and project the embeddings to 2-D.

    Tickers become nodes. Two tickers are linked when their combined weight
    (0.5 for a shared sector plus 0.3 for a shared industry) exceeds 0.3,
    so an edge requires a shared sector. Nodes are embedded with Node2Vec
    (64 dimensions) and projected with both t-SNE and PCA.

    Args:
        sector_industry_df: DataFrame with ``'ticker'``, ``'sector'`` and
            ``'industry'`` columns. When a ticker occurs more than once, its
            first row supplies the sector and industry.
        save_path_tsne: Optional path; when given, the t-SNE scatter is
            saved there.
        save_path_pca: Optional path; when given, the PCA scatter is saved
            there.

    Returns:
        Tuple ``(coords_tsne, coords_pca)`` of 2-D coordinates, one row per
        entry of the ``'ticker'`` column, in column order.
    """
    tickers = sector_industry_df['ticker'].tolist()

    # Resolve each ticker's sector/industry once instead of re-filtering the
    # DataFrame inside the pairwise loop below.
    first_rows = sector_industry_df.drop_duplicates(subset='ticker', keep='first')
    sector_of = dict(zip(first_rows['ticker'], first_rows['sector']))
    industry_of = dict(zip(first_rows['ticker'], first_rows['industry']))

    G = nx.Graph()
    for ticker in tickers:
        G.add_node(ticker, sector=sector_of[ticker], industry=industry_of[ticker])

    for i, ticker1 in enumerate(tickers):
        for ticker2 in tickers[i + 1:]:
            weight = 0
            if sector_of[ticker1] == sector_of[ticker2]:
                weight += 0.5
            if industry_of[ticker1] == industry_of[ticker2]:
                weight += 0.3
            # Strictly greater than 0.3: an industry-only match does not
            # create an edge.
            if weight > 0.3:
                G.add_edge(ticker1, ticker2, weight=weight)

    try:
        node2vec = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200, workers=4)
        model = node2vec.fit(window=10, min_count=1, batch_words=4)
        embeddings = {}
        for node in G.nodes():
            try:
                # NOTE(review): node2vec is understood to store walk tokens
                # as strings, hence the str() key - confirm for non-string
                # ticker ids.
                embeddings[node] = model.wv[str(node)]
            except KeyError:
                # Node never appeared in a walk: small random vector.
                embeddings[node] = np.random.normal(0, 0.1, 64)
    except ImportError:
        # NOTE(review): Node2Vec is imported at module level, so this
        # fallback is only reached if the library itself raises ImportError
        # while running.
        print("Node2Vec μμ. PCA κΈ°λ° μλ² λ© μ¬μ©")
        # toarray() yields a plain ndarray; todense() would give np.matrix,
        # which recent scikit-learn versions refuse.
        adj_matrix = nx.adjacency_matrix(G).toarray()
        pca = PCA(n_components=min(64, adj_matrix.shape[0]))
        embeddings_matrix = pca.fit_transform(adj_matrix)
        embeddings = {node: embeddings_matrix[i] for i, node in enumerate(G.nodes())}

    embedding_matrix = np.array([embeddings[ticker] for ticker in tickers])

    # One colour per sector, shared by every plot and the legend.
    sectors = sector_industry_df['sector'].unique()
    sector_colors = dict(zip(sectors, plt.cm.Set3(np.linspace(0, 1, len(sectors)))))
    point_colors = [sector_colors[sector_of[ticker]] for ticker in tickers]
    legend_elements = [Patch(color=color, label=sector)
                       for sector, color in sector_colors.items()]

    # Compute both projections before creating any figure, so a failure
    # here cannot leave a figure open. Perplexity is clamped to at least 1
    # because len(tickers) // 3 is 0 for fewer than three tickers.
    perplexity = max(1, min(30, len(tickers) // 3))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    coords_tsne = tsne.fit_transform(embedding_matrix)
    coords_pca = PCA(n_components=2).fit_transform(embedding_matrix)

    tsne_title = 'Graph Embedding Visualization (t-SNE)'
    pca_title = 'Graph Embedding Visualization (PCA)'

    # Side-by-side overview figure; as in the original it is closed before
    # returning and never saved.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
    try:
        _scatter_embedding(ax1, coords_tsne, tickers, point_colors, tsne_title, 14)
        _scatter_embedding(ax2, coords_pca, tickers, point_colors, pca_title, 14)
        ax2.legend(handles=legend_elements, loc='upper right')
        fig.tight_layout()

        # Each projection is saved on its own when its path is provided.
        outputs = (
            (save_path_tsne, coords_tsne, tsne_title),
            (save_path_pca, coords_pca, pca_title),
        )
        for path, coords, title in outputs:
            if not path:
                continue
            single_fig, single_ax = plt.subplots(figsize=(12, 10))
            try:
                _scatter_embedding(single_ax, coords, tickers, point_colors, title, 16)
                single_ax.legend(handles=legend_elements, loc='upper right')
                single_fig.tight_layout()
                single_fig.savefig(path, dpi=300, bbox_inches='tight', facecolor='white')
            finally:
                plt.close(single_fig)
    finally:
        # Close by handle rather than relying on pyplot's "current figure".
        plt.close(fig)

    return coords_tsne, coords_pca