johnaness's picture
Deploy OStock FastAPI backend to HF Space (Docker SDK, port 7860)
4be2d4d
"""
μ‹œκ°ν™” ν•¨μˆ˜ λͺ¨λ“ˆ
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import networkx as nx
import traceback
from matplotlib.patches import Patch
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from node2vec import Node2Vec
def clean_for_visualization(X):
    """Return ``X`` as a float32 array with non-finite entries zeroed.

    ``None`` is passed through unchanged. Any other input is converted with
    ``np.asarray(..., dtype=np.float32)`` and every NaN, +inf and -inf value
    is replaced by 0.0.
    """
    if X is not None:
        as_float32 = np.asarray(X, dtype=np.float32)
        # Replace NaN and infinite values with 0.
        return np.nan_to_num(as_float32, nan=0.0, posinf=0.0, neginf=0.0)
    return X
def plot_training_history(history):
    """Plot the loss curves and the learning-rate schedule of a training run.

    Args:
        history: Either an object exposing a ``history`` attribute (a mapping
            of metric name to per-epoch values) or that mapping itself.
            ``'loss'`` is required. ``'val_loss'`` is drawn when present. The
            learning rate is read from ``'learning_rate'`` or, failing that,
            ``'lr'``.

    Returns:
        The matplotlib figure holding the two side-by-side axes.

    Raises:
        KeyError: If the mapping has no ``'loss'`` entry.
    """
    history_dict = history.history if hasattr(history, 'history') else history

    # Read the mandatory series before creating the figure so that a missing
    # key does not leave an orphaned figure registered with pyplot.
    train_loss = history_dict['loss']

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Loss panel. Validation loss only exists when validation data was used.
    ax1.plot(train_loss, label='Train Loss')
    if 'val_loss' in history_dict:
        ax1.plot(history_dict['val_loss'], label='Validation Loss')
    ax1.set_title('Model Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    ax1.grid(True)

    # Learning-rate panel. The rate may be logged under either key, or not
    # logged at all when no learning-rate callback was active.
    lr_key = next(
        (key for key in ('learning_rate', 'lr') if key in history_dict),
        None,
    )
    if lr_key is not None:
        ax2.plot(history_dict[lr_key], label='Learning Rate')
        ax2.legend()
    else:
        ax2.text(0.5, 0.5, 'Learning rate not recorded',
                 ha='center', va='center', transform=ax2.transAxes)
    ax2.set_title('Learning Rate')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Learning Rate')
    ax2.grid(True)

    plt.tight_layout()
    return fig
def plot_performance_grid(grid_results):
    """Draw a 2x2 grid of performance metrics as a function of threshold.

    Args:
        grid_results: Mapping of commission -> mapping of threshold -> result
            dict. Each result dict must provide ``'total_return'``,
            ``'sharpe_ratio'`` and ``'max_drawdown'``; ``'trades'`` is
            optional and only its length is plotted.

    Returns:
        The matplotlib figure containing the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(9, 6))

    # One row per panel: (axes, title, y-label, value extractor).
    panels = (
        (axes[0, 0], 'Total Return by Threshold', 'Total Return',
         lambda entry: entry['total_return']),
        (axes[0, 1], 'Sharpe Ratio by Threshold', 'Sharpe Ratio',
         lambda entry: entry['sharpe_ratio']),
        (axes[1, 0], 'Number of Trades', 'Trades',
         lambda entry: len(entry.get('trades', []))),
        (axes[1, 1], 'Max Drawdown by Threshold', 'Max Drawdown',
         lambda entry: entry['max_drawdown']),
    )

    for panel_ax, title, y_label, value_of in panels:
        # One line per commission level.
        for commission, by_threshold in grid_results.items():
            thresholds = list(by_threshold.keys())
            values = [value_of(by_threshold[th]) for th in thresholds]
            panel_ax.plot(thresholds, values, label=f'Comm {commission*100:.2f}%')
        panel_ax.set_title(title)
        panel_ax.set_xlabel('Threshold')
        panel_ax.set_ylabel(y_label)
        panel_ax.legend()
        panel_ax.grid(True)

    plt.tight_layout()
    return fig
def plot_signal_distribution(y_pred, best_threshold):
    """Plot a histogram of predictions split into buy / sell / hold signals.

    A prediction above ``best_threshold`` is a buy, one below
    ``-best_threshold`` is a sell, and anything in between (inclusive) is a
    hold.

    Args:
        y_pred: Predicted values. Any array-like is accepted (list, ndarray,
            Series, column vector); it is flattened to one dimension.
        best_threshold: Symmetric signal threshold.

    Returns:
        The matplotlib figure containing the histogram.
    """
    # Coerce first so plain lists and (N, 1) model outputs behave the same.
    # Doing it before creating the figure means bad input cannot leave an
    # orphaned figure behind.
    preds = np.asarray(y_pred).ravel()

    is_buy = preds > best_threshold
    is_sell = preds < -best_threshold
    is_hold = (preds >= -best_threshold) & (preds <= best_threshold)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.hist([preds[is_buy], preds[is_sell], preds[is_hold]],
            bins=50, label=['Buy', 'Sell', 'Hold'], alpha=0.7)
    ax.axvline(x=best_threshold, color='r', linestyle='--',
               label=f'Buy Threshold ({best_threshold:.4f})')
    ax.axvline(x=-best_threshold, color='g', linestyle='--',
               label=f'Sell Threshold (-{best_threshold:.4f})')
    ax.set_xlabel('Predicted Returns')
    ax.set_ylabel('Frequency')
    ax.set_title('Distribution of Trading Signals')
    ax.legend()
    ax.grid(True)
    plt.tight_layout()
    return fig
def _shade_position_runs(ax, run_dates, positions, long_color, short_color):
    """Shade every consecutive run of long (1) or short (-1) positions on ``ax``."""
    count = len(positions)
    start = 0
    while start < count:
        # Advance ``stop`` to the first index whose position differs.
        stop = start
        while stop < count and positions[stop] == positions[start]:
            stop += 1
        if positions[start] == 1:
            run_color = long_color
        elif positions[start] == -1:
            run_color = short_color
        else:
            run_color = None  # neutral runs are left unshaded
        if run_color is not None:
            # The span reaches the first date of the next run, clamped to the
            # last available date.
            ax.axvspan(run_dates[start], run_dates[min(stop, count - 1)],
                       alpha=0.1, color=run_color)
        start = stop


def plot_price_predictions(model, data_dict, best_threshold, ticker_encoder, x_test_clean=None):
    """Plot actual vs. predicted closing prices per ticker, with positions.

    Model outputs are treated as log returns: each predicted price is the
    previous actual close multiplied by ``exp(prediction)``. Each sample is
    marked long / short / neutral by comparing its prediction with
    ``+/- best_threshold``, and long/short runs are shaded in the background.

    Args:
        model: Object with a ``predict(inputs, verbose=0)`` method that takes
            a list of five inputs (features, ticker ids, sector ids, industry
            ids, time gaps) and returns an array or a list of arrays whose
            first element holds the predicted values.
        data_dict: Mapping with ``'x_test'``, ``'ticker_test'`` and ``'data'``
            (a DataFrame with ``'ticker'`` and ``'Close'`` columns whose
            index supplies the plot dates). Optional keys are
            ``'time_diffs_test'``, ``'sector_test'`` and ``'industry_test'``.
            ``'x_val'``, ``'ticker_val'`` and ``'time_diffs_val'`` are used
            when the test split is empty.
        best_threshold: Symmetric threshold separating long / short / neutral.
        ticker_encoder: Object whose ``inverse_transform`` maps ticker ids
            back to ticker names.
        x_test_clean: Optional, externally cleaned test features. Ignored
            when the function falls back to the validation split, because it
            would no longer match the ticker ids.

    Returns:
        The matplotlib figure, or ``None`` when there is nothing to plot or
        any step fails (the error and traceback are printed).
    """
    fig = None
    try:
        # Pull the test split out of the data dictionary.
        x_test = data_dict['x_test']
        ticker_test = data_dict['ticker_test']
        data = data_dict['data']
        time_diffs_test = data_dict.get('time_diffs_test')

        # Fall back to the validation split when the test split is empty.
        used_validation = False
        if len(x_test) == 0:
            print("ν…ŒμŠ€νŠΈ 데이터가 μ—†μŠ΅λ‹ˆλ‹€. 검증 데이터λ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€.")
            x_test = data_dict['x_val']
            ticker_test = data_dict['ticker_val']
            time_diffs_test = data_dict.get('time_diffs_val')
            used_validation = True
            if len(x_test) == 0:
                print("μ‹œκ°ν™”ν•  데이터가 μ—†μŠ΅λ‹ˆλ‹€.")
                return None

        # Use the caller's cleaned features only when they still describe the
        # split being plotted; otherwise apply the simple cleanup here.
        if x_test_clean is not None and not used_validation:
            x_test_processed = x_test_clean
        else:
            x_test_processed = clean_for_visualization(x_test)
        ticker_test_clean = np.asarray(ticker_test, dtype=np.int32)

        # Sector / industry ids.
        sector_test = data_dict.get('sector_test')
        industry_test = data_dict.get('industry_test')

        # When either is missing, try to read both from the last timestep of
        # the feature tensor. Columns 59 and 60 are assumed to hold the
        # encoded sector and industry -- TODO confirm against the feature
        # pipeline.
        sector_feature_idx = 59
        industry_feature_idx = 60
        if (sector_test is None or industry_test is None) and \
                x_test_processed.shape[2] > max(sector_feature_idx, industry_feature_idx):
            sector_test = x_test_processed[:, -1, sector_feature_idx].astype(np.int32)
            industry_test = x_test_processed[:, -1, industry_feature_idx].astype(np.int32)

        # Last resort: all-zero dummy ids.
        if sector_test is None or industry_test is None:
            print("μ„Ήν„°/μ‚°μ—… 정보가 μ—†μŠ΅λ‹ˆλ‹€. 더미 데이터λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.")
            sector_test = np.zeros_like(ticker_test_clean)
            industry_test = np.zeros_like(ticker_test_clean)
        else:
            sector_test = np.asarray(sector_test, dtype=np.int32)
            industry_test = np.asarray(industry_test, dtype=np.int32)

        # Time gaps; default to all ones when absent.
        if time_diffs_test is None or len(time_diffs_test) == 0:
            print("μ‹œκ°„ 간격 데이터가 μ—†μŠ΅λ‹ˆλ‹€. 더미 데이터λ₯Ό μƒμ„±ν•©λ‹ˆλ‹€.")
            time_diffs_test = np.ones(
                (x_test_processed.shape[0], x_test_processed.shape[1]), dtype=np.float32
            )
        else:
            time_diffs_test = np.asarray(time_diffs_test, dtype=np.float32)

        # Run the model with its five inputs.
        test_preds = model.predict(
            [x_test_processed, ticker_test_clean, sector_test, industry_test, time_diffs_test],
            verbose=0
        )

        # A list result means several outputs; the first is the value output.
        if isinstance(test_preds, list):
            test_pred_values = test_preds[0].flatten()
        else:
            test_pred_values = test_preds.flatten()

        # Flatten the already-coerced ids so list inputs work as well.
        ticker_test_flat = ticker_test_clean.flatten()

        if len(test_pred_values) != len(ticker_test_flat):
            seq_len = x_test_processed.shape[1]
            if len(test_pred_values) == len(ticker_test_flat) * seq_len:
                # One prediction per timestep: keep the last of each sequence.
                test_pred_values = test_pred_values.reshape(-1, seq_len)[:, -1]
            else:
                # Some other mismatch: subsample by the integer length ratio.
                print(f"차원 뢈일치: μ˜ˆμΈ‘κ°’={len(test_pred_values)}, 티컀={len(ticker_test_flat)}")
                print("νƒ€μž„μŠ€ν… λ‹Ή μ˜ˆμΈ‘κ°’μ„ μΆ”μΆœν•˜κΈ° μœ„ν•΄ μƒ˜ν”Œλ§ μˆ˜ν–‰...")
                factor = len(test_pred_values) // len(ticker_test_flat)
                if factor > 1:
                    test_pred_values = test_pred_values[factor-1::factor]
                    print(f"μƒ˜ν”Œλ§ ν›„ μ˜ˆμΈ‘κ°’ ν˜•μƒ: {test_pred_values.shape}")

        unique_tickers = np.unique(ticker_test_flat)

        # One subplot per ticker; squeeze=False keeps ``axes`` indexable even
        # when there is a single ticker.
        fig, axes_grid = plt.subplots(
            len(unique_tickers), 1, figsize=(12, len(unique_tickers) * 3), squeeze=False
        )
        axes = axes_grid[:, 0]

        long_color = 'green'
        short_color = 'red'
        neutral_color = 'gray'

        for i, ticker_id in enumerate(unique_tickers):
            ax = axes[i]
            ticker_mask = ticker_test_flat == ticker_id

            # The mask can only index the predictions if the lengths agree.
            if len(ticker_mask) != len(test_pred_values):
                print(f"티컀 ID {ticker_id}에 λŒ€ν•œ 마슀크 길이({len(ticker_mask)})와 μ˜ˆμΈ‘κ°’ 길이({len(test_pred_values)})κ°€ μΌμΉ˜ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
                continue

            if np.count_nonzero(ticker_mask) < 2:
                print(f"티컀 ID {ticker_id}에 λŒ€ν•œ μƒ˜ν”Œμ΄ λΆ€μ‘±ν•©λ‹ˆλ‹€.")
                continue

            ticker_name = ticker_encoder.inverse_transform([int(ticker_id)])[0]
            ticker_preds = test_pred_values[ticker_mask]

            # Rows of this ticker in the raw data, ordered by index.
            ticker_data = data[data['ticker'] == ticker_name].copy()
            ticker_data = ticker_data.sort_index()

            if len(ticker_data) < len(ticker_preds) + 1:
                print(f"티컀 {ticker_name}의 데이터가 μΆ©λΆ„ν•˜μ§€ μ•ŠμŠ΅λ‹ˆλ‹€: {len(ticker_data)} < {len(ticker_preds) + 1}")
                continue

            # Take one extra leading row: it is the base price for the first
            # prediction and is not plotted itself.
            actual_prices = ticker_data['Close'].values[-len(ticker_preds)-1:]
            dates = ticker_data.index[-len(ticker_preds)-1:]
            plot_dates = dates[1:]
            plot_prices = actual_prices[1:]

            # price_t = actual_{t-1} * exp(predicted log return_t); each step
            # is based on the previous *actual* close, not the previous
            # prediction.
            predicted_prices = actual_prices[:-1] * np.exp(ticker_preds)

            ax.plot(plot_dates, plot_prices, 'b-', label='Actual Price', linewidth=2)
            ax.plot(plot_dates, predicted_prices, 'r--', label='Predicted Price', linewidth=2)

            # Position per sample: the signal at each step fully determines
            # the position held at that step.
            positions = np.where(
                ticker_preds > best_threshold, 1,
                np.where(ticker_preds < -best_threshold, -1, 0)
            )

            long_indices = positions == 1
            short_indices = positions == -1
            neutral_indices = positions == 0

            if long_indices.any():
                ax.scatter(plot_dates[long_indices], plot_prices[long_indices],
                           marker='^', color=long_color, s=100, label='Long Position')
            if short_indices.any():
                ax.scatter(plot_dates[short_indices], plot_prices[short_indices],
                           marker='v', color=short_color, s=100, label='Short Position')
            if neutral_indices.any():
                ax.scatter(plot_dates[neutral_indices], plot_prices[neutral_indices],
                           marker='o', color=neutral_color, s=50, label='Neutral Position')

            # Background shading, starting from the very first sample.
            _shade_position_runs(ax, plot_dates, positions, long_color, short_color)

            ax.set_title(f'{ticker_name} - Actual vs. Predicted Price with Position Changes')
            ax.set_xlabel('Date')
            ax.set_ylabel('Price')
            ax.legend()
            ax.grid(True, alpha=0.3)

            # Date formatting on the x axis.
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
            ax.xaxis.set_major_locator(mdates.WeekdayLocator(interval=2))

        plt.tight_layout()
        return fig
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return None.
        print(f"Price prediction visualization failed: {e}")
        traceback.print_exc()
        # Release the half-built figure so it does not accumulate in pyplot's
        # global registry across calls.
        if fig is not None:
            plt.close(fig)
        return None
def _draw_embedding_scatter(ax, coords, tickers, point_colors, title, title_size):
    """Draw one labelled point per ticker on ``ax`` and apply title and grid."""
    for (x, y), ticker, color in zip(coords, tickers, point_colors):
        ax.scatter(x, y, c=[color], s=100, alpha=0.7, edgecolors='black')
        ax.annotate(ticker, (x, y), xytext=(5, 5),
                    textcoords='offset points', fontweight='bold')
    ax.set_title(title, fontsize=title_size, fontweight='bold')
    ax.grid(True, alpha=0.3)


def plot_graph_embeddings(sector_industry_df, save_path_tsne=None, save_path_pca=None):
    """Build a sector/industry graph, embed its nodes and project them to 2-D.

    Tickers become graph nodes. Two tickers are linked when their combined
    similarity (0.5 for the same sector plus 0.3 for the same industry)
    exceeds 0.3. Nodes are embedded with Node2Vec (64 dimensions) and the
    embeddings are projected with both t-SNE and PCA.

    Args:
        sector_industry_df: DataFrame with ``'ticker'``, ``'sector'`` and
            ``'industry'`` columns. For a ticker appearing in several rows,
            the first row supplies its sector and industry.
        save_path_tsne: Optional path for a standalone t-SNE image.
        save_path_pca: Optional path for a standalone PCA image. Each path is
            handled independently, so supplying only one saves only that plot.

    Returns:
        Tuple ``(coords_tsne, coords_pca)`` of 2-D coordinates, one row per
        entry of the ticker column.

    Raises:
        ValueError: If fewer than two tickers are supplied; neither
            projection can be computed in that case.
    """
    tickers = sector_industry_df['ticker'].tolist()
    if len(tickers) < 2:
        raise ValueError("plot_graph_embeddings needs at least two tickers")

    # Resolve sector/industry once per ticker instead of filtering the
    # DataFrame inside the pairwise loop.
    first_rows = sector_industry_df.drop_duplicates(subset='ticker').set_index('ticker')
    sector_of = first_rows['sector'].to_dict()
    industry_of = first_rows['industry'].to_dict()

    # Build the graph: one node per ticker.
    G = nx.Graph()
    for ticker in tickers:
        G.add_node(ticker, sector=sector_of[ticker], industry=industry_of[ticker])

    # Edges from sector / industry similarity.
    for i, first in enumerate(tickers):
        for second in tickers[i + 1:]:
            weight = 0
            if sector_of[first] == sector_of[second]:
                weight += 0.5
            if industry_of[first] == industry_of[second]:
                weight += 0.3
            if weight > 0.3:
                G.add_edge(first, second, weight=weight)

    # Node embeddings.
    try:
        node2vec = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200, workers=4)
        model = node2vec.fit(window=10, min_count=1, batch_words=4)
        embeddings = {}
        for node in G.nodes():
            try:
                embeddings[node] = model.wv[node]
            except KeyError:
                # Node missing from the learned vocabulary: small random vector.
                embeddings[node] = np.random.normal(0, 0.1, 64)
    except ImportError:
        # Fallback: PCA of the weighted adjacency matrix.
        print("Node2Vec μ—†μŒ. PCA 기반 μž„λ² λ”© μ‚¬μš©")
        # A plain ndarray in G.nodes() order, which is what PCA expects.
        adjacency = nx.to_numpy_array(G)
        pca_fallback = PCA(n_components=min(64, adjacency.shape[0]))
        embeddings = dict(zip(G.nodes(), pca_fallback.fit_transform(adjacency)))

    embedding_matrix = np.array([embeddings[ticker] for ticker in tickers])

    # One colour per sector.
    sectors = sector_industry_df['sector'].unique()
    sector_colors = dict(zip(sectors, plt.cm.Set3(np.linspace(0, 1, len(sectors)))))
    point_colors = [sector_colors[sector_of[ticker]] for ticker in tickers]
    legend_elements = [Patch(color=color, label=sector)
                       for sector, color in sector_colors.items()]

    # 2-D projections. Perplexity is clamped to at least 1 so two-ticker
    # inputs do not ask t-SNE for a perplexity of 0.
    perplexity = max(1, min(30, len(tickers) // 3))
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    coords_tsne = tsne.fit_transform(embedding_matrix)
    coords_pca = PCA(n_components=2).fit_transform(embedding_matrix)

    tsne_title = 'Graph Embedding Visualization (t-SNE)'
    pca_title = 'Graph Embedding Visualization (PCA)'

    # Combined side-by-side figure.
    # NOTE(review): this figure is closed before returning and is neither
    # saved nor returned -- confirm whether it is still needed.
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 8))
    try:
        _draw_embedding_scatter(ax1, coords_tsne, tickers, point_colors, tsne_title, 14)
        _draw_embedding_scatter(ax2, coords_pca, tickers, point_colors, pca_title, 14)
        ax2.legend(handles=legend_elements, loc='upper right')
        fig.tight_layout()
    finally:
        plt.close(fig)

    # Standalone images, one per requested path.
    for save_path, coords, title in (
        (save_path_tsne, coords_tsne, tsne_title),
        (save_path_pca, coords_pca, pca_title),
    ):
        if not save_path:
            continue
        single_fig, single_ax = plt.subplots(figsize=(12, 10))
        try:
            _draw_embedding_scatter(single_ax, coords, tickers, point_colors, title, 16)
            single_ax.legend(handles=legend_elements, loc='upper right')
            single_fig.tight_layout()
            single_fig.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white')
        finally:
            # Always release the figure, even when saving fails.
            plt.close(single_fig)

    return coords_tsne, coords_pca