Spaces:
Sleeping
Sleeping
| """ | |
| ๊ณ์ธต์ ์๋ฒ ๋ฉ ์ฒ๋ฆฌ ๊ด๋ จ ํจ์ | |
| """ | |
| import numpy as np | |
| import pandas as pd | |
| from sklearn.preprocessing import LabelEncoder | |
| import yfinance as yf | |
| def combine_stocks_for_embedding(all_data, tickers): | |
| """ | |
| ์ข ๋ชฉ ์๋ฒ ๋ฉ์ ์ํ ๋ฐ์ดํฐ ํตํฉ | |
| """ | |
| print("\n์ข ๋ชฉ ์๋ฒ ๋ฉ์ ์ํ ๋ฐ์ดํฐ ํตํฉ ์ค...") | |
| all_stocks_data = [] | |
| for ticker in tickers: | |
| if ticker in all_data: | |
| # ํ์ฌ ์ข ๋ชฉ ๋ฐ์ดํฐ ๋ณต์ฌ | |
| stock_df = all_data[ticker].copy() | |
| # ์ข ๋ชฉ ์๋ณ์ ์ปฌ๋ผ ์ถ๊ฐ | |
| stock_df['ticker'] = ticker | |
| # ๋ฆฌ์คํธ์ ์ถ๊ฐ | |
| all_stocks_data.append(stock_df) | |
| print(f"{ticker} ๋ฐ์ดํฐ: {len(stock_df)}ํ ์ฒ๋ฆฌ") | |
| # ๋ชจ๋ ๋ฐ์ดํฐํ๋ ์์ ์์ง์ผ๋ก ๋ณํฉ | |
| combined_df = pd.concat(all_stocks_data, axis=0) | |
| combined_df = combined_df.sort_index() | |
| combined_df = combined_df.reset_index() | |
| # ticker ์ปฌ๋ผ์ ์์ชฝ์ผ๋ก ์ด๋ | |
| cols = combined_df.columns.tolist() | |
| cols.remove('ticker') | |
| cols = ['ticker'] + cols | |
| combined_df = combined_df[cols] | |
| return combined_df | |
| def get_industry_data(tickers): | |
| """ | |
| ์ข ๋ชฉ ๋ฆฌ์คํธ์ ์ฐ์ ๋ฐ ์นํฐ ์ ๋ณด๋ฅผ ๊ฐ์ ธ์ต๋๋ค. | |
| """ | |
| industry_data = {} | |
| # print("์ฐ์ ๋ถ๋ฅ ์ ๋ณด ์์ง ์ค...") | |
| for ticker in tickers: | |
| try: | |
| # ์ข ๋ชฉ ์ ๋ณด ๊ฐ์ ธ์ค๊ธฐ | |
| stock = yf.Ticker(ticker) | |
| info = stock.info | |
| # ์ฐ์ ๋ฐ ์นํฐ ์ ๋ณด ์ถ์ถ | |
| industry_data[ticker] = { | |
| 'sector': info.get('sector', 'Unknown'), | |
| 'industry': info.get('industry', 'Unknown') | |
| } | |
| print(f"{ticker}: {industry_data[ticker]['sector']} - {industry_data[ticker]['industry']}") | |
| except Exception as e: | |
| print(f"{ticker} ๋ฐ์ดํฐ ๊ฐ์ ธ์ค๊ธฐ ์คํจ: {str(e)}") | |
| industry_data[ticker] = {'sector': 'Unknown', 'industry': 'Unknown'} | |
| # ๋ฐ์ดํฐํ๋ ์์ผ๋ก ๋ณํ | |
| df = pd.DataFrame.from_dict(industry_data, orient='index') | |
| df.index.name = 'ticker' | |
| df.reset_index(inplace=True) | |
| return df | |
| def add_industry_encoding(data, industry_data): | |
| """ | |
| ๋ฐ์ดํฐ์ ์ฐ์ ์ ๋ณด๋ฅผ ์ถ๊ฐํ๊ณ ์ธ์ฝ๋ฉํฉ๋๋ค. | |
| """ | |
| print("\n์ฐ์ ์ ๋ณด ์ธ์ฝ๋ฉ ์ค...") | |
| # ์ฐ์ ์ ๋ณด ๋ณํฉ | |
| data = data.merge(industry_data, on='ticker', how='left') | |
| # ๋๋ฝ๋ ์ฐ์ ์ ๋ณด๋ 'Unknown'์ผ๋ก ์ค์ | |
| data['sector'] = data['sector'].fillna('Unknown') | |
| data['industry'] = data['industry'].fillna('Unknown') | |
| # ์ธ์ฝ๋ฉ | |
| sector_encoder = LabelEncoder() | |
| industry_encoder = LabelEncoder() | |
| data['sector_id'] = sector_encoder.fit_transform(data['sector']) | |
| data['industry_id'] = industry_encoder.fit_transform(data['industry']) | |
| # ์ธ์ฝ๋ ์ ๋ณด ๋ฐํ | |
| encoders = { | |
| 'sector_encoder': sector_encoder, | |
| 'industry_encoder': industry_encoder, | |
| 'n_sectors': len(sector_encoder.classes_), | |
| 'n_industries': len(industry_encoder.classes_) | |
| } | |
| print(f"์นํฐ ์: {encoders['n_sectors']}, ์ฐ์ ์: {encoders['n_industries']}") | |
| return data, encoders | |
| def create_sector_industry_mapping(ticker_list, sector_industry_df): | |
| """ | |
| ์ข ๋ชฉ๋ณ ์นํฐ/์ฐ์ ๋งคํ์ ์์ฑํฉ๋๋ค. | |
| """ | |
| sector_mapping = {} | |
| industry_mapping = {} | |
| for ticker in ticker_list: | |
| if ticker in sector_industry_df.index: | |
| sector_mapping[ticker] = int(sector_industry_df.loc[ticker, 'sector_id']) | |
| industry_mapping[ticker] = int(sector_industry_df.loc[ticker, 'industry_id']) | |
| else: | |
| # ์ ๋ณด๊ฐ ์๋ ๊ฒฝ์ฐ ๊ธฐ๋ณธ๊ฐ 0 ์ฌ์ฉ | |
| sector_mapping[ticker] = 0 | |
| industry_mapping[ticker] = 0 | |
| return sector_mapping, industry_mapping | |
| def apply_sector_industry_mapping(ticker_data, ticker_encoder, sector_mapping, industry_mapping): | |
| """ | |
| ํฐ์ปค ๋ฐ์ดํฐ์ ์นํฐ/์ฐ์ ๋งคํ์ ์ ์ฉํฉ๋๋ค. | |
| """ | |
| # ์ถ๋ ฅ ๋ฐฐ์ด ์ด๊ธฐํ | |
| sector_data = np.zeros_like(ticker_data) | |
| industry_data = np.zeros_like(ticker_data) | |
| # ํฐ์ปค ID -> ์ค์ ํฐ์ปค ๋งคํ | |
| id_to_ticker = {v: k for k, v in ticker_encoder.mapping.items()} | |
| # ๊ฐ ์์น์ ๋งคํ ์ ์ฉ | |
| for i in range(ticker_data.shape[0]): | |
| for j in range(ticker_data.shape[1]): | |
| ticker_id = ticker_data[i, j] | |
| if ticker_id in id_to_ticker: | |
| ticker = id_to_ticker[ticker_id] | |
| sector_data[i, j] = sector_mapping.get(ticker, 0) | |
| industry_data[i, j] = industry_mapping.get(ticker, 0) | |
| print(f"์ ์ฉ๋ ์นํฐ ํด๋์ค: {np.unique(sector_data)}") | |
| print(f"์ ์ฉ๋ ์ฐ์ ํด๋์ค: {np.unique(industry_data)}") | |
| return sector_data, industry_data |