Spaces:
Sleeping
Sleeping
| """ | |
| Functions for processing financial statement data. | |
| """ | |
| import pandas as pd | |
| import numpy as np | |
| import requests | |
| import warnings | |
| from bs4 import BeautifulSoup | |
| from statsmodels.tsa.holtwinters import ExponentialSmoothing | |
| warnings.filterwarnings("ignore", message="Optimization failed to converge") | |
def scrape_financial_statement(ticker, statement_type, timeout=30):
    """
    Scrape one quarterly financial statement table from stockanalysis.com.

    Args:
        ticker: Stock symbol inserted into the request URL.
        statement_type: One of 'income', 'ratios', 'balance-sheet' or
            'cash-flow-statement'.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame whose index (named "Date") holds the table's column
        headers and whose columns are the labels from the table's first
        column, or None when the statement type is unknown, the table is
        not found, or any error occurs while fetching/parsing.
    """
    # Local import: pandas.read_html expects a file-like object rather than
    # a literal HTML string in recent versions.
    from io import StringIO

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'
    }
    # URL path segment per statement type
    url_paths = {
        'income': '',
        'ratios': 'ratios/',
        'balance-sheet': 'balance-sheet/',
        'cash-flow-statement': 'cash-flow-statement/'
    }
    # Display name per statement type, used only in log messages
    statement_names = {
        'income': '์์ต๊ณ์ฐ์',
        'ratios': '์ฌ๋ฌด๋น์จ',
        'balance-sheet': '๋์ฐจ๋์กฐํ',
        'cash-flow-statement': 'ํ๊ธํ๋ฆํ'
    }

    # Validate up front: previously an unknown type raised KeyError inside
    # the try block and then a second KeyError inside the except handler,
    # so the error escaped instead of yielding None.
    if statement_type not in url_paths:
        print(f"{ticker}: unsupported statement type '{statement_type}'")
        return None
    statement_name = statement_names[statement_type]

    try:
        url = f"https://stockanalysis.com/stocks/{ticker}/financials/{url_paths[statement_type]}?p=quarterly"
        # A timeout prevents a stalled connection from hanging the caller.
        response = requests.get(url, headers=headers, timeout=timeout)
        print(f"{statement_name} ์ํ์ฝ๋: {response.status_code}")

        soup = BeautifulSoup(response.content, 'html.parser')
        element_tables = soup.select("table[data-test='financials']")
        if not element_tables:
            print(f"{ticker}: {statement_name} ํ ์ด๋ธ์ ์ฐพ์ ์ ์์ต๋๋ค.")
            return None

        df = pd.read_html(StringIO(str(element_tables[0])))[0]

        # Check whether the parsed header is a two-level MultiIndex
        if isinstance(df.columns, pd.MultiIndex):
            print(f"{ticker}: MultiIndex {statement_name} ์ฒ๋ฆฌ")
            # Second header level, skipping the first (label) column
            date_cols = df.columns.get_level_values(1)[1:]
            df = df.droplevel(0, axis=1)  # drop the first header level
            df.columns = [df.columns[0]] + list(date_cols)
            result_df = df.set_index(df.columns[0]).transpose()
        else:
            date_col = df.columns[0]
            result_df = df.set_index(date_col).transpose()

        result_df.index.name = "Date"

        # Trim edge rows of the transposed table.
        # NOTE(review): presumably these are non-period columns of the
        # source table — confirm against the live page.
        if statement_type == 'ratios':
            result_df = result_df.iloc[1:-1, :]  # drop first and last row
        else:
            result_df = result_df.iloc[:-1, :]  # drop last row only
        return result_df
    except Exception as e:
        print(f"{ticker} {statement_name} ์คํฌ๋ํ ์ค๋ฅ: {e}")
        return None
def convert_to_numeric(df):
    """
    Convert every column of a DataFrame to a numeric dtype, in place.

    String cells are cleaned before conversion: surrounding whitespace and
    thousands separators (',') are removed, and cells containing '%' are
    converted to a fraction (e.g. '12%' -> 0.12). Anything that still
    cannot be parsed becomes NaN.

    Args:
        df: DataFrame to convert. It is modified in place.

    Returns:
        The same DataFrame object, with all columns numeric.
    """
    def _clean_cell(value):
        # Non-string cells (numbers, NaN, None) pass through untouched.
        if not isinstance(value, str):
            return value
        text = value.strip().replace(',', '')
        if '%' in text:
            try:
                return float(text.replace('%', '')) / 100
            except ValueError:
                # A malformed percentage must not abort the whole column.
                return np.nan
        # Signed numbers such as '-123' are handled by pd.to_numeric below.
        return text

    for column in df.columns:
        # Only non-numeric columns can contain strings that need cleaning.
        if not pd.api.types.is_numeric_dtype(df[column]):
            df[column] = df[column].apply(_clean_cell)
        # Final numeric conversion; unparseable values become NaN
        df[column] = pd.to_numeric(df[column], errors='coerce')
    return df
def convert_date_format(date_str, future_markers=("'24", "2024")):
    """
    Convert a table header label into a 'YYYY-MM-DD' date string.

    Only strings that contain an apostrophe and at least four
    whitespace-separated tokens are parsed; their last three tokens are
    read as '<Mon> <day>, <year>'. Every other value is returned unchanged.

    Args:
        date_str: Header label to convert.
        future_markers: Substrings that mark a label to be discarded
            (the function returns None for it). The default reproduces the
            previously hard-coded values.
            NOTE(review): the default drops every label mentioning 2024 —
            confirm this is still wanted.

    Returns:
        The 'YYYY-MM-DD' string, the unchanged input when it is not in the
        parsed format, or None when the label matches a marker or cannot be
        parsed.
    """
    month_numbers = {
        'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6,
        'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12
    }
    try:
        if isinstance(date_str, str) and "'" in date_str and len(date_str.split()) >= 4:
            # Discard labels flagged as future periods
            if any(marker in date_str for marker in (future_markers or ())):
                return None

            month_part, day_part, year_part = date_str.split()[-3:]
            # An unknown month used to fall back silently to January,
            # producing a wrong date; treat it as a parse failure instead.
            if month_part not in month_numbers:
                raise ValueError(f"unrecognized month '{month_part}'")
            month = month_numbers[month_part]
            day = int(day_part.replace(',', ''))
            year = int(year_part)
            return f"{year}-{month:02d}-{day:02d}"

        # Any other format is passed through unchanged
        return date_str
    except Exception as e:
        print(f"๋ ์ง ๋ณํ ์ค๋ฅ: {e} - '{date_str}'")
        return None
def process_index_dates(df):
    """
    Replace a DataFrame's index with converted date labels.

    Each index label is passed through convert_date_format; rows whose
    converted label is missing (None/NaN) are dropped.

    Args:
        df: DataFrame whose index holds the raw date labels. It is not
            modified (the previous version added a '_temp_date' column to
            the caller's frame as a side effect).

    Returns:
        A new DataFrame indexed by the converted labels, or None when no
        row has a usable label.
    """
    converted = [convert_date_format(label) for label in df.index]
    keep = np.array([pd.notna(value) for value in converted], dtype=bool)
    if not keep.any():
        return None

    result = df.loc[keep].copy()
    # The index name '_temp_date' matches what the previous implementation
    # produced, so downstream code sees the same frame.
    result.index = pd.Index(
        [value for value, wanted in zip(converted, keep) if wanted],
        name='_temp_date',
    )
    return result
def interpolate_and_forecast(df, end_date):
    """
    Expand financial data to daily frequency and extend it to end_date.

    The index is parsed as datetimes, the frame is resampled to daily
    frequency, and each column is linearly interpolated. When end_date lies
    after the last available day, every column is extended with an
    additive-trend exponential smoothing forecast; a column whose forecast
    fails is filled with NaN. Finally, columns containing any NaN are
    dropped.

    Args:
        df: DataFrame whose index can be parsed by pd.to_datetime. It is
            not modified.
        end_date: Last date the result should cover (anything
            pd.to_datetime accepts).

    Returns:
        A new daily DataFrame. An input with no rows is returned as an
        empty copy instead of raising.
    """
    # Nothing to resample; avoids an IndexError on index[-1] below.
    if len(df.index) == 0:
        return df.copy()

    # Work on a copy so the caller's index is not overwritten.
    source = df.copy()
    source.index = pd.to_datetime(source.index)

    # Resample to daily rows and fill the gaps by linear interpolation
    daily_df = source.resample('D').asfreq()
    for column in daily_df.columns:
        daily_df[column] = daily_df[column].interpolate(method='linear')

    # Build the forecast when the data stops before end_date
    end_date = pd.to_datetime(end_date)
    last_day = daily_df.index[-1]
    forecast_steps = (end_date - last_day).days
    if forecast_steps > 0:
        print(f"์์ธก ์์: {forecast_steps}์ผ")
        date_range = pd.date_range(last_day + pd.Timedelta(days=1), end_date)

        # Collect all forecasts first, then build the frame in one step
        forecasts = {}
        for column in daily_df.columns:
            try:
                model = ExponentialSmoothing(
                    daily_df[column], trend='add', seasonal=None, seasonal_periods=4
                ).fit()
                forecasts[column] = model.forecast(steps=forecast_steps)
            except Exception as e:
                print(f"{column} ์์ธก ์คํจ: {e}")
                forecasts[column] = np.full(forecast_steps, np.nan)

        forecast_df = pd.DataFrame(forecasts, index=date_range)
        daily_df = pd.concat([daily_df, forecast_df])

    # Drop every column that still contains a missing value
    daily_df = daily_df.dropna(axis=1, how='any')
    return daily_df
def process_financial_data(ticker, all_data, stock_end_date):
    """
    Build the daily financial dataset for one ticker.

    Steps: scrape the four statements, convert them to numeric values,
    convert their index labels to dates, add an ROE column when its inputs
    exist, merge everything column-wise, expand to daily rows up to
    stock_end_date, and inner-join the ticker's 'Close' prices on the index.

    Args:
        ticker: Stock symbol; also the key looked up in all_data.
        all_data: Mapping from ticker to a DataFrame that has a 'Close'
            column.
        stock_end_date: Last date passed on to interpolate_and_forecast.

    Returns:
        The merged daily DataFrame, or None when any step yields no usable
        data or an exception occurs.
    """
    try:
        print(f"===== {ticker} ์ฌ๋ฌด๋ฐ์ดํฐ ์ฒ๋ฆฌ ์์ =====")

        # Scrape each statement; order matches the unpacking further down
        statement_kinds = ('income', 'ratios', 'balance-sheet', 'cash-flow-statement')
        frames = [scrape_financial_statement(ticker, kind) for kind in statement_kinds]

        # Abort when any statement could not be scraped
        if any(frame is None for frame in frames):
            print(f"{ticker}: ์ผ๋ถ ์ฌ๋ฌด์ ํ ๋ฐ์ดํฐ๋ฅผ ๊ฐ์ ธ์ค์ง ๋ชปํ์ต๋๋ค.")
            return None

        # Numeric conversion for all statements, then index date handling
        frames = [convert_to_numeric(frame) for frame in frames]
        frames = [process_index_dates(frame) for frame in frames]

        # Abort when any statement has no valid dates left
        if any(frame is None for frame in frames):
            print(f"{ticker}: ๋ ์ง ๋ณํ ํ ์ ํจํ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")
            return None

        FS_Income, FS_Ratio, FS_Balance, FS_Cash = frames

        # ROE = net income / shareholders' equity, when both columns exist
        try:
            has_roe_inputs = (
                'Net Income' in FS_Income.columns
                and "Shareholders' Equity" in FS_Balance.columns
            )
            if has_roe_inputs:
                net_income = FS_Income['Net Income']
                equity = FS_Balance["Shareholders' Equity"]
                FS_Ratio['ROE'] = net_income / equity
        except Exception as e:
            print(f"ROE ๊ณ์ฐ ์ค๋ฅ: {e}")

        # Merge all statements column-wise
        FS_Summary = pd.concat([FS_Income, FS_Balance, FS_Ratio, FS_Cash], axis=1)

        # Keep only the first occurrence of each duplicated column name
        repeated = FS_Summary.columns.duplicated()
        duplicated_columns = FS_Summary.columns[repeated].unique()
        if len(duplicated_columns) > 0:
            print(f"{ticker} ์ค๋ณต ์ปฌ๋ผ ์ ๊ฑฐ: {duplicated_columns}")
            FS_Summary = FS_Summary.loc[:, ~repeated]

        # Interpolate to daily rows and forecast up to stock_end_date
        daily_FS_Summary = interpolate_and_forecast(FS_Summary, stock_end_date)
        if daily_FS_Summary.empty:
            print(f"{ticker}: ์ ํจํ ์ผ๋ณ ์ฌ๋ฌด ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค")
            return None

        # Price data is required for the final merge
        if ticker not in all_data or 'Close' not in all_data[ticker].columns:
            print(f"{ticker}: Close ๋ฐ์ดํฐ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค")
            return None

        close_df = pd.DataFrame(all_data[ticker]['Close'])
        close_df.columns = ['Close']

        # Inner-join financial data with closing prices on the index
        daily_FS_Summary = daily_FS_Summary.merge(
            close_df, left_index=True, right_index=True, how='inner'
        )
        if daily_FS_Summary.empty:
            print(f"{ticker}: ์ฃผ๊ฐ ๋ฐ์ดํฐ์ ๋ณํฉ ํ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค")
            return None

        print(f"{ticker} ์ฌ๋ฌด ๋ฐ์ดํฐ ์ฒ๋ฆฌ ์๋ฃ: {daily_FS_Summary.shape}")
        return daily_FS_Summary
    except Exception as e:
        print(f"{ticker} ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {e}")
        import traceback
        traceback.print_exc()
        return None