# portfolio / etf /postprocessing_ETF_csv.py
# eric2digit's picture
# Upload folder using huggingface_hub
# bf3714e verified
# 1. Format post-processing
import pandas as pd
def rearrange_csv(csv_path: str) -> None:
    """Move the ticker column next to NAME and overwrite the CSV in place.

    The file is read, the ``ID`` column is renamed to ``TICKER``, and the
    columns are reordered so that ``NAME`` comes first, ``TICKER`` second and
    every other column follows in its original relative order. The result is
    written back to ``csv_path`` (UTF-8 with BOM, no index column) and a
    completion message is printed.

    Args:
        csv_path: Path of the CSV file to rewrite. It must contain a ``NAME``
            column and either an ``ID`` column or an already-renamed
            ``TICKER`` column.

    Raises:
        KeyError: If ``NAME`` or the ticker column is missing.
    """
    # Read the identifier column as text: a numeric-looking ticker such as
    # "069500" would otherwise be parsed as an integer and lose its leading
    # zero when the file is written back.
    df = pd.read_csv(csv_path, dtype={"ID": str, "TICKER": str})

    # Renaming is a no-op when the file was already processed once.
    df = df.rename(columns={"ID": "TICKER"})

    leading = ["NAME", "TICKER"]
    remaining = [col for col in df.columns if col not in leading]
    df = df[leading + remaining]

    # Overwrite the source file.
    df.to_csv(csv_path, index=False, encoding="utf-8-sig")
    print(f"[완료] 열 순서 변경 및 저장 완료 → {csv_path}")
# Step 1 runs as soon as this module is executed or imported; "ETF.csv" is
# resolved relative to the current working directory.
rearrange_csv("ETF.csv")
# 2. Convert the COMPANY column to Korean names
import pandas as pd
from pykrx import stock
def convert_company_to_korean(csv_path: str, name_lookup=None) -> None:
    """Replace COMPANY with names resolved from TICKER and overwrite the CSV.

    For every row, the part of ``TICKER`` before the first ``"."`` is passed
    to ``name_lookup``. Rows for which the lookup raises, or returns anything
    other than a non-blank string, are removed from the file and listed on
    stdout. The remaining rows get the resolved name in ``COMPANY`` and the
    file at ``csv_path`` is overwritten (UTF-8 with BOM, no index column).

    Args:
        csv_path: Path of the CSV file to rewrite. It must contain a
            ``TICKER`` column.
        name_lookup: Optional callable taking the ticker code and returning
            its name. Defaults to ``stock.get_market_ticker_name`` from
            pykrx.

    Raises:
        KeyError: If the ``TICKER`` column is missing.
    """
    if name_lookup is None:
        name_lookup = stock.get_market_ticker_name

    def get_korean_name(ticker):
        """Return the resolved name for *ticker*, or None if unresolved."""
        try:
            code = ticker.split(".")[0]
            name = name_lookup(code)
        except Exception:
            # Best effort: a missing ticker value (NaN has no .split) or a
            # failed lookup just marks the row as skipped. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
            return None
        # A failed lookup may yield a non-string or blank result.
        if not isinstance(name, str) or not name.strip():
            return None
        return name

    # Read tickers as text so numeric-looking codes keep their leading zeros
    # and remain splittable strings.
    df = pd.read_csv(csv_path, dtype={"TICKER": str})

    # Keep the resolved names in a separate Series instead of a temporary
    # column, so an existing column in the file can never be clobbered.
    korean_names = df["TICKER"].apply(get_korean_name)
    resolved = korean_names.notna()

    # Rows that could not be resolved (kept only for the report below).
    skipped_rows = df[~resolved].to_dict(orient="records")

    # Keep only resolved rows and update COMPANY.
    df = df[resolved].copy()
    df["COMPANY"] = korean_names[resolved]

    df.to_csv(csv_path, index=False, encoding="utf-8-sig")
    print(f"[완료] 한국어 기업명 변환 완료 → {csv_path}")

    # Report the dropped rows.
    print(f"\n[무시된 행 개수] {len(skipped_rows)}개")
    if skipped_rows:
        print("[무시된 데이터 목록]")
        for idx, row in enumerate(skipped_rows, 1):
            print(f"{idx}. {row}")
# Step 2 runs as soon as this module is executed or imported, on the same
# "ETF.csv" (relative to the current working directory) that step 1 rewrote.
convert_company_to_korean("ETF.csv")