import pykrx
import pandas as pd
import yfinance as yf
from datetime import datetime
import re
import os

# ============================================================
# 1) Preload the KOSPI / KOSDAQ ticker lists
# ============================================================
print("[INFO] Loading KOSPI/KOSDAQ ticker lists...")
KOSPI_SET = set(pykrx.stock.get_market_ticker_list(market="KOSPI"))
KOSDAQ_SET = set(pykrx.stock.get_market_ticker_list(market="KOSDAQ"))

# Patterns used by extract_primary_ticker, compiled once at import time.
_BARE_TICKER_RE = re.compile(r"\d{6}")
_SUFFIXED_TICKER_RE = re.compile(r"\d{6}\.(KS|KQ)")
_TOKEN_SPLIT_RE = re.compile(r"[,\s]+")


# ============================================================
# 2) Attach .KS / .KQ automatically depending on the market
# ============================================================
def attach_market_suffix(ticker6: str):
    """Return *ticker6* with a market suffix appended.

    A ticker found in ``KOSPI_SET`` gets ``".KS"`` and one found in
    ``KOSDAQ_SET`` gets ``".KQ"``. A ticker found in neither set falls
    back to ``".KS"``.
    """
    if ticker6 in KOSPI_SET:
        return ticker6 + ".KS"
    if ticker6 in KOSDAQ_SET:
        return ticker6 + ".KQ"
    return ticker6 + ".KS"


# ============================================================
# 3) Extract the primary Korean ticker from a raw ID string
# ============================================================
def extract_primary_ticker(raw_id: str):
    """Pick one ticker out of a raw ID string.

    The string is split on commas and whitespace, and candidates are
    tried in this order:

    1. the whole (stripped) string already looks like ``NNNNNN.KS`` or
       ``NNNNNN.KQ`` -> returned unchanged;
    2. the first token that is exactly six digits -> suffix attached via
       ``attach_market_suffix``;
    3. the first token that already carries ``.KS`` / ``.KQ``;
    4. the first six-character token accepted by ``str.isdigit()``;
    5. otherwise the first token.

    Returns an empty string when *raw_id* is NA according to ``pd.isna``.
    """
    if pd.isna(raw_id):
        return ""

    s = str(raw_id).strip()

    if _SUFFIXED_TICKER_RE.fullmatch(s):
        return s

    parts = _TOKEN_SPLIT_RE.split(s)

    # Bare six-digit code -> detect the market automatically.
    for p in parts:
        if _BARE_TICKER_RE.fullmatch(p):
            return attach_market_suffix(p)

    # Token that already ends in .KS / .KQ.
    for p in parts:
        if _SUFFIXED_TICKER_RE.fullmatch(p):
            return p

    # Looser numeric fallback: str.isdigit() accepts some characters that
    # the \d pattern above does not (e.g. superscript digits).
    for p in parts:
        if p.isdigit() and len(p) == 6:
            return attach_market_suffix(p)

    # Last resort: hand back the first token as-is.
    return parts[0] if parts else s


# ============================================================
# 4) Company name lookup
# ============================================================
def safe_company_name(ticker):
    """Return a company name for *ticker*, or ``""`` if none is found.

    Looks at ``fast_info`` first and then ``info`` on ``yf.Ticker(ticker)``,
    returning the first truthy ``longName`` or ``shortName``. The lookup is
    best-effort: any ``Exception`` raised along the way is suppressed and
    the next source is tried. ``KeyboardInterrupt`` and ``SystemExit`` are
    deliberately not suppressed so a long batch can still be interrupted.
    """
    try:
        yf_t = yf.Ticker(ticker)
    except Exception:
        return ""

    for source in ("fast_info", "info"):
        try:
            meta = getattr(yf_t, source)
            if meta:
                nm = meta.get("longName") or meta.get("shortName")
                if nm:
                    return nm
        except Exception:
            # This source failed; fall through to the next one.
            continue

    return ""


# ============================================================
# 5) MAIN CODE
# ============================================================
def main():
    """Export the holdings of every ETF listed in ``etf_2.tsv`` to ``ETF.csv``.

    ``etf_2.tsv`` is read as a tab-separated file and must provide the
    columns ``종목코드`` (ETF code) and ``종목명`` (ETF name). For each row the
    portfolio deposit file (PDF) for today's date is fetched through pykrx,
    normalised to the columns ID, NAME, COMPANY, VALUE, AMOUNT, PERCENTAGE
    and appended to ``ETF.csv`` (comma-separated, UTF-8). The output file is
    created with a header row if it does not exist yet; existing content is
    never rewritten, so repeated runs keep appending.

    An ETF whose PDF cannot be fetched, is empty, or has no ticker column is
    reported on stdout and skipped.
    """
    # Read the ETF code as text: with dtype inference an all-numeric column
    # becomes an integer and codes such as "069500" lose their leading zero.
    input_df = pd.read_csv('etf_2.tsv', sep='\t', dtype={'종목코드': str})
    output_path = "ETF.csv"
    final_cols = ["ID", "NAME", "COMPANY", "VALUE", "AMOUNT", "PERCENTAGE"]

    # Create the output with a header row if it does not exist yet.
    if not os.path.exists(output_path):
        pd.DataFrame(columns=final_cols).to_csv(
            output_path, index=False, encoding="utf-8"
        )

    # One query date for the whole batch.
    today_dt = datetime.now().strftime('%Y%m%d')

    for row in input_df.to_dict('records'):
        etf_ticker = row['종목코드']
        name_from_input = row['종목명']

        # A failure for one ETF must not abort the batch: earlier ETFs have
        # already been appended to the output file.
        try:
            df = pykrx.stock.get_etf_portfolio_deposit_file(etf_ticker, today_dt)
        except Exception as exc:
            print(f"[WARN] {etf_ticker} PDF lookup failed ({exc}). skip")
            continue

        if df is None or df.empty:
            print(f"[WARN] {etf_ticker} PDF 없음. skip")
            continue

        # ============ 1) Ticker column ============
        # The ticker may arrive either as a regular column or as the index.
        if "티커" in df.columns:
            df = df.rename(columns={"티커": "ID"})
        elif df.index.name == "티커":
            df = df.reset_index().rename(columns={"티커": "ID"})
        else:
            print(f"[ERROR] {etf_ticker}: 티커 컬럼 없음")
            print(df)
            continue

        df["ID"] = df["ID"].astype(str).apply(extract_primary_ticker)

        # ============ 2) ETF name ============
        df["NAME"] = name_from_input

        # ============ 3) Numeric columns ============
        for source_col, target_col in (("금액", "VALUE"), ("계약수", "AMOUNT")):
            if source_col in df.columns:
                df[target_col] = pd.to_numeric(df[source_col], errors="coerce")
            if target_col not in df.columns:
                # NaN (not None) keeps the column numeric so clip() below
                # operates on floats; both are written as an empty CSV field.
                df[target_col] = float("nan")
            # Negative values are floored at zero.
            df[target_col] = df[target_col].clip(lower=0)

        # ============ 4) Company name lookup ============
        df["COMPANY"] = df["ID"].apply(safe_company_name)

        # ============ 5) Percentage of total value ============
        if df["VALUE"].notna().any():
            total_value = df["VALUE"].sum(skipna=True)
            if total_value > 0:
                df["PERCENTAGE"] = (df["VALUE"] / total_value * 100).round(1)
            else:
                df["PERCENTAGE"] = 0.0
        else:
            df["PERCENTAGE"] = None

        # ============ 6) Final columns ============
        # Selecting final_cols also discards the source columns
        # (금액, 계약수, 비중) that are not part of the output.
        df = df[final_cols].reset_index(drop=True)

        print(df)

        # ============ 7) Append to the CSV (comma-separated, UTF-8) ============
        df.to_csv(
            output_path,
            mode="a",
            header=False,
            index=False,
            encoding="utf-8"
        )


if __name__ == "__main__":
    main()