# portfolio / theme_info.py
# eric2digit's picture
# Upload folder using huggingface_hub
# bf3714e verified
import argparse
import csv
import html
import json
import os
from openai import OpenAI
# Module-level OpenAI client shared by the GPT-backed methods of theme_info
# (get_ticker_and_name and generate_theme_gpt).
# NOTE(review): constructed at import time with no arguments; presumably the
# API key is picked up from the environment — confirm before importing this
# module in contexts without credentials.
client = OpenAI()
def parse_stock_input(stock_arg):
    """Split a comma-separated string into a list of stock names.

    Each piece is whitespace-trimmed, then one pair of matching surrounding
    quotes (single or double) is removed. Pieces that end up empty are
    dropped.

    Args:
        stock_arg: Raw user input such as ``'Apple, "Tesla"'``.

    Returns:
        The cleaned names in input order; an empty list when ``stock_arg``
        is not a string.
    """
    if not isinstance(stock_arg, str):
        return []

    def unquote(token):
        # Only strip when the first and last characters are the same quote.
        if token and token[0] in ('"', "'") and token[-1] == token[0]:
            return token[1:-1]
        return token

    cleaned = (unquote(piece.strip()) for piece in stock_arg.split(","))
    return [name for name in cleaned if name]
class theme_info:
    """CSV-backed store of investment themes per stock, extended via GPT.

    The CSV must have the columns NAME, THEME, THEME_2 and THEME_3. Rows are
    kept both as raw dicts (``_raw``) and as formatted description strings
    keyed by name (``_store``); ``_ci_index`` maps lower-cased names back to
    their stored spelling for case-insensitive lookup.
    """

    _FIELDNAMES = ["NAME", "THEME", "THEME_2", "THEME_3"]

    def __init__(self, data):
        """Load the theme CSV at path ``data``.

        Raises:
            FileNotFoundError: if ``data`` does not exist.
            ValueError: if a required column header is missing.
        """
        self.path = data
        self._store = {}
        self._raw = []
        if not os.path.exists(data):
            raise FileNotFoundError(f"Data file not found: {data}")
        # Load the CSV. "utf-8-sig" also accepts files that start with a BOM,
        # and newline="" lets the csv module handle line endings itself.
        with open(data, "r", encoding="utf-8-sig", newline="") as f:
            reader = csv.DictReader(f)
            required_cols = {"NAME", "THEME", "THEME_2", "THEME_3"}
            headers = [n.strip() for n in (reader.fieldnames or [])]
            if not required_cols.issubset(headers):
                raise ValueError(
                    f"CSV must contain headers {sorted(required_cols)}. Found: {reader.fieldnames}"
                )
            # Use the stripped headers as row keys too, so a padded header
            # such as " THEME " that passed the check above is still found.
            reader.fieldnames = headers
            for row in reader:
                name = (row.get("NAME") or "").strip()
                theme = html.unescape((row.get("THEME") or "").strip())
                theme2 = html.unescape((row.get("THEME_2") or "").strip())
                theme3 = html.unescape((row.get("THEME_3") or "").strip())
                self._store[name] = self._describe(theme, theme2, theme3)
                self._raw.append({
                    "NAME": name,
                    "THEME": theme,
                    "THEME_2": theme2,
                    "THEME_3": theme3
                })
        self._ci_index = {name.lower(): name for name in self._store}

    @staticmethod
    def _describe(theme, theme2, theme3):
        """Format three theme strings into the stored description line."""
        return f"Theme: {theme} | Theme 2: {theme2} | Theme 3: {theme3}"

    @staticmethod
    def _parse_json_object(text):
        """Decode ``text`` as a JSON object.

        If the text as a whole is not valid JSON (for example because it is
        wrapped in a Markdown code fence), the span from the first ``{`` to
        the last ``}`` is tried instead.

        Raises:
            TypeError: if ``text`` is not a string.
            ValueError: if no JSON object can be decoded.
        """
        if not isinstance(text, str):
            raise TypeError("model output is not text")
        try:
            parsed = json.loads(text)
        except ValueError:
            start, end = text.find("{"), text.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(text[start:end + 1])
        if not isinstance(parsed, dict):
            raise ValueError("model output is not a JSON object")
        return parsed

    def get_ticker_and_name(self, stock):
        """Ask GPT for the ticker symbol and official name of ``stock``.

        Returns:
            A ``(ticker, name)`` tuple taken from the "ticker" and "name"
            keys of the model's JSON reply, or ``(None, None)`` when the
            reply cannot be parsed as a JSON object. Errors raised by the
            API call itself are not caught here.
        """
        # The third few-shot example previously reused the SK Hynix ticker
        # (000660.KS) for Hyundai Motor; it now carries 005380.KS.
        prompt = f"""
์•„๋ž˜ ์ข…๋ชฉ๋ช…์— ๋Œ€ํ•ด Yahoo Finance ๊ธฐ์ค€:
1) ํ‹ฐ์ปค(symbol)
2) ๊ณต์‹ ์ข…๋ชฉ๋ช…(full name)
๋‹จ, ์ค‘์š”ํ•œ ์กฐ๊ฑด:
- ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ข…๋ชฉ๋ช…์ด ํ•œ๊ตญ์–ด๋ผ๋ฉด โ†’ ๊ณต์‹ ์ข…๋ชฉ๋ช…๋„ ํ•œ๊ตญ์–ด๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.
- ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ข…๋ชฉ๋ช…์— ํ•œ๊ตญ์–ด๊ฐ€ ์กฐ๊ธˆ์ด๋ผ๋„ ํฌํ•จ๋˜์–ด ์žˆ๋‹ค๋ฉด โ†’ ๊ณต์‹ ์ข…๋ชฉ๋ช…๋„ ํ•œ๊ตญ์–ด๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.
- ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ข…๋ชฉ๋ช…์ด ์˜์–ด๋ผ๋ฉด โ†’ ๊ณต์‹ ์ข…๋ชฉ๋ช…๋„ ์˜์–ด๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.
- ํ•œ๊ตญ ์ข…๋ชฉ์€ '.KS', '.KQ' ๋“ฑ KRX ํ˜•์‹์˜ ํ‹ฐ์ปค.
- ํ•ด์™ธ ์ข…๋ชฉ(๋ฏธ๊ตญ/์œ ๋Ÿฝ/ํ™์ฝฉ ๋“ฑ)์€ ํ•ด๋‹น ์‹œ์žฅ์˜ ์ผ๋ฐ˜ ํ‹ฐ์ปค๋ฅผ ์‚ฌ์šฉ.
์˜ˆ์‹œ:
์‚ฌ์šฉ์ž ์ž…๋ ฅ: 'SKํ•˜์ด๋‹‰์Šค' โ†’ {{
"ticker": "000660.KS",
"name": "SKํ•˜์ด๋‹‰์Šค"
}}
์‚ฌ์šฉ์ž ์ž…๋ ฅ: 'SKHynix' โ†’ {{
"ticker": "000660.KS",
"name": "SK Hynix Inc."
}}
์‚ฌ์šฉ์ž ์ž…๋ ฅ: 'ํ˜„๋Œ€์ž๋™์ฐจ' โ†’ {{
"ticker": "005380.KS",
"name": "ํ˜„๋Œ€์ฐจ"
}}
์ข…๋ชฉ๋ช…: {stock}
์˜ค์ง JSON์œผ๋กœ๋งŒ ๋‹ต๋ณ€ํ•˜์„ธ์š”.
"""
        resp = client.responses.create(model="gpt-5.1", input=prompt)
        try:
            parsed = self._parse_json_object(resp.output_text)
        except (ValueError, TypeError, AttributeError):
            print("[ERROR] GPT ํ‹ฐ์ปค/ํ’€๋„ค์ž„ JSON ํŒŒ์‹ฑ ์‹คํŒจ")
            return None, None
        return parsed.get("ticker"), parsed.get("name")

    def find_existing_row(self, full_name):
        """Return the first loaded row whose NAME equals ``full_name``.

        The comparison ignores case. Returns ``None`` when nothing matches
        or when ``full_name`` is not a string.
        """
        if not isinstance(full_name, str):
            return None
        wanted = full_name.lower()
        return next(
            (r for r in self._raw if r["NAME"].lower() == wanted),
            None
        )

    def generate_theme_gpt(self, stock_name):
        """Ask GPT for three investment themes for ``stock_name``.

        Returns:
            The decoded JSON object (expected keys: "theme", "theme2",
            "theme3"). On any failure of the request or of parsing, a dict
            with those three keys set to empty strings is returned instead.
        """
        prompt = f"""
์ข…๋ชฉ '{stock_name}'๊ณผ ๊ด€๋ จ๋œ ํˆฌ์ž ํ…Œ๋งˆ 3๊ฐœ ์ถ”์ฒœ
๊ด„ํ˜ธ ์—†์ด ๊ฐ„๋‹จํ•˜๊ฒŒ ํ•ต์‹ฌ ํ…Œ๋งˆ ํ‘œํ˜„
- ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ข…๋ชฉ๋ช…์ด ํ•œ๊ตญ์–ด๋ผ๋ฉด โ†’ ํ…Œ๋งˆ๋„ ํ•œ๊ตญ์–ด๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.
- ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ข…๋ชฉ๋ช…์— ํ•œ๊ตญ์–ด๊ฐ€ ์กฐ๊ธˆ์ด๋ผ๋„ ํฌํ•จ๋˜์–ด ์žˆ๋‹ค๋ฉด โ†’ ํ…Œ๋งˆ๋„ ํ•œ๊ตญ์–ด๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.
- ์‚ฌ์šฉ์ž๊ฐ€ ์ž…๋ ฅํ•œ ์ข…๋ชฉ๋ช…์ด ์˜์–ด๋ผ๋ฉด โ†’ ํ…Œ๋งˆ๋„ ์˜์–ด๋กœ ๋ฐ˜ํ™˜ํ•˜์„ธ์š”.
JSON ํ˜•์‹์œผ๋กœ ์ถœ๋ ฅ: {{"theme": "...", "theme2": "...", "theme3": "..."}}
"""
        try:
            response = client.responses.create(model="gpt-5.1", input=prompt)
            return self._parse_json_object(response.output_text)
        except Exception:
            # Deliberate best-effort: theme generation must not abort the
            # run. Unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print(f"[WARN] GPT ํ…Œ๋งˆ ์ƒ์„ฑ ์‹คํŒจ({stock_name})")
            return {"theme": "", "theme2": "", "theme3": ""}

    def _ends_with_newline(self):
        """Return True if the last byte of the (non-empty) CSV is CR or LF."""
        with open(self.path, "rb") as f:
            f.seek(-1, os.SEEK_END)
            return f.read(1) in (b"\n", b"\r")

    def _save_to_csv(self, row):
        """Append a single row dict to the CSV at ``self.path``.

        A header is written first when the file is missing or empty. If the
        existing file does not end with a line break, one is inserted so the
        new row does not get glued onto the previous last row.
        """
        # NOTE(review): rows are always written in NAME/THEME/THEME_2/THEME_3
        # order; this assumes the existing file uses that column order.
        needs_header = not os.path.exists(self.path) or os.path.getsize(self.path) == 0
        needs_newline = not needs_header and not self._ends_with_newline()
        with open(self.path, "a", newline="", encoding="utf-8") as f:
            if needs_newline:
                f.write("\n")
            writer = csv.DictWriter(
                f,
                fieldnames=self._FIELDNAMES,
                quoting=csv.QUOTE_ALL
            )
            if needs_header:
                writer.writeheader()
            writer.writerow(row)
        print(f"CSV append ์™„๋ฃŒ: {row['NAME']}")

    def make(self, stock):
        """Resolve ``stock`` and return its theme row, creating it if needed.

        Steps: look up ticker and official name via GPT, return the existing
        CSV row if that name is already loaded, otherwise generate themes via
        GPT, append the new row to the CSV and register it in memory.

        Returns:
            A one-element list holding the row dict, or ``None`` when the
            ticker or the official name could not be determined.
        """
        # 1) Look up ticker and official name
        ticker_symbol, full_name = self.get_ticker_and_name(stock)
        # A missing or non-text name would otherwise crash the lookups below.
        if not ticker_symbol or not isinstance(full_name, str) or not full_name.strip():
            print("[ERROR] ํ‹ฐ์ปค ์กฐํšŒ ์‹คํŒจ")
            return None
        full_name = full_name.strip()
        print(f"GPT ๊ฒฐ๊ณผ โ†’ ticker: {ticker_symbol}, full name: {full_name}")
        # 2) Check rows already loaded from the CSV
        existing = self.find_existing_row(full_name)
        if existing:
            print(f"'{full_name}' ๊ธฐ์กด CSV ์กด์žฌ โ†’ ๊ธฐ์กด๊ฐ’ ๋ฐ˜ํ™˜")
            return [existing]
        # 3) Generate themes via GPT; missing or non-text values become "".
        theme_data = self.generate_theme_gpt(full_name)
        if not isinstance(theme_data, dict):
            theme_data = {}

        def clean(key):
            value = theme_data.get(key)
            return value.strip() if isinstance(value, str) else ""

        new_row = {
            "NAME": full_name,
            "THEME": clean("theme"),
            "THEME_2": clean("theme2"),
            "THEME_3": clean("theme3")
        }
        # Persist to the CSV
        self._save_to_csv(new_row)
        # Keep all three in-memory views in sync, including the
        # case-insensitive index used by get().
        self._store[full_name] = self._describe(
            new_row["THEME"], new_row["THEME_2"], new_row["THEME_3"]
        )
        self._ci_index[full_name.lower()] = full_name
        self._raw.append(new_row)
        print("[STEP] make() ์™„๋ฃŒ")
        return [new_row]

    def get(self, stock):
        """Return the description string for ``stock``, or ``None``.

        Lookup order: exact name, case-insensitive name, then substring
        match, which is used only when exactly one stored name contains the
        query. Empty, whitespace-only or non-string queries return ``None``.
        """
        if not stock or not isinstance(stock, str):
            return None
        key = stock.strip()
        if not key:
            # An empty needle is a substring of every name; never match it.
            return None
        if key in self._store:
            return self._store[key]
        folded = key.lower()
        canonical = self._ci_index.get(folded)
        if canonical is not None:
            return self._store[canonical]
        # Partial match: accept only an unambiguous single candidate
        candidates = [n for n in self._store if folded in n.lower()]
        if len(candidates) == 1:
            return self._store[candidates[0]]
        return None
def ensure_parent_dir(json_path):
    """Create the directory that will contain ``json_path`` if it is missing.

    The path is made absolute first, so a bare file name resolves to the
    current working directory. Nothing is done when the parent path already
    exists.
    """
    target_dir = os.path.dirname(os.path.abspath(json_path))
    if not target_dir or os.path.exists(target_dir):
        return
    os.makedirs(target_dir, exist_ok=True)
def append_to_json(records, json_path, class_name="theme_info"):
    """Append ``records`` to the list stored under ``class_name`` in a JSON file.

    The file is read if it exists, the records are added to
    ``data[class_name]`` and the whole document is written back. Other
    top-level keys in an existing JSON object are preserved.

    Args:
        records: Iterable of JSON-serialisable records to append.
        json_path: Destination file; its parent directory is created.
        class_name: Top-level key that holds the record list.

    If the existing file cannot be read, is not valid JSON, or its top level
    is not an object, a warning is printed and the file is rebuilt from
    scratch. A non-list value already stored under ``class_name`` is kept as
    the first element of the new list rather than discarded.
    """
    ensure_parent_dir(json_path)
    data = {}
    if os.path.exists(json_path):
        try:
            with open(json_path, "r", encoding="utf-8") as rf:
                loaded = json.load(rf)
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            loaded = None
        if isinstance(loaded, dict):
            data = loaded
        else:
            print("[WARN] ๊ธฐ์กด JSON ๋กœ๋“œ ์‹คํŒจ. ์ƒˆ ํŒŒ์ผ ์ƒ์„ฑ")
    existing = data.get(class_name)
    if not isinstance(existing, list):
        existing = [] if existing is None else [existing]
    existing.extend(records)
    data[class_name] = existing
    with open(json_path, "w", encoding="utf-8") as wf:
        json.dump(data, wf, ensure_ascii=False, indent=2)
    print(f"[5/5] {json_path} ์—…๋ฐ์ดํŠธ ์™„๋ฃŒ.\n")
def main():
    """Command-line entry point.

    Parses ``--stock``, ``--data`` and ``--output``, resolves each requested
    stock through ``theme_info.make``, looks up the resulting description
    with ``theme_info.get`` and appends the results to the output JSON file.
    """
    parser = argparse.ArgumentParser(description="์ข…๋ชฉ์˜ ํ…Œ๋งˆ ๊ฒ€์ƒ‰ ๋„๊ตฌ")
    # The default must be a list: with nargs="+" a plain string default is
    # passed through unchanged, and " ".join("Meta") would yield "M e t a".
    parser.add_argument("--stock", default=["Meta"], nargs="+", help="์ข…๋ชฉ ์ž…๋ ฅ. ์—ฌ๋Ÿฌ ๊ฐœ๋Š” ๊ณต๋ฐฑ ๋˜๋Š” ์ฝค๋งˆ(,) ๋กœ ๊ตฌ๋ถ„ ๊ฐ€๋Šฅ")
    parser.add_argument("--data", default="/work/portfolio/data/theme_info.csv")
    parser.add_argument("--output", default="output.json")
    args = parser.parse_args()
    # --- 1. Validate arguments ---
    # Tokens are re-joined with spaces so multi-word names survive shell
    # splitting; the actual separation between stocks happens on commas.
    stock_str = " ".join(args.stock)
    stock_list = parse_stock_input(stock_str)
    if not stock_list:
        print("[ERROR] --stock์— ์œ ํšจํ•œ ์ข…๋ชฉ ์—†์Œ")
        return
    print("[1/5] ์ธ์ž(args) ํ™•์ธ.")
    print(f"[INFO] --stock {args.stock}")
    print(f"[INFO] --data {args.data}")
    print(f"[INFO] --output {args.output}")
    print(f"[INFO] ์ž…๋ ฅ๋œ ์ข…๋ชฉ ๋ฆฌ์ŠคํŠธ: {stock_list}\n")
    # --- 2. Build the store ---
    ti = theme_info(args.data)
    print("[2/5] theme_info ๊ฐ์ฒด ์ƒ์„ฑ ์™„๋ฃŒ.\n")
    # --- 3. Run make() for every stock ---
    print("[3/5] make() ์‹คํ–‰.")
    make_results = []
    for stock in stock_list:
        print(f"[INFO] ์ฒ˜๋ฆฌ ์ค‘: {stock}")
        rows = ti.make(stock)
        if rows:
            make_results.extend(rows)
    print()
    # --- 4. Run get() for every resolved row ---
    print("[4/5] get() ์‹คํ–‰.")
    all_records = []
    for row in make_results:
        official_name = row["NAME"]
        desc = ti.get(official_name)
        all_records.append({"stock": official_name, "desc": desc})
        print(f"[INFO] {official_name} ์กฐํšŒ ์™„๋ฃŒ")
        print(f"{desc}")
    print()
    # --- 5. Save ---
    if all_records:
        append_to_json(all_records, args.output, class_name="theme_info")
    else:
        print("[INFO] ์ €์žฅํ•  JSON ๋ฐ์ดํ„ฐ ์—†์Œ")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()