Spaces:
Paused
Paused
File size: 16,419 Bytes
5c4e969 5f7fe63 f869c25 b666806 df159f4 339a70d df159f4 5c4e969 df159f4 156b204 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5fffe72 df159f4 5c4e969 df159f4 5c4e969 df159f4 c82dbab df159f4 1fcccb3 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 5c4e969 df159f4 b666806 df159f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 |
import os
os.environ['HF_HOME'] = '/tmp/hf_cache'
import streamlit as st
import pandas as pd
import plotly.express as px
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizerFast
import sqlite3
# 타 코드에서 모듈 불러오기
from analyze_portfolio_risk import classify_investment_style # 사용자 성향 파악
# ----------------------------------------------------------------------
# 0. (필수) LLM 모델 로드 및 NASDAQ100 리스트 준비
# ----------------------------------------------------------------------
BASE_PATH = 'src/data/'
BASE_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
ADAPTER_PATH = BASE_PATH + "earningcall"
DB_PATH = BASE_PATH + "news.db"
hf_token = os.environ.get("HF_TOKEN")
from huggingface_hub import login
login(token=hf_token)
@st.cache_data
def load_ticker_data():
TICKERS = pd.read_csv(BASE_PATH + 'ticker_list.csv')
TICKER_OPTIONS_LIST = TICKERS['display_name'].tolist()
DISPLAY_TO_TICKER_MAP = TICKERS.set_index('display_name')['Ticker'].to_dict()
TICKER_TO_PRICE_MAP = TICKERS.set_index('Ticker')['Price'].to_dict()
return TICKER_OPTIONS_LIST, DISPLAY_TO_TICKER_MAP, TICKER_TO_PRICE_MAP
@st.cache_data
def load_company_metrics():
nasdaq = pd.read_csv(BASE_PATH + 'NASDAQ100_metrics.csv')
nasdaq = nasdaq.set_index('Ticker', drop = False)
company = nasdaq.to_dict(orient='index')
return company
@st.cache_data
def load_full_df():
full_df = pd.read_csv(BASE_PATH + "us_market_metrics_sp500_nasdaq100.csv")
return full_df
@st.cache_resource # 모델처럼 무거운 객체는 캐시
def load_my_model():
base_model = LlamaForCausalLM.from_pretrained(
BASE_MODEL_NAME,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
device_map="balanced",)
tokenizer = LlamaTokenizerFast.from_pretrained(BASE_MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
peft_model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
return peft_model, tokenizer
def get_news_summaries_for_ticker(ticker_symbol: str):
conn = sqlite3.connect(DB_PATH)
query = """
SELECT summary
FROM articles
WHERE ticker = ?
AND summary IS NOT NULL AND LENGTH(TRIM(summary)) > 0
ORDER BY pubdate DESC
LIMIT 5
"""
cursor = conn.cursor()
rows = cursor.execute(query, (ticker_symbol.upper(),)).fetchall()
summaries = [row[0] for row in rows]
conn.close()
if not summaries:
return ["최근 뉴스 없음"]
return summaries
# 리포트 전체 생성
def generate_llm_reports(portfolio_list, override_style=None):
df = pd.DataFrame(portfolio_list)
df['total_value'] = df['quantity'] * df['price']
if override_style:
investor_style = override_style
st.toast(f"'{investor_style}' 스타일(수동)로 재생성 시작...")
else:
srisk, investor_style = classify_investment_style(full_market_df, portfolio_list)
st.toast(f"srisk: {srisk:.2f} '{investor_style}' 스타일(자동)로 생성 시작...")
reports = {}
for item in portfolio_list:
ticker = item['ticker']
if ticker in company:
report = generate_llm_report(investor_style, ticker)
reports[ticker] = report
else:
st.warning(f"{ticker} 종목은 NASDAQ100에 없어 건너뜁니다.")
continue
return reports
# LLM 리포트 생성 함수
def generate_llm_report(investor_style, ticker):
peft_model = st.session_state.peft_model
tokenizer = st.session_state.tokenizer
if not peft_model or not tokenizer:
st.error("모델이 로드되지 않았습니다. (generate_llm_report)")
return "오류: 모델 로드 실패"
company_data = str(company[ticker])
news = get_news_summaries_for_ticker(ticker)
news_text = "\n\n".join([f"- {s}" for s in news])
user_prompt = f"""Analyze all the provided data and generate a report tailored to the investor's profile.
1. Investor Style: {investor_style}
2. Company Under Review, Key Data from Corporate Filings:
{company_data}
3. Recent News (Last 10 Articles):
{news_text}
"""
system_prompt = """You are an expert financial analyst. Your mission is to write a concise, objective investment report for a client based on their specific risk profile.
ANALYSIS INSTRUCTIONS:
- Use BOTH the provided financial metrics ("Facts") and recent news headlines ("News").
- Always include the metrics listed under `core_metrics` in Facts. These are the most important indicators for the company/sector.
- Each Key Highlight should integrate at least one financial fact and one news item together (not listed separately).
- Do not hallucinate numbers that are not in Facts.
- Adjust the focus and tone strictly based on the investor's style:
If the style is SAFE (Conservative):
* Focus: Capital preservation and stable income.
* Highlight: Balance sheet strength, liquidity, predictable returns.
* Mention risks (regulatory, legal, earnings decline) first, then cautiously note positives.
* Downplay speculative or uncertain news.
If the style is NEUTRAL (Moderate):
* Focus: A balance between growth and safety.
* Highlight: Strategic trade-offs. Analyze how growth initiatives (from news) interact with financial stability (from facts).
* Present risks and opportunities in equal measure.
If the style is RISKY (Aggressive):
* Focus: High growth potential and maximum returns.
* Highlight: Exciting, forward-looking growth story. Emphasize innovation, expansion, competitive advantages.
* Frame risks as natural volatility on the path to high rewards.
* Place financial facts in the context of supporting aggressive growth.
OUTPUT REPORT TEMPLATE
Report for: A (investor_style) Investor
Company: (company_name)
1. Executive Summary:
(Provide a brief, one-paragraph summary that aligns with the investor's style, integrating at least one key metric and one recent news item.)
2. Key Analysis & Highlights:
(5–7 bullet points. Each bullet must combine a financial metric with a relevant news event, written from the perspective of the given investor style.)
3. Concluding Remark:
(One or two sentences, neutrally summarizing the company’s current standing for this type of investor.
Do NOT provide direct financial advice or buy/sell recommendations.)
IMPORTANT:
- Keep the tone professional and concise.
- Reports must be grounded in Facts and News only.
- Different investor styles should produce clearly differentiated tone and emphasis.
"""
message = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}]
tokens = tokenizer.apply_chat_template(message,tokenize=True,padding=True,add_generation_prompt=True, return_tensors="pt")
input_ids_length = tokens.shape[1]
with torch.no_grad():
res_base = peft_model.generate(tokens, max_new_tokens=1024)
result = tokenizer.decode(res_base[0, input_ids_length:], skip_special_tokens=True)
return result
# ----------------------------------------------------------------------
# 1. 세션 상태(Session State) 초기화
# ----------------------------------------------------------------------
# st.session_state : 스트림릿이 재실행되어도 값을 유지하는 마법의 변수
if 'portfolio' not in st.session_state:
st.session_state.portfolio = [] # 사용자의 포트폴리오를 저장할 리스트
if 'last_report' not in st.session_state:
st.session_state.last_report = None # 생성된 보고서를 저장할 변수
if 'peft_model' not in st.session_state:
st.session_state.peft_model = None
if 'tokenizer' not in st.session_state:
st.session_state.tokenizer = None
# ----------------------------------------------------------------------
# 2. 페이지 기본 설정
# ----------------------------------------------------------------------
st.set_page_config(page_title="AI 주식 포트폴리오 분석", layout="wide")
st.title("🤖 AI 주식 포트폴리오 보고서 생성기")
st.write("NASDAQ 100 종목을 검색하여 포트폴리오를 구성하고, 맞춤형 AI 보고서를 받아보세요.")
TICKER_OPTIONS_LIST, DISPLAY_TO_TICKER_MAP, TICKER_TO_PRICE_MAP = load_ticker_data()
company = load_company_metrics()
full_market_df = load_full_df() # (Srisk 모듈용 데이터)
# ----------------------------------------------------------------------
# 3. 입력 섹션 (종목 추가)
# ----------------------------------------------------------------------
st.subheader("1. 보유 종목 추가하기")
# 컬럼을 나눠서 UI를 깔끔하게 구성
col1, col2 = st.columns([2, 1])
with col1:
# `selectbox`를 검색 가능한 입력창으로 사용
selected_display = st.selectbox(
"종목 검색 (NASDAQ100 or S&P500 티커 또는 기업명)",
options=TICKER_OPTIONS_LIST,
index=None,
placeholder="티커를 검색하거나 선택하세요 (예: AAPL 또는 Apple)"
)
with col2:
quantity = st.number_input("보유 수량 (주)", min_value=0.01, step=0.1, format="%.2f")
# '종목 추가' 버튼
if st.button("➕ 포트폴리오에 추가", use_container_width=True):
selected_ticker = None
if selected_display:
selected_ticker = DISPLAY_TO_TICKER_MAP.get(selected_display)
current_price = TICKER_TO_PRICE_MAP.get(selected_ticker)
if selected_ticker:
st.session_state.portfolio.append({
"ticker": selected_ticker,
"quantity": quantity,
"price": current_price,
"total_value": quantity * current_price
})
st.success(f"{selected_ticker} {quantity}주 (현재가 ${current_price:,.2f})를 포트폴리오에 추가했습니다.")
else:
st.warning("종목, 수량을 모두 올바르게 입력하세요.")
# ----------------------------------------------------------------------
# 4. 포트폴리오 요약 및 보고서 생성 (스케치 레이아웃)
# ----------------------------------------------------------------------
st.subheader("2. 포트폴리오 요약 및 보고서 생성")
col_chart, col_controls = st.columns(2, gap="large")
with col_chart:
st.markdown("### 📊 포트폴리오 구성")
if st.session_state.portfolio:
df = pd.DataFrame(st.session_state.portfolio)
# Plotly 파이 차트 생성 (스케치와 유사하게)
fig = px.pie(
df,
values='total_value',
names='ticker',
hole=.3 # 도넛 차트 형태
)
fig.update_traces(textposition='inside', textinfo='percent+label')
st.plotly_chart(fig, use_container_width=True)
else:
st.info("종목을 추가하면 여기에 파이 차트가 표시됩니다.")
with col_controls:
st.markdown("### ✏️ 포트폴리오 수정 (삭제)")
if st.session_state.portfolio:
df = pd.DataFrame(st.session_state.portfolio)
edited_df = st.data_editor(
df,
column_config={
"ticker": st.column_config.TextColumn("티커", disabled=True),
"quantity": st.column_config.NumberColumn("수량", min_value=0.01, format="%.2f"),
"price": st.column_config.NumberColumn("현재가", disabled=True, format="$%.2f"),
"total_value": st.column_config.NumberColumn("총 가치", disabled=True, format="$%.2f"),
},
hide_index=True,
num_rows="dynamic",
key="portfolio_editor"
)
if not df.equals(edited_df):
# 삭제되거나 수정된 DataFrame을 다시 세션 상태(list of dicts)로 변환
st.session_state.portfolio = edited_df.to_dict('records')
st.toast("포트폴리오가 수정(삭제)되었습니다.")
st.rerun()
if st.button("🔄 포트폴리오 전체 초기화", use_container_width=True, type="secondary"):
st.session_state.portfolio = []
st.session_state.last_report = None
st.toast("포트폴리오가 초기화되었습니다.")
st.rerun()
# ----------------------------------------------------------------------
# 5. 보고서 생성 버튼 (메인 LLM 호출)
# ----------------------------------------------------------------------
if st.button("🚀 AI 보고서 생성하기", type="primary", use_container_width=True, disabled=(not st.session_state.portfolio)):
if st.session_state.peft_model and st.session_state.tokenizer:
with st.spinner("AI가 포트폴리오를 분석하고 보고서를 작성 중입니다..."):
generated_reports = generate_llm_reports(
st.session_state.portfolio)
st.session_state.last_report = generated_reports
else:
# 모델이 아직 로드 중일 때
st.warning("모델이 아직 로드 중입니다. 잠시 후 다시 시도해주세요.")
# ----------------------------------------------------------------------
# 6. 보고서 재생성 버튼 (메인 LLM 호출)
# ----------------------------------------------------------------------
if st.session_state.last_report:
st.markdown("##### 🔄 다른 성향으로 보고서 다시 뽑기")
col_style, col_regen = st.columns([3, 2])
with col_style:
new_style = st.selectbox(
"보고서 성향 선택",
["SAFE", "RISKY", "NEUTRAL"],
key="report_style_select",
label_visibility="collapsed" # 레이블 숨기기
)
with col_regen:
if st.button(f"'{new_style}' 스타일로 재생성", use_container_width=True):
with st.spinner(f"'{new_style}' 스타일로 보고서를 다시 작성 중입니다..."):
regenerated_reports = generate_llm_reports(
st.session_state.portfolio,
override_style=new_style
)
st.session_state.last_report = regenerated_reports
st.rerun() # 화면을 즉시 새로고침
# ----------------------------------------------------------------------
# 7. 보고서 출력 섹션
# ----------------------------------------------------------------------
st.divider()
if st.session_state.last_report:
st.subheader("📑 생성된 AI 보고서")
report_data = st.session_state.last_report
ordered_tickers = [item['ticker'] for item in st.session_state.portfolio if item['ticker'] in report_data]
ticker_tabs = st.tabs(ordered_tickers)
for i, ticker in enumerate(ordered_tickers):
with ticker_tabs[i]:
st.markdown(report_data[ticker]) # LLM이 생성한 마크다운 보고서 출력
else:
st.info("보고서를 생성하면 이 곳에 결과가 표시됩니다.")
# ----------------------------------------------------------------------
# 8. (신규) LLM 모델 로딩 (모든 UI를 그린 후 마지막에 실행)
# ----------------------------------------------------------------------
# peft_model, tokenizer를 st.session_state로 관리
if 'peft_model' not in st.session_state:
st.session_state.peft_model = None
if 'tokenizer' not in st.session_state:
st.session_state.tokenizer = None
# 세션에 모델이 없으면(최초 실행 시) 로드
if st.session_state.peft_model is None:
# (중요) UI를 먼저 그린 후, 스피너를 표시하며 모델 로드
with st.spinner("AI 분석 모델(LLM)을 로드 중입니다... (최초 실행 시 1-2분 소요)"):
st.session_state.peft_model, st.session_state.tokenizer = load_my_model()
# 로드가 완료되면 스피너를 없애기 위해 화면을 한 번 새로고침
st.rerun()
# 세션에 저장된 모델을 전역 변수처럼 사용
peft_model = st.session_state.peft_model
tokenizer = st.session_state.tokenizer
# (중요) 모델 로딩 실패 시 버튼 비활성화
if peft_model is None or tokenizer is None:
st.error("모델 로딩에 실패했습니다. 앱을 새로고침하거나 관리자에게 문의하세요.") |