Spaces:

molba2see
/

confer_tech2

Paused

File size: 16,419 Bytes

import os
os.environ['HF_HOME'] = '/tmp/hf_cache'

import streamlit as st
import pandas as pd
import plotly.express as px
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizerFast
import sqlite3

# 타 코드에서 모듈 불러오기
from analyze_portfolio_risk import classify_investment_style  # 사용자 성향 파악
# ----------------------------------------------------------------------
# 0. (필수) LLM 모델 로드 및 NASDAQ100 리스트 준비
# ----------------------------------------------------------------------

BASE_PATH = 'src/data/'
BASE_MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
ADAPTER_PATH = BASE_PATH + "earningcall"
DB_PATH = BASE_PATH + "news.db"

hf_token = os.environ.get("HF_TOKEN")
from huggingface_hub import login
login(token=hf_token)

@st.cache_data
def load_ticker_data():
    TICKERS = pd.read_csv(BASE_PATH + 'ticker_list.csv')
    TICKER_OPTIONS_LIST = TICKERS['display_name'].tolist()
    DISPLAY_TO_TICKER_MAP = TICKERS.set_index('display_name')['Ticker'].to_dict()
    TICKER_TO_PRICE_MAP = TICKERS.set_index('Ticker')['Price'].to_dict()
    return TICKER_OPTIONS_LIST, DISPLAY_TO_TICKER_MAP, TICKER_TO_PRICE_MAP

@st.cache_data
def load_company_metrics():
    nasdaq = pd.read_csv(BASE_PATH + 'NASDAQ100_metrics.csv')
    nasdaq = nasdaq.set_index('Ticker', drop = False)
    company = nasdaq.to_dict(orient='index')
    return company

@st.cache_data
def load_full_df():
    full_df = pd.read_csv(BASE_PATH + "us_market_metrics_sp500_nasdaq100.csv")
    return full_df

@st.cache_resource  # 모델처럼 무거운 객체는 캐시
def load_my_model():
    base_model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="balanced",)

    tokenizer = LlamaTokenizerFast.from_pretrained(BASE_MODEL_NAME, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    peft_model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
    return peft_model, tokenizer


def get_news_summaries_for_ticker(ticker_symbol: str):
    conn = sqlite3.connect(DB_PATH)
    query = """
    SELECT summary
    FROM articles
    WHERE ticker = ?
      AND summary IS NOT NULL AND LENGTH(TRIM(summary)) > 0
    ORDER BY pubdate DESC
    LIMIT 5
    """
    cursor = conn.cursor()

    rows = cursor.execute(query, (ticker_symbol.upper(),)).fetchall()
    summaries = [row[0] for row in rows]
    conn.close()
    if not summaries:
        return ["최근 뉴스 없음"]
    return summaries

# 리포트 전체 생성
def generate_llm_reports(portfolio_list, override_style=None):
    df = pd.DataFrame(portfolio_list)
    df['total_value'] = df['quantity'] * df['price']

    if override_style:
        investor_style = override_style
        st.toast(f"'{investor_style}' 스타일(수동)로 재생성 시작...")
    else:
        srisk, investor_style = classify_investment_style(full_market_df, portfolio_list)
        st.toast(f"srisk: {srisk:.2f} '{investor_style}' 스타일(자동)로 생성 시작...")

    reports = {}

    for item in portfolio_list:
        ticker = item['ticker']

        if ticker in company:
            report = generate_llm_report(investor_style, ticker)
            reports[ticker] = report
        else:
            st.warning(f"{ticker} 종목은 NASDAQ100에 없어 건너뜁니다.")
            continue

    return reports

# LLM 리포트 생성 함수
def generate_llm_report(investor_style, ticker):

    peft_model = st.session_state.peft_model
    tokenizer = st.session_state.tokenizer
    if not peft_model or not tokenizer:
        st.error("모델이 로드되지 않았습니다. (generate_llm_report)")
        return "오류: 모델 로드 실패"
    
    company_data = str(company[ticker])
    
    news = get_news_summaries_for_ticker(ticker)
    news_text = "\n\n".join([f"- {s}" for s in news])

    user_prompt = f"""Analyze all the provided data and generate a report tailored to the investor's profile.
    1. Investor Style: {investor_style}
    
    2. Company Under Review, Key Data from Corporate Filings:
    {company_data}
    
    3. Recent News (Last 10 Articles):
    {news_text}
    """
    system_prompt = """You are an expert financial analyst. Your mission is to write a concise, objective investment report for a client based on their specific risk profile.
    ANALYSIS INSTRUCTIONS: 
    - Use BOTH the provided financial metrics ("Facts") and recent news headlines ("News"). 
    - Always include the metrics listed under `core_metrics` in Facts. These are the most important indicators for the company/sector. 
    - Each Key Highlight should integrate at least one financial fact and one news item together (not listed separately). 
    - Do not hallucinate numbers that are not in Facts. 
    - Adjust the focus and tone strictly based on the investor's style: 
    If the style is SAFE (Conservative):
    * Focus: Capital preservation and stable income. 
    * Highlight: Balance sheet strength, liquidity, predictable returns. 
    * Mention risks (regulatory, legal, earnings decline) first, then cautiously note positives. 
    * Downplay speculative or uncertain news.
    If the style is NEUTRAL (Moderate):
    * Focus: A balance between growth and safety. 
    * Highlight: Strategic trade-offs. Analyze how growth initiatives (from news) interact with financial stability (from facts). 
    * Present risks and opportunities in equal measure. 
    If the style is RISKY (Aggressive):
    * Focus: High growth potential and maximum returns. 
    * Highlight: Exciting, forward-looking growth story. Emphasize innovation, expansion, competitive advantages. 
    * Frame risks as natural volatility on the path to high rewards. 
    * Place financial facts in the context of supporting aggressive growth. 
    OUTPUT REPORT TEMPLATE
    Report for: A (investor_style) Investor
    Company: (company_name)
    1. Executive Summary:
    (Provide a brief, one-paragraph summary that aligns with the investor's style, integrating at least one key metric and one recent news item.)
    2. Key Analysis & Highlights:
    (5–7 bullet points. Each bullet must combine a financial metric with a relevant news event, written from the perspective of the given investor style.)
    3. Concluding Remark:
    (One or two sentences, neutrally summarizing the company’s current standing for this type of investor. 
    Do NOT provide direct financial advice or buy/sell recommendations.)
    IMPORTANT:
    - Keep the tone professional and concise. 
    - Reports must be grounded in Facts and News only. 
    - Different investor styles should produce clearly differentiated tone and emphasis.
    """

    message = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt}]

    tokens = tokenizer.apply_chat_template(message,tokenize=True,padding=True,add_generation_prompt=True, return_tensors="pt")
    input_ids_length = tokens.shape[1]

    with torch.no_grad():
        res_base = peft_model.generate(tokens, max_new_tokens=1024)

    result = tokenizer.decode(res_base[0, input_ids_length:], skip_special_tokens=True)

    return result


# ----------------------------------------------------------------------
# 1. 세션 상태(Session State) 초기화
# ----------------------------------------------------------------------
# st.session_state : 스트림릿이 재실행되어도 값을 유지하는 마법의 변수
if 'portfolio' not in st.session_state:
    st.session_state.portfolio = []  # 사용자의 포트폴리오를 저장할 리스트
if 'last_report' not in st.session_state:
    st.session_state.last_report = None # 생성된 보고서를 저장할 변수

if 'peft_model' not in st.session_state:
    st.session_state.peft_model = None
if 'tokenizer' not in st.session_state:
    st.session_state.tokenizer = None

# ----------------------------------------------------------------------
# 2. 페이지 기본 설정
# ----------------------------------------------------------------------
st.set_page_config(page_title="AI 주식 포트폴리오 분석", layout="wide")
st.title("🤖 AI 주식 포트폴리오 보고서 생성기")
st.write("NASDAQ 100 종목을 검색하여 포트폴리오를 구성하고, 맞춤형 AI 보고서를 받아보세요.")

TICKER_OPTIONS_LIST, DISPLAY_TO_TICKER_MAP, TICKER_TO_PRICE_MAP = load_ticker_data()
company = load_company_metrics()
full_market_df = load_full_df() # (Srisk 모듈용 데이터)

# ----------------------------------------------------------------------
# 3. 입력 섹션 (종목 추가)
# ----------------------------------------------------------------------
st.subheader("1. 보유 종목 추가하기")

# 컬럼을 나눠서 UI를 깔끔하게 구성
col1, col2 = st.columns([2, 1])

with col1:
    # `selectbox`를 검색 가능한 입력창으로 사용
    selected_display = st.selectbox(
        "종목 검색 (NASDAQ100 or S&P500 티커 또는 기업명)",
        options=TICKER_OPTIONS_LIST,
        index=None,
        placeholder="티커를 검색하거나 선택하세요 (예: AAPL 또는 Apple)"
    )
with col2:
    quantity = st.number_input("보유 수량 (주)", min_value=0.01, step=0.1, format="%.2f")

# '종목 추가' 버튼
if st.button("➕ 포트폴리오에 추가", use_container_width=True):
    selected_ticker = None
    if selected_display:
        selected_ticker = DISPLAY_TO_TICKER_MAP.get(selected_display)
        current_price = TICKER_TO_PRICE_MAP.get(selected_ticker)

    if selected_ticker:
        st.session_state.portfolio.append({
            "ticker": selected_ticker,
            "quantity": quantity,
            "price": current_price,
            "total_value": quantity * current_price
        })
        st.success(f"{selected_ticker} {quantity}주 (현재가 ${current_price:,.2f})를 포트폴리오에 추가했습니다.")
    else:
        st.warning("종목, 수량을 모두 올바르게 입력하세요.")

# ----------------------------------------------------------------------
# 4. 포트폴리오 요약 및 보고서 생성 (스케치 레이아웃)
# ----------------------------------------------------------------------
st.subheader("2. 포트폴리오 요약 및 보고서 생성")

col_chart, col_controls = st.columns(2, gap="large")

with col_chart:
    st.markdown("### 📊 포트폴리오 구성")
    if st.session_state.portfolio:
        df = pd.DataFrame(st.session_state.portfolio)

        # Plotly 파이 차트 생성 (스케치와 유사하게)
        fig = px.pie(
            df, 
            values='total_value', 
            names='ticker', 
            hole=.3 # 도넛 차트 형태
        )
        fig.update_traces(textposition='inside', textinfo='percent+label')
        st.plotly_chart(fig, use_container_width=True)
        
    else:
        st.info("종목을 추가하면 여기에 파이 차트가 표시됩니다.")
    
with col_controls:
    st.markdown("### ✏️ 포트폴리오 수정 (삭제)")
    if st.session_state.portfolio:
        df = pd.DataFrame(st.session_state.portfolio)

        edited_df = st.data_editor(
            df,
            column_config={
                "ticker": st.column_config.TextColumn("티커", disabled=True),
                "quantity": st.column_config.NumberColumn("수량", min_value=0.01, format="%.2f"),
                "price": st.column_config.NumberColumn("현재가", disabled=True, format="$%.2f"),
                "total_value": st.column_config.NumberColumn("총 가치", disabled=True, format="$%.2f"),
            },
            hide_index=True,
            num_rows="dynamic",
            key="portfolio_editor"
        )

        if not df.equals(edited_df):
            # 삭제되거나 수정된 DataFrame을 다시 세션 상태(list of dicts)로 변환
            st.session_state.portfolio = edited_df.to_dict('records')
            st.toast("포트폴리오가 수정(삭제)되었습니다.")
            st.rerun()

    if st.button("🔄 포트폴리오 전체 초기화", use_container_width=True, type="secondary"):
        st.session_state.portfolio = []
        st.session_state.last_report = None
        st.toast("포트폴리오가 초기화되었습니다.")
        st.rerun()

# ----------------------------------------------------------------------
# 5. 보고서 생성 버튼 (메인 LLM 호출)
# ----------------------------------------------------------------------
    if st.button("🚀 AI 보고서 생성하기", type="primary", use_container_width=True, disabled=(not st.session_state.portfolio)):
        if st.session_state.peft_model and st.session_state.tokenizer:
            with st.spinner("AI가 포트폴리오를 분석하고 보고서를 작성 중입니다..."):
                generated_reports = generate_llm_reports(
                    st.session_state.portfolio)
                st.session_state.last_report = generated_reports
        else:
            # 모델이 아직 로드 중일 때
            st.warning("모델이 아직 로드 중입니다. 잠시 후 다시 시도해주세요.")


# ----------------------------------------------------------------------
# 6. 보고서 재생성 버튼 (메인 LLM 호출)
# ----------------------------------------------------------------------
    if st.session_state.last_report:
        st.markdown("##### 🔄 다른 성향으로 보고서 다시 뽑기")
        
        col_style, col_regen = st.columns([3, 2])
        
        with col_style:
            new_style = st.selectbox(
                "보고서 성향 선택",
                ["SAFE", "RISKY", "NEUTRAL"],
                key="report_style_select",
                label_visibility="collapsed" # 레이블 숨기기
            )
        
        with col_regen:
            if st.button(f"'{new_style}' 스타일로 재생성", use_container_width=True):
                with st.spinner(f"'{new_style}' 스타일로 보고서를 다시 작성 중입니다..."):
                    regenerated_reports = generate_llm_reports(
                        st.session_state.portfolio,
                        override_style=new_style
                    )
                    st.session_state.last_report = regenerated_reports
                    st.rerun() # 화면을 즉시 새로고침

# ----------------------------------------------------------------------
# 7. 보고서 출력 섹션
# ----------------------------------------------------------------------
st.divider()

if st.session_state.last_report:
    st.subheader("📑 생성된 AI 보고서")
    
    report_data = st.session_state.last_report
    ordered_tickers = [item['ticker'] for item in st.session_state.portfolio if item['ticker'] in report_data]
    ticker_tabs = st.tabs(ordered_tickers)
    
    for i, ticker in enumerate(ordered_tickers):
        with ticker_tabs[i]:
            st.markdown(report_data[ticker]) # LLM이 생성한 마크다운 보고서 출력
else:
    st.info("보고서를 생성하면 이 곳에 결과가 표시됩니다.")

# ----------------------------------------------------------------------
# 8. (신규) LLM 모델 로딩 (모든 UI를 그린 후 마지막에 실행)
# ----------------------------------------------------------------------

# peft_model, tokenizer를 st.session_state로 관리
if 'peft_model' not in st.session_state:
    st.session_state.peft_model = None
if 'tokenizer' not in st.session_state:
    st.session_state.tokenizer = None

# 세션에 모델이 없으면(최초 실행 시) 로드
if st.session_state.peft_model is None:
    # (중요) UI를 먼저 그린 후, 스피너를 표시하며 모델 로드
    with st.spinner("AI 분석 모델(LLM)을 로드 중입니다... (최초 실행 시 1-2분 소요)"):
        st.session_state.peft_model, st.session_state.tokenizer = load_my_model()
    
    # 로드가 완료되면 스피너를 없애기 위해 화면을 한 번 새로고침
    st.rerun()

# 세션에 저장된 모델을 전역 변수처럼 사용
peft_model = st.session_state.peft_model
tokenizer = st.session_state.tokenizer

# (중요) 모델 로딩 실패 시 버튼 비활성화
if peft_model is None or tokenizer is None:
    st.error("모델 로딩에 실패했습니다. 앱을 새로고침하거나 관리자에게 문의하세요.")