# Source: Hugging Face Space "Asish22/Sentinel_V2" (status: Running)
# File size: 20,459 bytes · revision 5d2eba0

"""
features/research_report.py — AI-Generated Investment Research Reports
Uses LangGraph multi-agent pipeline: Fundamentals, News, Risk, Verdict agents.
Data from SEC EDGAR, Tavily, Alpha Vantage.
"""
import streamlit as st
import json
import re
import time
import requests
import logging
from typing import TypedDict, Dict, Any
from datetime import datetime
from functools import lru_cache

from langgraph.graph import StateGraph, END

# Module-level logger shared by every function in this file.
logger = logging.getLogger("ResearchReport")

# ---------------------------------------------------------------------------
# SEC EDGAR — Dynamic CIK lookup (supports ALL US public companies)
# ---------------------------------------------------------------------------
# HTTP headers passed to every requests.get() call this file makes against
# www.sec.gov and data.sec.gov.
SEC_HEADERS = {"User-Agent": "SentinelAI research@sentinel-ai.app", "Accept-Encoding": "gzip, deflate"}

# Filled lazily by _get_cik_for_ticker() on its first successful download.
# Keys are upper-cased tickers; values are CIKs as 10-character zero-padded
# strings. _resolve_ticker() also reads it to check whether a ticker exists.
_cik_cache: dict = {}  # in-memory cache: ticker -> CIK


def _get_cik_for_ticker(ticker: str) -> str | None:
    """Look up the SEC CIK number for a US public company ticker.

    The complete ticker→CIK table is downloaded from SEC EDGAR the first time
    it is needed and kept in the module-level ``_cik_cache``; every later call
    is a plain dictionary lookup.

    Args:
        ticker: Ticker symbol in any case; surrounding whitespace is ignored.
            If the symbol is not found as written, a spelling with ``.`` or
            ``/`` replaced by ``-`` is tried as well (``BRK.B`` → ``BRK-B``).

    Returns:
        The CIK as a 10-character zero-padded string, or ``None`` when the
        ticker is unknown or the SEC mapping could not be downloaded.
    """
    ticker = ticker.upper().strip()

    # Fetch the full SEC ticker→CIK mapping (cached after first call)
    if not _cik_cache:
        try:
            url = "https://www.sec.gov/files/company_tickers.json"
            resp = requests.get(url, headers=SEC_HEADERS, timeout=15)
            resp.raise_for_status()
            # Build into a local dict first: if parsing fails half-way, the
            # shared cache stays empty and the next call retries, instead of
            # treating a partial table as the complete one forever.
            loaded = {}
            for entry in resp.json().values():
                symbol = str(entry.get("ticker", "")).upper()
                if symbol:  # skip malformed rows that carry no ticker
                    loaded[symbol] = str(entry.get("cik_str", "")).zfill(10)
        except Exception as e:
            logger.error(f"Failed to fetch SEC ticker mappings: {e}")
            return None
        # In-place update keeps the module-level object identity, so other
        # functions that read _cik_cache see the loaded table.
        _cik_cache.update(loaded)
        logger.info(f"Loaded {len(_cik_cache)} ticker→CIK mappings from SEC EDGAR")

    cik = _cik_cache.get(ticker)
    if cik is None:
        # Share classes are often typed with a dot or slash; retry with the
        # hyphenated spelling before giving up.
        hyphenated = ticker.replace(".", "-").replace("/", "-")
        if hyphenated != ticker:
            cik = _cik_cache.get(hyphenated)
    return cik


# ---------------------------------------------------------------------------
# Smart ticker resolution (supports company names AND ticker symbols)
# ---------------------------------------------------------------------------
# Common company names → tickers (fast path)
# Keys are upper-case because _resolve_ticker() upper-cases user input before
# looking it up. Insertion order matters: the partial-match loop in
# _resolve_ticker() walks this dict in order and returns the first hit.
# NOTE(review): "BLOCK"/"SQUARE" map to SQ; Block, Inc. is believed to have
# moved to the ticker XYZ in 2025 — verify before relying on this entry.
_COMMON_NAMES = {
    "AMAZON": "AMZN", "APPLE": "AAPL", "GOOGLE": "GOOGL", "ALPHABET": "GOOGL",
    "MICROSOFT": "MSFT", "TESLA": "TSLA", "NVIDIA": "NVDA", "META": "META",
    "FACEBOOK": "META", "NETFLIX": "NFLX", "AMD": "AMD", "INTEL": "INTC",
    "DISNEY": "DIS", "WALMART": "WMT", "JPMORGAN": "JPM", "GOLDMAN": "GS",
    "BERKSHIRE": "BRK-B", "VISA": "V", "MASTERCARD": "MA", "PAYPAL": "PYPL",
    "UBER": "UBER", "AIRBNB": "ABNB", "SNOWFLAKE": "SNOW", "PALANTIR": "PLTR",
    "COINBASE": "COIN", "SPOTIFY": "SPOT", "SHOPIFY": "SHOP", "SALESFORCE": "CRM",
    "ORACLE": "ORCL", "IBM": "IBM", "CISCO": "CSCO", "ADOBE": "ADBE",
    "BOEING": "BA", "FORD": "F", "GM": "GM", "TOYOTA": "TM",
    "COCA-COLA": "KO", "COCACOLA": "KO", "PEPSI": "PEP", "NIKE": "NKE",
    "STARBUCKS": "SBUX", "MCDONALDS": "MCD", "PFIZER": "PFE", "JOHNSON": "JNJ",
    "EXXON": "XOM", "CHEVRON": "CVX", "COSTCO": "COST", "TARGET": "TGT",
    "BROADCOM": "AVGO", "QUALCOMM": "QCOM", "MICRON": "MU", "RIVIAN": "RIVN",
    "ROBINHOOD": "HOOD", "SOFI": "SOFI", "BLOCK": "SQ", "SQUARE": "SQ",
}

# Name-to-ticker cache from SEC EDGAR
# Maps the upper-cased SEC "title" field to the upper-cased ticker; filled
# lazily by _resolve_ticker() the first time the curated names above miss.
_name_to_ticker_cache: dict = {}


def _resolve_ticker(user_input: str) -> str:
    """Resolve user input (company name or ticker) to a ticker symbol.

    Resolution order:

    1. Exact hit in the curated ``_COMMON_NAMES`` table. This runs first so
       that "Tesla" or "Apple" resolve even before the SEC ticker table has
       been downloaded; otherwise any short alphabetic word would be trusted
       as a ticker on a cold cache. A curated name therefore wins over an
       identically spelled ticker.
    2. Ticker-shaped input (at most 5 characters, letters and ``-`` only):
       accepted if it is in ``_cik_cache``, or unverified when that cache has
       not been loaded yet.
    3. Partial hit in ``_COMMON_NAMES``, anchored at the start of a word so
       that e.g. "STANFORD" does not resolve to Ford.
    4. Exact, then substring, match against SEC EDGAR company titles
       (downloaded lazily into ``_name_to_ticker_cache``).
    5. Fallback: the cleaned input itself.

    Args:
        user_input: Free text typed by the user, any case.

    Returns:
        The upper-cased ticker symbol, or the upper-cased, stripped input
        when nothing matched (an empty string for empty input).
    """
    cleaned = user_input.upper().strip()

    # Empty input would otherwise satisfy every substring test below and
    # resolve to the first curated company.
    if not cleaned:
        logger.warning("Empty ticker/company input — nothing to resolve")
        return cleaned

    # 1. Fast path: exact curated company name
    curated = _COMMON_NAMES.get(cleaned)
    if curated is not None:
        logger.info(f"Resolved '{user_input}' → '{curated}' (common name)")
        return curated

    # 2. Already a short ticker (1-5 chars, letters with optional hyphen)?
    if len(cleaned) <= 5 and cleaned.replace("-", "").isalpha():
        # Verified against SEC data when it is loaded; if the cache is still
        # empty there is nothing to verify against, so trust the user.
        if not _cik_cache or cleaned in _cik_cache:
            return cleaned

    # 3. Partial matches in common names, anchored at word starts.
    #    Either a word of the input begins with a curated name
    #    ("EXXONMOBIL", "APPLE INC."), or the input (3+ chars) is the
    #    beginning of a curated name ("MICRO" → MICROSOFT).
    words = re.split(r"[^A-Z0-9-]+", cleaned)
    for name, ticker in _COMMON_NAMES.items():
        input_contains_name = any(word.startswith(name) for word in words)
        input_is_prefix = len(cleaned) >= 3 and name.startswith(cleaned)
        if input_contains_name or input_is_prefix:
            logger.info(f"Resolved '{user_input}' → '{ticker}' (partial match: {name})")
            return ticker

    # 4. Search SEC EDGAR company names (lazy load)
    if not _name_to_ticker_cache:
        try:
            url = "https://www.sec.gov/files/company_tickers.json"
            resp = requests.get(url, headers=SEC_HEADERS, timeout=15)
            resp.raise_for_status()
            data = resp.json()
            for entry in data.values():
                name = str(entry.get("title", "")).upper()
                ticker = str(entry.get("ticker", "")).upper()
                _name_to_ticker_cache[name] = ticker
        except Exception as e:
            # Best effort: without SEC names we fall through to the fallback.
            logger.warning(f"SEC name lookup failed: {e}")

    # Exact match on SEC company name
    if cleaned in _name_to_ticker_cache:
        resolved = _name_to_ticker_cache[cleaned]
        logger.info(f"Resolved '{user_input}' → '{resolved}' (SEC EDGAR exact)")
        return resolved

    # Partial match on SEC company name (first title containing the input)
    for name, ticker in _name_to_ticker_cache.items():
        if cleaned in name:
            logger.info(f"Resolved '{user_input}' → '{ticker}' (SEC EDGAR partial: {name})")
            return ticker

    # 5. Fallback: return as-is (user probably typed a valid ticker we don't have cached)
    logger.warning(f"Could not resolve '{user_input}' — using as-is")
    return cleaned


# ---------------------------------------------------------------------------
# SEC EDGAR filing fetcher (_fetch_sec_filings, defined after the LangGraph state below)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# LangGraph state
# ---------------------------------------------------------------------------
class ReportState(TypedDict):
    """Shared state passed between the nodes of the report pipeline.

    generate_report() seeds ``ticker`` and ``sec_data``; each agent node in
    _build_report_pipeline() returns one of the remaining keys.
    """

    # Upper-cased ticker symbol the report is about.
    ticker: str
    # Result of _fetch_sec_filings(): either company_name/filings or an "error" entry.
    sec_data: Dict[str, Any]
    # LLM text returned by the fundamentals agent.
    fundamentals_output: str
    # LLM text returned by the news agent.
    news_output: str
    # LLM text returned by the risk agent.
    risk_output: str
    # LLM text returned by the verdict agent.
    verdict_output: str
    # Assembled by the compile node: executive_summary, fundamentals, news, risks, verdict.
    final_report: Dict[str, str]


def _fetch_sec_filings(ticker: str) -> dict:
    """Return recent 10-K / 10-Q / 8-K filing metadata for *ticker* from SEC EDGAR.

    Only the first 50 entries of the "recent" filings listing are scanned and
    at most 10 matching filings are returned.

    Returns:
        ``{"company_name": ..., "filings": [...]}`` on success, where each
        filing has the keys ``form``, ``date``, ``description`` and
        ``accession``; or ``{"error": <message>}`` when the CIK is unknown or
        the request/parse fails.
    """
    cik = _get_cik_for_ticker(ticker.upper())
    if not cik:
        return {"error": f"CIK not found for {ticker}. SEC data unavailable."}

    def _cell(column, idx, default):
        # The listing is column-oriented; columns may be shorter than "form".
        return column[idx] if idx < len(column) else default

    try:
        response = requests.get(
            f"https://data.sec.gov/submissions/CIK{cik}.json",
            headers=SEC_HEADERS,
            timeout=15,
        )
        response.raise_for_status()
        payload = response.json()
        name = payload.get("name", ticker)
        listing = payload.get("filings", {}).get("recent", {})
        form_col = listing.get("form", [])
        date_col = listing.get("filingDate", [])
        desc_col = listing.get("primaryDocDescription", [])
        accession_col = listing.get("accessionNumber", [])

        # Keep annual, quarterly and current reports only.
        selected = [
            {
                "form": form_type,
                "date": _cell(date_col, pos, "N/A"),
                "description": _cell(desc_col, pos, ""),
                "accession": _cell(accession_col, pos, ""),
            }
            for pos, form_type in enumerate(form_col[:50])
            if form_type in ("10-K", "10-Q", "8-K")
        ]
        return {"company_name": name, "filings": selected[:10]}
    except Exception as e:
        logger.error(f"SEC EDGAR fetch failed: {e}")
        return {"error": str(e)}


# ---------------------------------------------------------------------------
# Build the LangGraph pipeline
# ---------------------------------------------------------------------------
def _build_report_pipeline():
    """Build and compile the LangGraph research pipeline.

    The graph runs five nodes strictly in sequence:
    fundamentals → news → risk → verdict → compile. Each agent node builds a
    prompt, sends it through ``call_gemini`` and returns one key of
    ``ReportState``; the compile node gathers those keys into
    ``final_report``.

    Data-fetch failures inside an agent are logged and replaced by
    placeholder text so that the pipeline still produces a report.

    Returns:
        The compiled graph; invoke it with a state containing ``ticker`` and
        ``sec_data``.
    """
    # Imported lazily so the helpers are only needed when a report is built.
    from features.utils import call_gemini, run_tavily_search, fetch_company_overview, fetch_global_quote

    def _search_results(query: str) -> list:
        """Run a Tavily search and flatten the per-query result lists."""
        search_result = run_tavily_search(query) or {}
        hits = []
        for query_result in search_result.get("data") or []:
            hits.extend(query_result.get("results") or [])
        return hits

    def _as_dict(value) -> Dict[str, Any]:
        """Return *value* if it is a dict, else an empty dict (None/other payloads)."""
        return value if isinstance(value, dict) else {}

    def fundamentals_agent(state: ReportState):
        ticker = state["ticker"]
        sec = _as_dict(state.get("sec_data"))

        # Fetch company fundamentals (Revenue, EPS, P/E, Margins, Market Cap)
        overview_data: Dict[str, Any] = {}
        overview_source = "Unavailable"
        try:
            overview_result = _as_dict(fetch_company_overview(ticker))
            overview_data = _as_dict(overview_result.get("data"))
            overview_source = overview_result.get("source", "Unknown")
        except Exception as e:
            logger.warning(f"Company overview fetch failed: {e}")

        # Fetch real-time price quote
        quote_data: Dict[str, Any] = {}
        try:
            quote_result = _as_dict(fetch_global_quote(ticker))
            quote_data = _as_dict(quote_result.get("data"))
        except Exception as e:
            logger.warning(f"Global quote fetch failed: {e}")

        # The description is truncated for the prompt; guard against a
        # present-but-None value, which cannot be sliced.
        raw_description = overview_data.get('Description', 'N/A')
        description = 'N/A' if raw_description is None else str(raw_description)[:300]

        # Build a rich data summary for the LLM
        financials_summary = f"""
Company: {overview_data.get('Name', ticker)} ({overview_data.get('Symbol', ticker)})
Sector: {overview_data.get('Sector', 'N/A')} | Industry: {overview_data.get('Industry', 'N/A')}
Description: {description}

--- FINANCIAL METRICS (Source: {overview_source}) ---
Market Cap: ${overview_data.get('MarketCapitalization', 'N/A')}
Revenue (TTM): ${overview_data.get('RevenueTTM', 'N/A')}
Gross Profit (TTM): ${overview_data.get('GrossProfitTTM', 'N/A')}
EPS: ${overview_data.get('EPS', 'N/A')}
P/E Ratio: {overview_data.get('PERatio', 'N/A')}
Forward P/E: {overview_data.get('ForwardPE', 'N/A')}
Profit Margin: {overview_data.get('ProfitMargin', 'N/A')}
Operating Margin: {overview_data.get('OperatingMarginTTM', 'N/A')}
Return on Equity: {overview_data.get('ReturnOnEquityTTM', 'N/A')}
Revenue Per Share: ${overview_data.get('RevenuePerShareTTM', 'N/A')}
Book Value: ${overview_data.get('BookValue', 'N/A')}
Price to Book: {overview_data.get('PriceToBookRatio', 'N/A')}
Dividend Yield: {overview_data.get('DividendYield', 'N/A')}
Beta: {overview_data.get('Beta', 'N/A')}

--- GROWTH ---
Quarterly Earnings Growth (YoY): {overview_data.get('QuarterlyEarningsGrowthYOY', 'N/A')}
Quarterly Revenue Growth (YoY): {overview_data.get('QuarterlyRevenueGrowthYOY', 'N/A')}

--- PRICE DATA ---
Current Price: ${quote_data.get('price', 'N/A')}
Today's Change: {quote_data.get('change', 'N/A')} ({quote_data.get('change_percent', 'N/A')})
Today's Open: ${quote_data.get('open', 'N/A')}
Today's High: ${quote_data.get('high', 'N/A')}
Today's Low: ${quote_data.get('low', 'N/A')}
Volume: {quote_data.get('volume', 'N/A')}
Previous Close: ${quote_data.get('previous_close', 'N/A')}
52-Week High: ${overview_data.get('52WeekHigh', 'N/A')}
52-Week Low: ${overview_data.get('52WeekLow', 'N/A')}
50-Day MA: ${overview_data.get('50DayMovingAverage', 'N/A')}
200-Day MA: ${overview_data.get('200DayMovingAverage', 'N/A')}

--- ANALYST CONSENSUS ---
Target Price: ${overview_data.get('AnalystTargetPrice', 'N/A')}
Buy Ratings: {overview_data.get('AnalystRatingBuy', 'N/A')}
Hold Ratings: {overview_data.get('AnalystRatingHold', 'N/A')}
Sell Ratings: {overview_data.get('AnalystRatingSell', 'N/A')}
"""

        prompt = f"""You are a financial fundamentals analyst. Analyze {ticker}.

{financials_summary}

SEC Filings Summary: {json.dumps((sec.get('filings') or [])[:5], indent=2)}

Based on ALL the data above, provide:
1. Business overview (2-3 sentences)
2. Key financial metrics analysis — use the ACTUAL numbers provided (Revenue, EPS, Margins, P/E, etc.)
3. Year-over-year growth assessment using the quarterly growth data
4. A markdown table of key metrics with their actual values
5. Valuation assessment (is it overvalued/undervalued based on P/E, P/B, analyst targets?)

Use the real numbers. Be specific and data-driven."""

        result = call_gemini(prompt, "You are a senior equity research analyst specializing in fundamental analysis.")
        return {"fundamentals_output": result}

    def news_agent(state: ReportState):
        ticker = state["ticker"]
        try:
            articles = [
                f"- **{hit.get('title') or ''}**: {(hit.get('content') or '')[:200]}..."
                for hit in _search_results(f"{ticker} stock news last 30 days analysis")
            ]
            news_text = "\n".join(articles[:8]) if articles else "No recent news found."
        except Exception as e:
            # Best effort: the report is still produced without news input.
            logger.warning(f"News search failed: {e}")
            news_text = "News search unavailable."

        prompt = f"""Summarize the last 30 days of news for {ticker}:

{news_text}

Provide:
1. Overall news sentiment (Bullish/Bearish/Neutral)
2. Top 3-5 key headlines with brief explanations
3. Any catalysts or upcoming events mentioned
Be concise and factual."""
        result = call_gemini(prompt, "You are a financial news analyst summarizing market intelligence.")
        return {"news_output": result}

    def risk_agent(state: ReportState):
        ticker = state["ticker"]
        sec = _as_dict(state.get("sec_data"))
        filings_text = json.dumps(sec.get("filings") or [], indent=2)

        try:
            risk_articles = [
                (hit.get("content") or "")[:300]
                for hit in _search_results(f"{ticker} 10-K risk factors annual report risks")
            ]
            # Say so explicitly when the search is empty instead of sending
            # the model a blank research section (mirrors the news agent).
            risk_text = "\n".join(risk_articles[:5]) if risk_articles else "No risk-related research found."
        except Exception as e:
            # Best effort: fall back to the filing history alone.
            logger.warning(f"Risk search failed: {e}")
            risk_text = "Risk search unavailable."

        prompt = f"""You are a risk analyst. Identify key risk factors for {ticker}.

SEC Filing History: {filings_text}
Risk-Related Research: {risk_text}

Provide:
1. Top 5 risk factors (ranked by severity)
2. Risk category for each (Operational, Financial, Regulatory, Market, Competitive)
3. Brief mitigation outlook for each
Format as a numbered list."""
        result = call_gemini(prompt, "You are a senior risk analyst at a major investment bank.")
        return {"risk_output": result}

    def verdict_agent(state: ReportState):
        prompt = f"""You are the lead analyst writing the final investment verdict for {state['ticker']}.

FUNDAMENTALS ANALYSIS:
{state.get('fundamentals_output', 'N/A')}

NEWS & SENTIMENT:
{state.get('news_output', 'N/A')}

RISK ASSESSMENT:
{state.get('risk_output', 'N/A')}

Based on ALL the above analysis, provide:
1. **Recommendation**: Buy / Hold / Sell (with conviction level: High/Medium/Low)
2. **Price Target**: Estimated 12-month price target with brief methodology
3. **Bull Case** (2-3 sentences)
4. **Bear Case** (2-3 sentences)
5. **Key Catalysts to Watch** (3-5 bullet points)

Be specific and data-driven. Reference specific findings from the analysis above."""
        result = call_gemini(prompt, "You are a senior investment strategist issuing a formal recommendation.")
        return {"verdict_output": result}

    def compile_report(state: ReportState):
        # Gather the per-agent outputs under the keys the renderer reads.
        return {
            "final_report": {
                "executive_summary": f"Research report for **{state['ticker']}** generated on {datetime.now().strftime('%Y-%m-%d %H:%M')}.",
                "fundamentals": state.get("fundamentals_output", ""),
                "news": state.get("news_output", ""),
                "risks": state.get("risk_output", ""),
                "verdict": state.get("verdict_output", ""),
            }
        }

    workflow = StateGraph(ReportState)
    workflow.add_node("fundamentals", fundamentals_agent)
    workflow.add_node("news", news_agent)
    workflow.add_node("risk", risk_agent)
    workflow.add_node("verdict", verdict_agent)
    workflow.add_node("compile", compile_report)

    # Linear chain: the verdict needs all three analyses before it runs.
    workflow.set_entry_point("fundamentals")
    workflow.add_edge("fundamentals", "news")
    workflow.add_edge("news", "risk")
    workflow.add_edge("risk", "verdict")
    workflow.add_edge("verdict", "compile")
    workflow.add_edge("compile", END)

    return workflow.compile()


# ---------------------------------------------------------------------------
# Cached report generation
# ---------------------------------------------------------------------------
# Cached reports are reused for at most this long. The report summarises
# "the last 30 days" of news and live prices, so entries must not live for
# the whole process lifetime.
_REPORT_TTL_SECONDS = 3600


@lru_cache(maxsize=128)
def _generate_report_cached(resolved: str, _ttl_bucket: int) -> dict:
    """Run the pipeline for an already-resolved ticker (cached).

    ``_ttl_bucket`` is not used in the body; it is part of the cache key so
    that entries stop matching once the time bucket changes.
    """
    sec_data = _fetch_sec_filings(resolved)
    pipeline = _build_report_pipeline()
    result = pipeline.invoke({"ticker": resolved, "sec_data": sec_data})
    report = dict(result.get("final_report") or {})
    report["_resolved_ticker"] = resolved
    return report


def generate_report(ticker: str) -> dict:
    """Generate (or reuse a cached) research report for a ticker or company name.

    The input is resolved to a ticker *before* the cache lookup, so "AAPL",
    "aapl" and "Apple" share one cache entry instead of each running the
    full multi-agent pipeline.

    Args:
        ticker: Ticker symbol or company name as typed by the user.

    Returns:
        A new dict with the keys ``executive_summary``, ``fundamentals``,
        ``news``, ``risks``, ``verdict`` and ``_resolved_ticker``. A copy is
        returned so callers cannot mutate the cached report.
    """
    # Resolve company names to ticker symbols
    resolved = _resolve_ticker(ticker).upper()
    ttl_bucket = int(time.time() // _REPORT_TTL_SECONDS)
    return dict(_generate_report_cached(resolved, ttl_bucket))


# Keep the lru_cache management hooks reachable under the public name.
generate_report.cache_clear = _generate_report_cached.cache_clear
generate_report.cache_info = _generate_report_cached.cache_info


# ---------------------------------------------------------------------------
# Streamlit page renderer
# ---------------------------------------------------------------------------
def render_research_report():
    """Render the Streamlit "AI-Generated Research Report" page.

    Shows a ticker/company input with a generate button. When the button is
    pressed with non-empty input, the report is generated and stored in
    ``st.session_state`` under ``rr_report`` / ``rr_display_ticker``. Any
    report found in session state is then rendered section by section,
    followed by a PDF export button.
    """
    st.markdown("## 🌳💰 AI-Generated Research Report")
    st.caption(
        "Generate a comprehensive, multi-agent investment research report for any stock. "
        "Powered by SEC EDGAR, Tavily news search, Alpha Vantage, and Google Gemini."
    )

    input_col, button_col = st.columns([3, 1])
    with input_col:
        ticker = st.text_input(
            "Enter Ticker or Company Name:",
            placeholder="e.g. AAPL, Tesla, Amazon, NVDA",
            key="rr_ticker",
        ).strip()
    with button_col:
        # Spacer so the button lines up with the text input.
        st.markdown("<br>", unsafe_allow_html=True)
        generate_btn = st.button("🔬 Generate Report", use_container_width=True, key="rr_generate")

    if generate_btn and ticker:
        with st.status("🚀 Multi-Agent Research Pipeline Active...", expanded=True) as status:
            status.write("📡 Fetching SEC filings...")
            time.sleep(0.5)
            # These lines are written up front, before the pipeline starts.
            for stage_message in (
                "🔬 FundamentalsAgent analyzing financials...",
                "📰 NewsAgent scanning last 30 days...",
                "⚠️ RiskAgent evaluating risk factors...",
                "🎯 VerdictAgent synthesizing recommendation...",
            ):
                status.write(stage_message)

            try:
                report = generate_report(ticker)
                typed_upper = ticker.upper()
                resolved = report.get("_resolved_ticker", typed_upper)
                st.session_state["rr_report"] = report
                st.session_state["rr_display_ticker"] = resolved
                if resolved != typed_upper:
                    status.write(f"🔄 Resolved '{ticker}' → {resolved}")
                status.update(label=f"✅ Report Complete for {resolved}!", state="complete", expanded=False)
            except Exception as e:
                status.update(label="❌ Pipeline Error", state="error")
                st.error(f"Failed to generate report: {e}")
                return

    # Display report
    report = st.session_state.get("rr_report")
    if not report:
        return

    ticker_display = st.session_state.get("rr_display_ticker", "")
    st.markdown(f"### 🌳💰 Research Report: **{ticker_display}**")
    st.info(report.get("executive_summary", ""))

    # (on-page heading, PDF section title, report key)
    body_sections = (
        ("📋 Business Overview & Financial Health", "Business Overview & Financial Health", "fundamentals"),
        ("📰 Recent News & Sentiment", "Recent News & Sentiment", "news"),
        ("⚠️ Risk Factors", "Risk Factors", "risks"),
        ("🎯 Analyst Verdict & Price Target", "Analyst Verdict & Price Target", "verdict"),
    )
    for heading, _pdf_title, report_key in body_sections:
        st.subheader(heading)
        st.markdown(report.get(report_key, 'No data available.'))
        st.markdown("---")

    # PDF Download
    if st.button("📥 Download as PDF", key="rr_pdf"):
        from features.utils import export_to_pdf

        sections = [{"title": "Executive Summary", "body": report.get("executive_summary", "")}]
        sections.extend(
            {"title": pdf_title, "body": report.get(report_key, "")}
            for _heading, pdf_title, report_key in body_sections
        )
        pdf_bytes = export_to_pdf(sections, f"{ticker_display}_report.pdf")
        st.download_button(
            label="⬇️ Download PDF",
            data=pdf_bytes,
            file_name=f"{ticker_display}_Research_Report.pdf",
            mime="application/pdf",
            key="rr_pdf_dl",
        )