Spaces:

Aarya003
/

Financial-Analyst-Agent

Sleeping

App Files Community

Aarya003 committed on Feb 19

Commit

22055b7

verified ·

1 Parent(s): 6171369

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +135 -128

src/streamlit_app.py CHANGED Viewed

@@ -12,88 +12,81 @@ from llama_index.program.openai import OpenAIPydanticProgram
 from llama_index.llms.openai import OpenAI
 from llama_index.core.vector_stores import MetadataFilters, ExactMatchFilter
-# --- 1. CONFIGURATION ---
-st.set_page_config(page_title="Financial Agent (Strict Logic)", page_icon="📈", layout="wide")
 # Ensure keys exist
 if "OPENAI_API_KEY" not in os.environ:
-    st.error("❌ OPENAI_API_KEY missing.")
     st.stop()
-# --- 2. DATA MODELS (From your snippet) ---
 class AgentResponse(BaseModel):
-  """The AgentResponse class is a Pydantic model designed to structure the output of the financial agent. It ensures that every response from the agent contains not just the answer, but also the supporting evidence and lineage of data used.
-Attributes:
-answer (str): The final, synthesized natural language response generated by the LLM for the user.
-sources (List[str]): A list of high-level source names cited in the answer (e.g., "Tesla Inc 10-K", "Real-time Market Data"). This provides immediate transparency.
-context_used (List[str]): A list of the actual raw text chunks or data dictionaries retrieved from the tools (RAG or Market Data) and passed to the LLM. This is crucial for auditability and debugging."""
-  answer: str
-  sources: List[str]
-  context_used: List[str]
 class TickerExtraction(BaseModel):
-    """List of stock tickers."""
     symbols: List[str] = Field(description="List of stock tickers.")
 class RoutePrediction(BaseModel):
-    """Tools list"""
     tools: List[Literal["financial_rag", "market_data", "general_chat"]] = Field(description="Tools list")
 # --- 3. CACHED INITIALIZATION ---
 @st.cache_resource(show_spinner=False)
 def initialize_resources():
-    print("🔌 Initializing Agent...")
     Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0)
     Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
-    # --- CSV PATH FINDER ---
-    # We check ALL possible locations
     possible_paths = [
-        "nasdaq-listed.csv",                                      # Root directory
-        "src/nasdaq-listed.csv",                                  # Src folder
-        os.path.join(os.getcwd(), "nasdaq-listed.csv"),           # Current Working Directory
-        os.path.join(os.path.dirname(__file__), "nasdaq-listed.csv"), # Same folder as script
-        "../nasdaq-listed.csv"                                    # One level up
     ]
-    csv_path = None
-    for path in possible_paths:
-        if os.path.exists(path):
-            csv_path = path
-            print(f"✅ Found CSV at: {path}")
-            break
     if csv_path:
-        try:
-            nasdaq_df = pd.read_csv(csv_path)
-            nasdaq_df.columns = [c.strip() for c in nasdaq_df.columns]
-        except Exception as e:
-            st.error(f"CSV Corrupt: {e}")
-            nasdaq_df = pd.DataFrame()
     else:
-        st.error(f"❌ CRITICAL: 'nasdaq-listed.csv' not found. I looked in: {possible_paths}")
         nasdaq_df = pd.DataFrame()
-    # --- Connect to Pinecone ---
     try:
         api_key = os.environ.get("PINECONE_API_KEY")
         if not api_key: raise ValueError("Pinecone Key Missing")
         pc = Pinecone(api_key=api_key)
         index = VectorStoreIndex.from_vector_store(
             vector_store=PineconeVectorStore(pinecone_index=pc.Index("financial-rag-agent"))
         )
-    except Exception as e:
-        st.error(f"Pinecone Error: {e}")
-        return nasdaq_df, None
     return nasdaq_df, index
-# --- 4. HELPER FUNCTIONS (From your snippet) ---
 def get_symbol_from_csv(query_str: str, df) -> Optional[str]:
     if df.empty: return None
     query_str = query_str.strip().upper()
@@ -115,7 +108,7 @@ def get_tickers_from_query(query: str, index, df) -> List[str]:
         if not ticker and len(entity) <= 5: ticker = entity.upper()
         if ticker: valid_tickers.append(ticker)
-    if not valid_tickers:
         try:
             nodes = index.as_retriever(similarity_top_k=1).retrieve(query)
             if nodes and nodes[0].metadata.get("ticker"):
@@ -123,7 +116,7 @@ def get_tickers_from_query(query: str, index, df) -> List[str]:
         except: pass
     return list(set(valid_tickers))
-# --- 5. TOOLS (From your snippet) ---
 def get_market_data(query: str, index, df):
     tickers = get_tickers_from_query(query, index, df)
     if not tickers: return "No companies found."
@@ -138,9 +131,7 @@ def get_market_data(query: str, index, df):
                 "Market Cap": info.get('marketCap', 'N/A'),
                 "PE Ratio": info.get('trailingPE', 'N/A'),
                 "52w High": info.get('fiftyTwoWeekHigh', 'N/A'),
-                "52w Low": info.get('fiftyTwoWeekLow', 'N/A'),
                 "Volume": info.get('volume', 'N/A'),
-                "Currency": info.get('currency', 'USD')
             }
             results.append(str(data))
         except Exception as e:
@@ -158,7 +149,6 @@ def get_financial_rag(query: str, index, df):
             continue
         filters = MetadataFilters(filters=[ExactMatchFilter(key="ticker", value=ticker)])
-        # Using logic from your snippet (similarity_top_k=3)
         engine = index.as_query_engine(similarity_top_k=3, filters=filters)
         resp = engine.query(query)
@@ -169,29 +159,15 @@ def get_financial_rag(query: str, index, df):
     return payload
-# --- 6. AGENT LOGIC (From your snippet) ---
 def run_agent(user_query: str, index, df) -> AgentResponse:
-    # THE STRICT PROMPT YOU PROVIDED
     router_prompt = """
     Route the user query to the correct tool based on these strict definitions:
-    1. "financial_rag":
-       - Use for ANY question about a specific company's internal details.
-       - INCLUDES: Revenue, Profit, Income, CEO, Board Members, Risks, Strategy, Competitors, Legal Issues, History.
-       - Key Trigger: If the answer would be found in a PDF report or Wikipedia page, use this.
-    2. "market_data":
-       - Use ONLY for Real-Time Trading Metrics.
-       - INCLUDES: Current Price, Market Cap, PE Ratio, Trading Volume, 52-Week High/Low.
-       - EXCLUDES: Historical revenue or annual profit (Use financial_rag for those).
-    3. "general_chat":
-       - Use ONLY for non-business questions (e.g. "Hi", "Help").
-       - NEVER use this if a specific company (Tesla, Apple, Nvidia) is mentioned.
     Query: {query_str}
     """
     router = OpenAIPydanticProgram.from_defaults(
         output_cls=RoutePrediction,
         prompt_template_str=router_prompt,
@@ -216,47 +192,69 @@ def run_agent(user_query: str, index, df) -> AgentResponse:
         context_used.extend(res["raw_nodes"])
     final_prompt = f"""
-    You are a Wall Street Financial Analyst. Answer the user request using the provided context.
-    Context Data:
-    {results}
     Instructions:
     1. Compare Metrics if multiple companies are listed.
     2. Synthesize qualitative (Risks) and quantitative (Price) data.
-    3. Explicitly state if a report is missing.
-    4. Cite sources.
     User Query: {user_query}
     """
     response_text = Settings.llm.complete(final_prompt).text
-    return AgentResponse(
-        answer=response_text,
-        sources=list(set(sources)),
-        context_used=context_used
-    )
-# --- 7. STREAMLIT UI ---
-# Initialize Logic
 with st.sidebar:
-    st.title("🔧 System Status")
-    with st.spinner("Initializing Strict-Boundary Agent..."):
-        try:
-            nasdaq_df, pinecone_index = initialize_resources()
-            st.success("✅ Brain Loaded")
-            st.success(f"✅ {len(nasdaq_df)} Tickers Indexed")
-        except Exception as e:
-            st.error(f"Initialization Failed: {e}")
-            st.stop()
     st.markdown("---")
-    st.markdown("### 🎯 RAG Coverage")
-    st.code("AAPL\nTSLA\nNVDA")
-st.title("📈 Financial Agent (Strict Logic)")
 if "messages" not in st.session_state:
     st.session_state.messages = []
@@ -265,40 +263,49 @@ for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
         if "sources" in message:
-             with st.expander("📚 Sources & Context"):
                  st.write(message["sources"])
-                 for i, c in enumerate(message["context"][:3]): # Limit preview
-                     st.text(f"Snippet {i+1}: {str(c)[:300]}...")
-# Input Handler
-if prompt := st.chat_input("Enter query..."):
-    st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
-        st.markdown(prompt)
     with st.chat_message("assistant"):
-        with st.status("🧠 Analyst is thinking...", expanded=True) as status:
             try:
-                # RUN THE SAVED LOGIC
-                response = run_agent(prompt, pinecone_index, nasdaq_df)
-                status.update(label="✅ Complete", state="complete", expanded=False)
-                st.markdown(response.answer)
-                # Audit Trail
-                with st.expander("🔍 Audit Trail (Full Context)"):
-                    st.write("**Sources:**", response.sources)
-                    st.write("**Raw Retrieval:**")
-                    for ctx in response.context_used:
-                        st.text(str(ctx))
-                st.session_state.messages.append({
-                    "role": "assistant",
-                    "content": response.answer,
-                    "sources": response.sources,
-                    "context": response.context_used
-                })
             except Exception as e:
                 st.error(f"Error: {e}")
-                status.update(label="❌ Error", state="error")

 from llama_index.llms.openai import OpenAI
 from llama_index.core.vector_stores import MetadataFilters, ExactMatchFilter
+# --- 1. PAGE CONFIGURATION ---
+st.set_page_config(
+    page_title="Wall St. AI Analyst",
+    page_icon="🏛️",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS for a cleaner look
+st.markdown("""
+    <style>
+    .stButton>button {
+        width: 100%;
+        border-radius: 5px;
+        height: 3em;
+        background-color: #f0f2f6;
+    }
+    .reportview-container {
+        background: #ffffff;
+    }
+    </style>
+""", unsafe_allow_html=True)
 # Ensure keys exist
 if "OPENAI_API_KEY" not in os.environ:
+    st.error("❌ OPENAI_API_KEY missing. Please check Space Settings.")
     st.stop()
+# --- 2. DATA MODELS ---
 class AgentResponse(BaseModel):
+    answer: str
+    sources: List[str]
+    context_used: List[str]
 class TickerExtraction(BaseModel):
     symbols: List[str] = Field(description="List of stock tickers.")
 class RoutePrediction(BaseModel):
     tools: List[Literal["financial_rag", "market_data", "general_chat"]] = Field(description="Tools list")
 # --- 3. CACHED INITIALIZATION ---
 @st.cache_resource(show_spinner=False)
 def initialize_resources():
     Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0)
     Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+    # Locate CSV
     possible_paths = [
+        "nasdaq-listed.csv", "src/nasdaq-listed.csv",
+        os.path.join(os.getcwd(), "nasdaq-listed.csv"),
+        os.path.join(os.path.dirname(__file__), "nasdaq-listed.csv"),
+        "../nasdaq-listed.csv"
     ]
+    csv_path = next((p for p in possible_paths if os.path.exists(p)), None)
     if csv_path:
+        nasdaq_df = pd.read_csv(csv_path)
+        nasdaq_df.columns = [c.strip() for c in nasdaq_df.columns]
     else:
         nasdaq_df = pd.DataFrame()
+    # Connect to Pinecone
     try:
         api_key = os.environ.get("PINECONE_API_KEY")
         if not api_key: raise ValueError("Pinecone Key Missing")
         pc = Pinecone(api_key=api_key)
         index = VectorStoreIndex.from_vector_store(
             vector_store=PineconeVectorStore(pinecone_index=pc.Index("financial-rag-agent"))
         )
+    except:
+        index = None
     return nasdaq_df, index
+# --- 4. HELPER FUNCTIONS ---
 def get_symbol_from_csv(query_str: str, df) -> Optional[str]:
     if df.empty: return None
     query_str = query_str.strip().upper()
         if not ticker and len(entity) <= 5: ticker = entity.upper()
         if ticker: valid_tickers.append(ticker)
+    if not valid_tickers and index:
         try:
             nodes = index.as_retriever(similarity_top_k=1).retrieve(query)
             if nodes and nodes[0].metadata.get("ticker"):
         except: pass
     return list(set(valid_tickers))
+# --- 5. TOOLS ---
 def get_market_data(query: str, index, df):
     tickers = get_tickers_from_query(query, index, df)
     if not tickers: return "No companies found."
                 "Market Cap": info.get('marketCap', 'N/A'),
                 "PE Ratio": info.get('trailingPE', 'N/A'),
                 "52w High": info.get('fiftyTwoWeekHigh', 'N/A'),
                 "Volume": info.get('volume', 'N/A'),
             }
             results.append(str(data))
         except Exception as e:
             continue
         filters = MetadataFilters(filters=[ExactMatchFilter(key="ticker", value=ticker)])
         engine = index.as_query_engine(similarity_top_k=3, filters=filters)
         resp = engine.query(query)
     return payload
+# --- 6. AGENT LOGIC ---
 def run_agent(user_query: str, index, df) -> AgentResponse:
     router_prompt = """
     Route the user query to the correct tool based on these strict definitions:
+    1. "financial_rag": Company internal details (Revenue, Risks, Strategy, CEO).
+    2. "market_data": Real-Time Trading Metrics (Price, PE, Volume) ONLY.
+    3. "general_chat": Non-business questions.
     Query: {query_str}
     """
     router = OpenAIPydanticProgram.from_defaults(
         output_cls=RoutePrediction,
         prompt_template_str=router_prompt,
         context_used.extend(res["raw_nodes"])
     final_prompt = f"""
+    You are a Wall Street Financial Analyst. Answer using the provided context.
+    Context Data: {results}
     Instructions:
     1. Compare Metrics if multiple companies are listed.
     2. Synthesize qualitative (Risks) and quantitative (Price) data.
+    3. Cite sources.
     User Query: {user_query}
     """
     response_text = Settings.llm.complete(final_prompt).text
+    return AgentResponse(answer=response_text, sources=list(set(sources)), context_used=context_used)
+# --- 7. UI LOGIC ---
 with st.sidebar:
+    st.image("https://img.icons8.com/color/96/000000/bullish.png", width=80)
+    st.title("System Status")
+    with st.spinner("Connecting to Wall St..."):
+        nasdaq_df, pinecone_index = initialize_resources()
+    if not nasdaq_df.empty:
+        st.success(f"✅ Market Data: {len(nasdaq_df):,} Tickers")
+    else:
+        st.warning("⚠️ Market Data: Offline")
+    if pinecone_index:
+        st.success("✅ Knowledge Base: Online")
+    else:
+        st.error("❌ Knowledge Base: Offline")
     st.markdown("---")
+    st.markdown("### 🧠 Capabilities")
+    st.info("**Deep Dive (10-K Reports)**")
+    st.markdown("- 🍎 Apple (AAPL)\n- 🚗 Tesla (TSLA)\n- 🎮 Nvidia (NVDA)")
+    st.caption("*Ask about Strategy, Risks, Revenue*")
+    st.info("**Live Market Data**")
+    st.markdown("- 🌍 All NASDAQ Companies")
+    st.caption("*Ask about Price, PE Ratio, Volume*")
+    st.markdown("---")
+    if st.button("🧹 Clear Conversation"):
+        st.session_state.messages = []
+        st.rerun()
+# Main Hero Section
+st.title("🏛️ Wall St. AI Analyst")
+st.markdown("""
+**Your Hybrid Financial Assistant.** I bridge the gap between **Real-Time Market Data** and **Deep 10-K Analysis**.
+""")
+# Quick Start Buttons
+col1, col2, col3 = st.columns(3)
+if col1.button("🆚 Compare Risks"):
+    prompt = "Compare the supply chain risks of Apple and Tesla."
+elif col2.button("📊 Apple vs Nvidia Revenue"):
+    prompt = "Compare the revenue growth of Apple and Nvidia."
+elif col3.button("📈 Tesla PE & Price"):
+    prompt = "What is the current price and PE ratio of Tesla?"
+else:
+    prompt = None
+# Chat State
 if "messages" not in st.session_state:
     st.session_state.messages = []
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
         if "sources" in message:
+             with st.expander("📚 Data Sources & Citations"):
                  st.write(message["sources"])
+                 st.divider()
+                 for i, c in enumerate(message["context"][:2]):
+                     st.caption(f"**Context Fragment {i+1}:**")
+                     st.text(str(c)[:500] + "...")
+# Handle Input (Button or Text)
+if user_input := st.chat_input("Ask a financial question...") or prompt:
+    # If button was clicked, override text input
+    final_query = prompt if prompt else user_input
+    st.session_state.messages.append({"role": "user", "content": final_query})
     with st.chat_message("user"):
+        st.markdown(final_query)
     with st.chat_message("assistant"):
+        # Status container (collapsible)
+        with st.status("🧠 Analyzing 10-Ks and Market Data...", expanded=True) as status:
             try:
+                response = run_agent(final_query, pinecone_index, nasdaq_df)
+                status.update(label="✅ Analysis Complete", state="complete", expanded=False)
             except Exception as e:
                 st.error(f"Error: {e}")
+                status.update(label="❌ Error", state="error")
+                st.stop()
+        # ANSWER DISPLAY (Now OUTSIDE the status block so it auto-shows)
+        st.markdown(response.answer)
+        # Sources (Collapsible)
+        with st.expander("🔍 Audit Trail (Read the Source Data)"):
+            st.markdown("### 📚 Cited Sources")
+            st.write(response.sources)
+            st.divider()
+            st.markdown("### 📄 Raw Context Snippets")
+            for ctx in response.context_used:
+                st.text(str(ctx))
+        # Save to history
+        st.session_state.messages.append({
+            "role": "assistant",
+            "content": response.answer,
+            "sources": response.sources,
+            "context": response.context_used
+        })