Update src/streamlit_app.py
src/streamlit_app.py  CHANGED  (+62 -141)

Before:

@@ -1,89 +1,36 @@
 import streamlit as st
-import zipfile
 import os
-
-from langchain.vectorstores import Chroma
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
-from langchain.prompts.chat import ChatPromptTemplate, MessagesPlaceholder
-from langchain.schema import HumanMessage, AIMessage
 from langchain.schema.output_parser import StrOutputParser
 
-#
-
-
-st.markdown("""
-<style>
-.main { background-color: #f9f9f9; }
-.block-container {
-    padding-top: 2rem;
-    padding-bottom: 2rem;
-}
-.stChatMessage {
-    background-color: #ffffff;
-    border: 1px solid #e0e0e0;
-    padding: 1rem;
-    border-radius: 12px;
-    margin-bottom: 1rem;
-}
-.stButton button {
-    background-color: #FF6347 !important;
-    color: white !important;
-    border-radius: 8px !important;
-    font-weight: 600;
-}
-.source-box {
-    background-color: #f0f0f0;
-    border-left: 5px solid #555;
-    padding: 0.5rem;
-    margin-top: 0.5rem;
-    border-radius: 8px;
-    font-size: 0.9rem;
-}
-</style>
-""", unsafe_allow_html=True)
-
-st.title("📈 ITC Financial Analysis with AI-Powered Insights")
-
-# Chat history buffer
-memory_buffer = {"chat_history": []}
-
-# Sidebar - Clear chat
-st.sidebar.markdown("## 🛠️ Options")
-if st.sidebar.button("🛑 End Chat"):
-    memory_buffer["chat_history"] = []
-
-# Extract Chroma DB ZIP (only if not already extracted)
-
-zip_path = 'src/chroma_db1.zip'
-extract_path = 'chroma_db'
-
-if not os.path.exists(extract_path):
-    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-        zip_ref.extractall(extract_path)
-
-
-# Load embeddings & vector DB
-embedding = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
-vectorstore = Chroma(persist_directory='chroma_db', embedding_function=embedding)
-mmr_retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 1})
-
-# Document formatter
-def format_docs(docs):
-    return "\n\n".join(doc.page_content for doc in docs)
-
-def get_docs_and_context(question):
-    docs = mmr_retriever.get_relevant_documents(question)
-    return {"question": question, "docs": docs, "context": format_docs(docs)}
-
-# LLM + Prompt Setup
-parallel_chain = RunnableLambda(lambda x: {
-    "question": x["input"],
-    **get_docs_and_context(x["input"])
-})
-
-chat_prompt = ChatPromptTemplate.from_messages([
-    ("system",
 """
 You are a domain-specific AI financial analyst focused on company-level performance evaluation.
 
@@ -91,69 +38,43 @@ chat_prompt = ChatPromptTemplate.from_messages([
 
 Rules:
 1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
-2. If data is *missing or partially available*, clearly state: "The required data is not available in the current transcript."
-3. Do not assume or hallucinate values. Be transparent and evidence-driven.
-4. Prioritize answers for ITC Ltd., but keep the structure reusable.
-5. Use bullet points or structure year-wise/metric-wise data when appropriate.
-
-Your goals:
-- Ensure 100% fidelity to source transcript.
-- Do not assume or hallucinate missing numbers.
-- Use clear, reproducible reasoning steps (e.g., show which line items support your conclusion).
-- Output should be modular enough to scale across other companies and time periods.
-
-Respond only to this question from the user.
 """),
-
-    ("human", "{input}")
 ])
 
 GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
-llm = ChatGoogleGenerativeAI(
-    api_key=GOOGLE_API_KEY,
-    model="gemini-2.0-flash",
-    temperature=1
-)
-parser = StrOutputParser()
-
-def get_history_from_buffer(_):
-    return memory_buffer['chat_history']
-
-runnable_get_history_from_buffer = RunnableLambda(get_history_from_buffer)
-
-main_chain = (
-    parallel_chain |
-    RunnableLambda(lambda x: {
-        "llm_input": {"input": x["question"], "context": x["context"]},
-        "docs": x["docs"]
-    }) |
-    RunnableLambda(lambda x: {
-        "result": (chat_prompt | llm | parser).invoke(x["llm_input"]),
-        "source_documents": x["docs"]
-    })
 )
 
-
-
-
-st.markdown("### 💬 Conversation")
-for msg in memory_buffer["chat_history"]:
-    role = "user" if isinstance(msg, HumanMessage) else "assistant"
-    with st.chat_message(role):
-        st.markdown(msg.content)
-
-# --- Input Section ---
-user_input = st.chat_input("Ask about ITC's performance or any financial metric...")
-
-if user_input:
-    with st.chat_message("user"):
-        st.markdown(user_input)
-
-    memory_buffer["chat_history"].append(HumanMessage(content=user_input))
-    output = main_chain.invoke({"input": user_input})
-    ai_response = output["result"]
-    memory_buffer["chat_history"].append(AIMessage(content=ai_response))
 

After:

@@ -1,89 +1,36 @@
 import streamlit as st
 import os
+import zipfile
+from langchain_chroma import Chroma  # ✅ Updated import
+from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+from langchain.prompts import ChatPromptTemplate
 from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnableLambda
+import tempfile
+
+# === Page Setup ===
+st.set_page_config(page_title="Financial QA - ITC Ltd.", layout="wide")
+st.title("📊 Financial Q&A Chatbot (ITC Ltd.)")
+
+# === Step 1: Extract Chroma DB from zip ===
+def load_chroma_db():
+    with zipfile.ZipFile("chroma_db1.zip", 'r') as zip_ref:
+        temp_dir = tempfile.mkdtemp()
+        zip_ref.extractall(temp_dir)
+    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
+    return Chroma(persist_directory=temp_dir, embedding_function=embedding)
+
+vectorstore = load_chroma_db()
+
+# === Step 2: MMR Retriever ===
+retriever = vectorstore.as_retriever(
+    search_type="mmr",
+    search_kwargs={"k": 3, "lambda_mult": 1}
+)
 
+# === Step 3: Prompt Template ===
+prompt = ChatPromptTemplate.from_messages([
+    ("system",
 """
 You are a domain-specific AI financial analyst focused on company-level performance evaluation.
 
@@ -91,69 +38,43 @@
 
 Rules:
 1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
+2. If data is *missing or partially available*, clearly state: "The required data is not available in the current transcript."
+3. Do not assume or hallucinate values. Be transparent and evidence-driven.
+4. Prioritize answers for ITC Ltd., but keep the structure reusable.
+5. Use bullet points or structure year-wise/metric-wise data when appropriate.
 """),
+    ("human", "{question}")
 ])
 
+# === Step 4: LLM Setup ===
 GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
+llm = ChatGoogleGenerativeAI(
+    api_key=GOOGLE_API_KEY,
+    model="gemini-2.0-flash",
+    temperature=1
+)
+parser = StrOutputParser()
 
+# === Step 5: Helper Functions ===
+def format_docs(docs):
+    return "\n\n".join(doc.page_content for doc in docs)
 
+def retrieve_and_answer(question):
+    docs = retriever.invoke(question)  # ✅ Updated to new `invoke()` method
+    context = format_docs(docs)
+    final_input = {"question": question, "context": context}
+    result = (prompt | llm | parser).invoke(final_input)
+    return result, docs
+
+# === Step 6: Streamlit UI ===
+query = st.text_input("🔍 Enter your financial question:", "")
+
+if st.button("Get Answer") and query.strip():
+    with st.spinner("Generating answer..."):
+        answer, source_docs = retrieve_and_answer(query)
+        st.markdown("### ✅ Answer")
+        st.markdown(answer)
+
+        st.markdown("### 📄 Source Documents")
+        for doc in source_docs:
+            st.write(doc.metadata)