Spaces:

Mpavan45
/

ITC_Financial_Analysis

Sleeping

App Files Files Community

Mpavan45 committed on May 8, 2025

Commit

b4aa4f6

verified ·

1 Parent(s): 04def97

Create app.py

Browse files

Files changed (1) hide show

app.py +132 -0

app.py ADDED Viewed

	@@ -0,0 +1,132 @@

+import streamlit as st
+import os
+import zipfile
+from langchain.vectorstores import Chroma
+from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+from langchain.prompts import ChatPromptTemplate
+from langchain.schema.output_parser import StrOutputParser
+from langchain.schema.runnable import RunnableLambda
+# Page setup
+st.set_page_config(page_title="Financial QA - ITC Ltd.", layout="wide", initial_sidebar_state="expanded")
+# Custom CSS for enhanced UI
+st.markdown("""
+<style>
+    .main { background-color: #f8f9fa; }
+    .header { text-align: center; padding: 20px; background-color: #007bff; color: white; border-radius: 10px; }
+    .stTextInput>input { border-radius: 5px; padding: 10px; }
+    .stButton>button { background-color: #28a745; color: white; border-radius: 5px; padding: 10px; width: 100%; }
+    .answer-box { background-color: #e9ecef; border-radius: 10px; padding: 15px; margin-top: 10px; }
+    .source-expander { background-color: #f1f3f5; border-radius: 5px; }
+    .sidebar .stSelectbox { margin-bottom: 15px; }
+</style>
+""", unsafe_allow_html=True)
+# Header
+with st.container():
+    st.markdown('<div class="header">', unsafe_allow_html=True)
+    st.title("📊 Financial Q&A Chatbot (ITC Ltd.)")
+    st.markdown("Ask financial questions about ITC Ltd. based on transcript data, powered by AI.")
+    st.markdown('</div>', unsafe_allow_html=True)
+# Safe way to access secrets
+GOOGLE_API_KEY = "AIzaSyBm0GOvYox4OyRG1WFOK7FT5fnNCHfubns"
+# Initialize Chroma DB
+@st.cache_resource
+def initialize_vectorstore(api_key):
+    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
+    zip_path = "src/chroma_db1.zip"
+    extract_dir = "src/chroma_db2"
+    if os.path.exists(zip_path):
+        try:
+            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
+                zip_ref.extractall(extract_dir)
+            vectorstore = Chroma(persist_directory=extract_dir, embedding_function=embedding)
+            if vectorstore._collection.count() > 0:
+                return vectorstore
+            else:
+                st.error("Chroma DB is empty after extraction.")
+        except Exception as e:
+            st.error(f"Failed to load Chroma DB: {str(e)}")
+    else:
+        st.error(f"`chroma_db1.zip` not found at {zip_path}")
+    return None
+retriever = None
+vectorstore = None
+llm, parser = None, None
+if GOOGLE_API_KEY:
+    vectorstore = initialize_vectorstore(GOOGLE_API_KEY)
+    if vectorstore:
+        retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 1})
+    llm = ChatGoogleGenerativeAI(api_key=GOOGLE_API_KEY, model="gemini-1.5-flash", temperature=1)
+    parser = StrOutputParser()
+# Prompt template
+prompt = ChatPromptTemplate.from_messages([
+    ("system",
+     """You are a domain-specific AI financial analyst focused on company-level performance evaluation.
+Your task is to analyze and respond to user financial queries strictly based on the provided transcript data: {context}.
+Rules:
+1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
+2. If data is missing or partially available, clearly state: "The required data is not available in the current transcript." Then provide a generic but relevant explanation based on standard financial principles.
+3. Maintain numerical accuracy and avoid interpretation beyond data boundaries.
+4. Prioritize answers relevant to ITC Ltd., but keep response format adaptable to other firms and fiscal years.
+5. Clearly present year-wise or metric-wise insights using bullet points or structured formats if applicable.
+Your goals:
+- Ensure 100% fidelity to source transcript.
+- Do not assume or hallucinate missing numbers.
+- Use clear, reproducible reasoning steps (e.g., show which line items support your conclusion).
+- Output should be modular enough to scale across other companies and time periods.
+Respond only to this question from the user."""
+    ),
+    ("human", "{question}")
+])
+# Helper functions
+def format_docs(docs):
+    return "\n\n".join(doc.page_content for doc in docs)
+def retrieve_and_answer(question):
+    if not retriever or not llm:
+        return "Cannot process query: Retriever or LLM not initialized.", []
+    docs = retriever.invoke(question)
+    context = format_docs(docs)
+    final_input = {"question": question, "context": context}
+    result = (prompt | llm | parser).invoke(final_input)
+    return result, docs
+# Query input form
+st.subheader("🔍 Ask a Financial Question")
+with st.form(key="query_form", clear_on_submit=True):
+    query = st.text_input("Enter your question about ITC's financials:", placeholder="e.g., What was ITC's revenue in FY 2023?")
+    submit_button = st.form_submit_button("Get Answer")
+if submit_button:
+    if not query.strip():
+        st.warning("Please enter a valid question.")
+    elif not GOOGLE_API_KEY:
+        st.error("Google API Key not configured. Set it in Hugging Face Secrets to proceed.")
+    else:
+        with st.spinner("Generating answer..."):
+            try:
+                answer, source_docs = retrieve_and_answer(query)
+                st.markdown('<div class="answer-box">', unsafe_allow_html=True)
+                st.markdown("### ✅ Answer")
+                st.markdown(answer)
+                st.markdown('</div>', unsafe_allow_html=True)
+                with st.expander("📄 Source Documents", expanded=False):
+                    if source_docs:
+                        for doc in source_docs:
+                            st.markdown(f"- **Source**: {doc.metadata.get('source', 'Unknown document')}")
+                            st.markdown(f"  **Content**: {doc.page_content}")
+                    else:
+                        st.write("No source documents found.")
+            except Exception as e:
+                st.error(f"Error processing query: {str(e)}")