File size: 5,906 Bytes
0262917
5f37344
 
5eabeeb
d05a579
 
c07da2a
d05a579
 
565bb53
 
 
5f37344
 
 
 
 
 
 
 
 
 
 
 
d05a579
5f37344
 
 
 
 
 
d05a579
866cc2f
 
84b23b5
866cc2f
0262917
5f37344
 
5dc5c1e
 
0f4d876
 
5f37344
 
 
 
 
55bc799
5f37344
 
55bc799
5f37344
55bc799
5f37344
55bc799
 
5f37344
55bc799
5dc5c1e
 
4806e78
5dc5c1e
 
 
 
 
 
5f37344
 
d05a579
5f37344
55bc799
 
 
 
 
 
 
 
 
 
 
 
 
5dc5c1e
 
d05a579
c07da2a
 
5f37344
d05a579
 
c07da2a
d05a579
5f37344
 
4c1f03d
d05a579
 
 
 
 
55bc799
5f37344
 
 
 
d05a579
55bc799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import os
import zipfile
from langchain.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda

# Page setup
st.set_page_config(page_title="Financial QA - ITC Ltd.", layout="wide", initial_sidebar_state="expanded")

# Custom CSS for enhanced UI
st.markdown("""
<style>
    .main { background-color: #f8f9fa; }
    .header { text-align: center; padding: 20px; background-color: #007bff; color: white; border-radius: 10px; }
    .stTextInput>input { border-radius: 5px; padding: 10px; }
    .stButton>button { background-color: #28a745; color: white; border-radius: 5px; padding: 10px; width: 100%; }
    .answer-box { background-color: #e9ecef; border-radius: 10px; padding: 15px; margin-top: 10px; }
    .source-expander { background-color: #f1f3f5; border-radius: 5px; }
    .sidebar .stSelectbox { margin-bottom: 15px; }
</style>
""", unsafe_allow_html=True)

# Header
with st.container():
    st.markdown('<div class="header">', unsafe_allow_html=True)
    st.title("πŸ“Š Financial Q&A Chatbot (ITC Ltd.)")
    st.markdown("Ask financial questions about ITC Ltd. based on transcript data, powered by AI.")
    st.markdown('</div>', unsafe_allow_html=True)


# Google API key is read from the environment instead of being hard-coded.
# Hugging Face Spaces / Streamlit Cloud expose configured secrets as
# environment variables, so set GOOGLE_API_KEY in the platform's Secrets UI.
# SECURITY: a real key was previously committed here in plain text -- it must
# be considered leaked and should be revoked/rotated in the Google console.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")


# Initialize Chroma DB
@st.cache_resource
def initialize_vectorstore(api_key, zip_path="src/chroma_db1.zip", extract_dir="src/chroma_db2"):
    """Unpack the bundled Chroma index and return a ready-to-query vector store.

    Cached by Streamlit so the archive is only extracted once per process.

    Args:
        api_key: Google Generative AI key used by the embedding model.
        zip_path: Location of the zipped Chroma persistence directory.
        extract_dir: Directory the archive is unpacked into and Chroma reads from.

    Returns:
        The loaded ``Chroma`` vector store, or ``None`` (after surfacing a
        ``st.error``) when the archive is missing, unreadable, or empty.
    """
    embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
    # Guard clause: without the archive there is nothing to load.
    if not os.path.exists(zip_path):
        st.error(f"`chroma_db1.zip` not found at {zip_path}")
        return None
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
        vectorstore = Chroma(persist_directory=extract_dir, embedding_function=embedding)
        # NOTE(review): _collection is a private Chroma attribute; this
        # langchain version exposes no public document count -- verify on upgrade.
        if vectorstore._collection.count() > 0:
            return vectorstore
        st.error("Chroma DB is empty after extraction.")
    except Exception as e:
        # Surface any extraction/load failure in the UI rather than crashing the app.
        st.error(f"Failed to load Chroma DB: {str(e)}")
    return None

# Module-level handles; remain None when the API key or vector store is absent,
# and retrieve_and_answer() checks for that before use.
retriever = None
vectorstore = None
llm, parser = None, None

if GOOGLE_API_KEY:
    vectorstore = initialize_vectorstore(GOOGLE_API_KEY)
    if vectorstore:
        # MMR retrieval, top 3 chunks. NOTE(review): lambda_mult=1 weights pure
        # relevance, effectively disabling MMR's diversity term -- confirm intended.
        retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 1})
    # NOTE(review): temperature=1 is high for factual financial QA; a lower
    # value would make answers more deterministic -- confirm intended.
    llm = ChatGoogleGenerativeAI(api_key=GOOGLE_API_KEY, model="gemini-1.5-flash", temperature=1)
    parser = StrOutputParser()

# Prompt template
# System message constrains the model to the retrieved transcript text; the
# {context} placeholder receives the concatenated retrieved chunks and
# {question} the user's query (both filled in retrieve_and_answer).
prompt = ChatPromptTemplate.from_messages([
    ("system",
     """You are a domain-specific AI financial analyst focused on company-level performance evaluation.
Your task is to analyze and respond to user financial queries strictly based on the provided transcript data: {context}.
Rules:
1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
2. If data is missing or partially available, clearly state: "The required data is not available in the current transcript." Then provide a generic but relevant explanation based on standard financial principles.
3. Maintain numerical accuracy and avoid interpretation beyond data boundaries.
4. Prioritize answers relevant to ITC Ltd., but keep response format adaptable to other firms and fiscal years.
5. Clearly present year-wise or metric-wise insights using bullet points or structured formats if applicable.
Your goals:
- Ensure 100% fidelity to source transcript.
- Do not assume or hallucinate missing numbers.
- Use clear, reproducible reasoning steps (e.g., show which line items support your conclusion).
- Output should be modular enough to scale across other companies and time periods.
Respond only to this question from the user."""
    ),
    ("human", "{question}")
])

# Helper functions
def format_docs(docs):
    """Join the page contents of *docs* into one string, separated by blank lines."""
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

def retrieve_and_answer(question):
    """Retrieve transcript chunks relevant to *question* and run the LLM chain.

    Returns:
        A ``(answer_text, source_documents)`` tuple. When the retriever or the
        LLM has not been initialized, the answer is an explanatory message and
        the document list is empty.
    """
    # Guard: components are None when the API key or vector store is missing.
    if not retriever or not llm:
        return "Cannot process query: Retriever or LLM not initialized.", []
    source_docs = retriever.invoke(question)
    chain = prompt | llm | parser
    answer = chain.invoke({"question": question, "context": format_docs(source_docs)})
    return answer, source_docs

# Query input form
st.subheader("πŸ” Ask a Financial Question")
with st.form(key="query_form", clear_on_submit=True):
    query = st.text_input("Enter your question about ITC's financials:", placeholder="e.g., What was ITC's revenue in FY 2023?")
    submit_button = st.form_submit_button("Get Answer")

if submit_button:
    if not query.strip():
        st.warning("Please enter a valid question.")
    elif not GOOGLE_API_KEY:
        st.error("Google API Key not configured. Set it in Hugging Face Secrets to proceed.")
    else:
        with st.spinner("Generating answer..."):
            try:
                answer, source_docs = retrieve_and_answer(query)
                st.markdown('<div class="answer-box">', unsafe_allow_html=True)
                st.markdown("### βœ… Answer")
                st.markdown(answer)
                st.markdown('</div>', unsafe_allow_html=True)

                with st.expander("πŸ“„ Source Documents", expanded=False):
                    if source_docs:
                        for doc in source_docs:
                            st.markdown(f"- **Source**: {doc.metadata.get('source', 'Unknown document')}")
                            st.markdown(f"  **Content**: {doc.page_content}")
                    else:
                        st.write("No source documents found.")
            except Exception as e:
                st.error(f"Error processing query: {str(e)}")