Mpavan45 committed on
Commit
b4aa4f6
·
verified ·
1 Parent(s): 04def97

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -0
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import zipfile
4
+ from langchain.vectorstores import Chroma
5
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
6
+ from langchain.prompts import ChatPromptTemplate
7
+ from langchain.schema.output_parser import StrOutputParser
8
+ from langchain.schema.runnable import RunnableLambda
9
+
10
# Page setup
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(
    page_title="Financial QA - ITC Ltd.",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Custom CSS for enhanced UI
# The stylesheet is kept in a named constant and injected once as raw HTML.
_CUSTOM_CSS = """
<style>
.main { background-color: #f8f9fa; }
.header { text-align: center; padding: 20px; background-color: #007bff; color: white; border-radius: 10px; }
.stTextInput>input { border-radius: 5px; padding: 10px; }
.stButton>button { background-color: #28a745; color: white; border-radius: 5px; padding: 10px; width: 100%; }
.answer-box { background-color: #e9ecef; border-radius: 10px; padding: 15px; margin-top: 10px; }
.source-expander { background-color: #f1f3f5; border-radius: 5px; }
.sidebar .stSelectbox { margin-bottom: 15px; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
25
+
26
# Header
# The banner is emitted as ONE HTML block. Streamlit renders each
# st.markdown / st.title call as its own element, so an opening <div> sent in
# one call cannot wrap elements produced by later calls; the previous
# open-div / title / close-div sequence left the title outside the styled box.
# "&" is written as "&amp;" because the text is now raw HTML.
with st.container():
    st.markdown(
        '<div class="header">'
        "<h1>📊 Financial Q&amp;A Chatbot (ITC Ltd.)</h1>"
        "<p>Ask financial questions about ITC Ltd. based on transcript data, powered by AI.</p>"
        "</div>",
        unsafe_allow_html=True,
    )
32
+
33
+
34
# Safe way to access secrets
# The key is read from the environment and is never stored in source control.
# On Hugging Face Spaces, add a secret named GOOGLE_API_KEY in the Space
# settings; it is exposed to the app as an environment variable.
# An empty string means "not configured"; the rest of the script already
# checks the key for truthiness before using it.
# SECURITY: a literal key was previously committed on this line. Treat that
# key as compromised and revoke/rotate it in the Google console.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "").strip()
36
+
37
+
38
+ # Initialize Chroma DB
39
@st.cache_resource
def initialize_vectorstore(api_key):
    """Unpack the bundled Chroma archive and open it as a vector store.

    Args:
        api_key: Google API key passed to the embedding model.

    Returns:
        The Chroma vector store when the archive exists, loads without error
        and its collection holds at least one entry; otherwise None. Each
        failure path reports its reason through st.error.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
    archive_path = "src/chroma_db1.zip"
    target_dir = "src/chroma_db2"

    # Guard clause: nothing to load when the archive is missing.
    if not os.path.exists(archive_path):
        st.error(f"`chroma_db1.zip` not found at {archive_path}")
        return None

    try:
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(target_dir)
        store = Chroma(persist_directory=target_dir, embedding_function=embedder)
        # An empty collection is treated as a failed load.
        if store._collection.count() > 0:
            return store
        st.error("Chroma DB is empty after extraction.")
    except Exception as exc:
        st.error(f"Failed to load Chroma DB: {exc}")
    return None
58
+
59
# Wire up retrieval and generation. Every handle stays None unless an API key
# is configured and the vector store loaded successfully.
vectorstore = initialize_vectorstore(GOOGLE_API_KEY) if GOOGLE_API_KEY else None
retriever = llm = parser = None

if vectorstore:
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 3, "lambda_mult": 1},
    )
    # NOTE(review): temperature=1 sits oddly with the prompt's demand for
    # reproducible, transcript-faithful answers — confirm this is intended.
    llm = ChatGoogleGenerativeAI(
        api_key=GOOGLE_API_KEY,
        model="gemini-1.5-flash",
        temperature=1,
    )
    parser = StrOutputParser()
69
+
70
# Prompt template
# The system message carries the {context} placeholder (retrieved transcript
# text); the human message carries the {question} placeholder.
_SYSTEM_INSTRUCTIONS = """You are a domain-specific AI financial analyst focused on company-level performance evaluation.
Your task is to analyze and respond to user financial queries strictly based on the provided transcript data: {context}.
Rules:
1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
2. If data is missing or partially available, clearly state: "The required data is not available in the current transcript." Then provide a generic but relevant explanation based on standard financial principles.
3. Maintain numerical accuracy and avoid interpretation beyond data boundaries.
4. Prioritize answers relevant to ITC Ltd., but keep response format adaptable to other firms and fiscal years.
5. Clearly present year-wise or metric-wise insights using bullet points or structured formats if applicable.
Your goals:
- Ensure 100% fidelity to source transcript.
- Do not assume or hallucinate missing numbers.
- Use clear, reproducible reasoning steps (e.g., show which line items support your conclusion).
- Output should be modular enough to scale across other companies and time periods.
Respond only to this question from the user."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _SYSTEM_INSTRUCTIONS),
        ("human", "{question}"),
    ]
)
90
+
91
+ # Helper functions
92
def format_docs(docs):
    """Join the page_content of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        One string with the contents in input order, or "" for no documents.
    """
    chunks = [entry.page_content for entry in docs]
    return "\n\n".join(chunks)
94
+
95
def retrieve_and_answer(question):
    """Answer a question from retrieved transcript chunks.

    Args:
        question: The user's question text.

    Returns:
        A ``(answer, docs)`` tuple: the parsed model output and the retrieved
        documents. When the retriever or LLM is not initialised, a fixed
        explanatory message and an empty list are returned instead.
    """
    if not (retriever and llm):
        return "Cannot process query: Retriever or LLM not initialized.", []

    docs = retriever.invoke(question)
    payload = {"question": question, "context": format_docs(docs)}
    # Compose prompt -> model -> string parser and run it on the payload.
    chain = prompt | llm | parser
    return chain.invoke(payload), docs
103
+
104
# Query input form
# The subheader emoji was mis-encoded (mojibake) and is restored here.
# clear_on_submit empties the text box after each submission.
st.subheader("🔍 Ask a Financial Question")
with st.form(key="query_form", clear_on_submit=True):
    query = st.text_input(
        "Enter your question about ITC's financials:",
        placeholder="e.g., What was ITC's revenue in FY 2023?",
    )
    submit_button = st.form_submit_button("Get Answer")
109
+
110
# Handle a submitted question: validate the input, run retrieval + generation,
# then render the answer followed by the source chunks it was based on.
# The "Answer" and "Source Documents" emoji were mis-encoded (mojibake) and
# are restored here; the control flow is unchanged.
if submit_button:
    if not query.strip():
        st.warning("Please enter a valid question.")
    elif not GOOGLE_API_KEY:
        st.error("Google API Key not configured. Set it in Hugging Face Secrets to proceed.")
    else:
        with st.spinner("Generating answer..."):
            # Top-level UI boundary: any failure is shown to the user instead
            # of crashing the script run.
            try:
                answer, source_docs = retrieve_and_answer(query)
                # NOTE(review): Streamlit appears to render each st.markdown
                # call as a separate element, so this <div> likely does not
                # wrap the answer below — confirm. It is left as-is because
                # the answer is model output and should not be placed inside
                # an unsafe_allow_html block.
                st.markdown('<div class="answer-box">', unsafe_allow_html=True)
                st.markdown("### ✅ Answer")
                st.markdown(answer)
                st.markdown('</div>', unsafe_allow_html=True)

                with st.expander("📄 Source Documents", expanded=False):
                    if source_docs:
                        for doc in source_docs:
                            st.markdown(f"- **Source**: {doc.metadata.get('source', 'Unknown document')}")
                            st.markdown(f" **Content**: {doc.page_content}")
                    else:
                        st.write("No source documents found.")
            except Exception as e:
                st.error(f"Error processing query: {str(e)}")