Mpavan45 committed on
Commit
d05a579
·
verified ·
1 Parent(s): 76d0d38

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +62 -141
src/streamlit_app.py CHANGED
@@ -1,89 +1,36 @@
1
  import streamlit as st
2
- import zipfile
3
  import os
4
-
5
- from langchain_community.vectorstores import Chroma
6
- from langchain_google_genai import ChatGoogleGenerativeAI
7
- from langchain_core.messages import HumanMessage, AIMessage
8
- from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
9
- from langchain.prompts.chat import ChatPromptTemplate, MessagesPlaceholder
10
  from langchain.schema.output_parser import StrOutputParser
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- # --- Streamlit Setup ---
13
- st.set_page_config(page_title="📊 ITC Financial Analyst AI", layout="wide")
14
-
15
- st.markdown("""
16
- <style>
17
- .main { background-color: #f9f9f9; }
18
- .block-container {
19
- padding-top: 2rem;
20
- padding-bottom: 2rem;
21
- }
22
- .stChatMessage {
23
- background-color: #ffffff;
24
- border: 1px solid #e0e0e0;
25
- padding: 1rem;
26
- border-radius: 12px;
27
- margin-bottom: 1rem;
28
- }
29
- .stButton button {
30
- background-color: #FF6347 !important;
31
- color: white !important;
32
- border-radius: 8px !important;
33
- font-weight: 600;
34
- }
35
- .source-box {
36
- background-color: #f0f0f0;
37
- border-left: 5px solid #555;
38
- padding: 0.5rem;
39
- margin-top: 0.5rem;
40
- border-radius: 8px;
41
- font-size: 0.9rem;
42
- }
43
- </style>
44
- """, unsafe_allow_html=True)
45
-
46
- st.title("📊 ITC Financial Analysis with AI-Powered Insights")
47
-
48
- # Chat history buffer
49
- memory_buffer = {"chat_history": []}
50
-
51
- # Sidebar - Clear chat
52
- st.sidebar.markdown("## 🛠️ Options")
53
- if st.sidebar.button("πŸ” End Chat"):
54
- memory_buffer["chat_history"] = []
55
-
56
- # Extract Chroma DB ZIP (only if not already extracted)
57
-
58
- zip_path = 'src/chroma_db1.zip'
59
- extract_path = 'chroma_db'
60
-
61
- if not os.path.exists(extract_path):
62
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
63
- zip_ref.extractall(extract_path)
64
-
65
-
66
- # Load embeddings & vector DB
67
- embedding = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
68
- vectorstore = Chroma(persist_directory='chroma_db', embedding_function=embedding)
69
- mmr_retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 1})
70
-
71
- # Document formatter
72
- def format_docs(docs):
73
- return "\n\n".join(doc.page_content for doc in docs)
74
-
75
- def get_docs_and_context(question):
76
- docs = mmr_retriever.get_relevant_documents(question)
77
- return {"question": question, "docs": docs, "context": format_docs(docs)}
78
-
79
- # LLM + Prompt Setup
80
- parallel_chain = RunnableLambda(lambda x: {
81
- "question": x["input"],
82
- **get_docs_and_context(x["input"])
83
- })
84
-
85
- chat_prompt = ChatPromptTemplate.from_messages([
86
- ("system",
87
  """
88
  You are a domain-specific AI financial analyst focused on company-level performance evaluation.
89
 
@@ -91,69 +38,43 @@ chat_prompt = ChatPromptTemplate.from_messages([
91
 
92
  Rules:
93
  1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
94
- 2. If data is *missing or partially available*, clearly state: "The required data is not available in the current transcript." Then provide a generic but relevant explanation based on standard financial principles.
95
- 3. Maintain numerical accuracy and avoid interpretation beyond data boundaries.
96
- 4. Prioritize answers relevant to *ITC Ltd.*, but keep response format adaptable to other firms and fiscal years.
97
- 5. Clearly present year-wise or metric-wise insights using bullet points or structured formats if applicable.
98
-
99
- Your goals:
100
- - Ensure 100% fidelity to source transcript.
101
- - Do not assume or hallucinate missing numbers.
102
- - Use clear, reproducible reasoning steps (e.g., show which line items support your conclusion).
103
- - Output should be modular enough to scale across other companies and time periods.
104
-
105
- Respond only to this question from the user.
106
  """),
107
- MessagesPlaceholder(variable_name="chat_history", optional=True),
108
- ("human", "{input}")
109
  ])
110
 
 
111
  GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
112
- llm = ChatGoogleGenerativeAI(api_key=GOOGLE_API_KEY, model="gemini-2.0-flash-exp", temperature=1)
113
- parser = StrOutputParser()
114
-
115
- def get_history_from_buffer(_):
116
- return memory_buffer['chat_history']
117
-
118
- runnable_get_history_from_buffer = RunnableLambda(get_history_from_buffer)
119
-
120
- main_chain = (
121
- parallel_chain |
122
- RunnableLambda(lambda x: {
123
- "llm_input": {"input": x["question"], "context": x["context"]},
124
- "docs": x["docs"]
125
- }) |
126
- RunnableLambda(lambda x: {
127
- "result": (chat_prompt | llm | parser).invoke(x["llm_input"]),
128
- "source_documents": x["docs"]
129
- })
130
  )
 
131
 
132
- chain = RunnablePassthrough.assign(chat_history=runnable_get_history_from_buffer) | main_chain
133
-
134
- # --- Chat UI ---
135
- st.markdown("### 💬 Conversation")
136
- for msg in memory_buffer["chat_history"]:
137
- role = "user" if isinstance(msg, HumanMessage) else "assistant"
138
- with st.chat_message(role):
139
- st.markdown(msg.content)
140
-
141
- # --- Input Section ---
142
- user_input = st.chat_input("Ask about ITC’s performance or any financial metric...")
143
-
144
- if user_input:
145
- with st.chat_message("user"):
146
- st.markdown(user_input)
147
-
148
- memory_buffer["chat_history"].append(HumanMessage(content=user_input))
149
- output = chain.invoke({"input": user_input})
150
- ai_response = output["result"]
151
- memory_buffer["chat_history"].append(AIMessage(content=ai_response))
152
 
153
- with st.chat_message("assistant"):
154
- st.markdown(ai_response)
155
- if output.get("source_documents"):
156
- st.markdown("**Sources:**")
157
- for doc in output["source_documents"]:
158
- source = doc.metadata.get("source", "Unknown document")
159
- st.markdown(f"<div class='source-box'>📄 {source}</div>", unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
2
  import os
3
+ import zipfile
4
+ from langchain_chroma import Chroma # ✅ Updated import
5
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
6
+ from langchain.prompts import ChatPromptTemplate
 
 
7
  from langchain.schema.output_parser import StrOutputParser
8
+ from langchain.schema.runnable import RunnableLambda
9
+ import tempfile
10
+
11
+ # === Page Setup ===
12
+ st.set_page_config(page_title="Financial QA - ITC Ltd.", layout="wide")
13
+ st.title("📊 Financial Q&A Chatbot (ITC Ltd.)")
14
+
15
+ # === Step 1: Extract Chroma DB from zip ===
16
+ def load_chroma_db():
17
+ with zipfile.ZipFile("chroma_db1.zip", 'r') as zip_ref:
18
+ temp_dir = tempfile.mkdtemp()
19
+ zip_ref.extractall(temp_dir)
20
+ embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
21
+ return Chroma(persist_directory=temp_dir, embedding_function=embedding)
22
+
23
+ vectorstore = load_chroma_db()
24
+
25
+ # === Step 2: MMR Retriever ===
26
+ retriever = vectorstore.as_retriever(
27
+ search_type="mmr",
28
+ search_kwargs={"k": 3, "lambda_mult": 1}
29
+ )
30
 
31
+ # === Step 3: Prompt Template ===
32
+ prompt = ChatPromptTemplate.from_messages([
33
+ ("system",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  """
35
  You are a domain-specific AI financial analyst focused on company-level performance evaluation.
36
 
 
38
 
39
  Rules:
40
  1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
41
+ 2. If data is *missing or partially available*, clearly state: "The required data is not available in the current transcript."
42
+ 3. Do not assume or hallucinate values. Be transparent and evidence-driven.
43
+ 4. Prioritize answers for ITC Ltd., but keep the structure reusable.
44
+ 5. Use bullet points or structure year-wise/metric-wise data when appropriate.
 
 
 
 
 
 
 
 
45
  """),
46
+ ("human", "{question}")
 
47
  ])
48
 
49
+ # === Step 4: LLM Setup ===
50
  GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
51
+ llm = ChatGoogleGenerativeAI(
52
+ api_key=GOOGLE_API_KEY,
53
+ model="gemini-2.0-flash",
54
+ temperature=1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  )
56
+ parser = StrOutputParser()
57
 
58
+ # === Step 5: Helper Functions ===
59
+ def format_docs(docs):
60
+ return "\n\n".join(doc.page_content for doc in docs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ def retrieve_and_answer(question):
63
+ docs = retriever.invoke(question) # ✅ Updated to new `invoke()` method
64
+ context = format_docs(docs)
65
+ final_input = {"question": question, "context": context}
66
+ result = (prompt | llm | parser).invoke(final_input)
67
+ return result, docs
68
+
69
+ # === Step 6: Streamlit UI ===
70
+ query = st.text_input("πŸ” Enter your financial question:", "")
71
+
72
+ if st.button("Get Answer") and query.strip():
73
+ with st.spinner("Generating answer..."):
74
+ answer, source_docs = retrieve_and_answer(query)
75
+ st.markdown("### ✅ Answer")
76
+ st.markdown(answer)
77
+
78
+ st.markdown("### 📄 Source Documents")
79
+ for doc in source_docs:
80
+ st.write(doc.metadata)