Mpavan45 committed on
Commit
5dc5c1e
·
verified ·
1 Parent(s): 4806e78

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +14 -23
src/streamlit_app.py CHANGED
@@ -30,16 +30,13 @@ with st.container():
30
  st.markdown("Ask financial questions about ITC Ltd. based on transcript data, powered by AI.")
31
  st.markdown('</div>', unsafe_allow_html=True)
32
 
33
-
34
-
35
- # Load API key securely from Hugging Face secrets
36
- GOOGLE_API_KEY = st.secrets.get("GOOGLE_API_KEY")
37
-
38
 
39
  # Initialize Chroma DB
40
  @st.cache_resource
41
- def initialize_vectorstore(GOOGLE_API_KEY):
42
- embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
43
  zip_path = "./chroma_db1.zip"
44
  extract_dir = "chroma_db2"
45
  if os.path.exists(zip_path):
@@ -47,7 +44,6 @@ def initialize_vectorstore(GOOGLE_API_KEY):
47
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
48
  zip_ref.extractall(extract_dir)
49
  vectorstore = Chroma(persist_directory=extract_dir, embedding_function=embedding)
50
- vectorstore.persist()
51
  if vectorstore._collection.count() > 0:
52
  return vectorstore
53
  else:
@@ -58,42 +54,37 @@ def initialize_vectorstore(GOOGLE_API_KEY):
58
  st.error(f"`chroma_db1.zip` not found at {zip_path}")
59
  return None
60
 
61
- # Initialize vectorstore and retriever
62
  retriever = None
 
 
63
 
64
- vectorstore = initialize_vectorstore(GOOGLE_API_KEY)
65
- if vectorstore:
66
- retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 1})
 
 
 
67
 
68
  # Prompt template
69
  prompt = ChatPromptTemplate.from_messages([
70
  ("system",
71
  """You are a domain-specific AI financial analyst focused on company-level performance evaluation.
72
-
73
  Your task is to analyze and respond to user financial queries strictly based on the provided transcript data: {context}.
74
-
75
  Rules:
76
  1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
77
  2. If data is missing or partially available, clearly state: "The required data is not available in the current transcript." Then provide a generic but relevant explanation based on standard financial principles.
78
  3. Maintain numerical accuracy and avoid interpretation beyond data boundaries.
79
  4. Prioritize answers relevant to ITC Ltd., but keep response format adaptable to other firms and fiscal years.
80
  5. Clearly present year-wise or metric-wise insights using bullet points or structured formats if applicable.
81
-
82
  Your goals:
83
  - Ensure 100% fidelity to source transcript.
84
  - Do not assume or hallucinate missing numbers.
85
  - Use clear, reproducible reasoning steps (e.g., show which line items support your conclusion).
86
  - Output should be modular enough to scale across other companies and time periods.
87
-
88
- Respond only to this question from the user."""),
89
-
90
  ("human", "{question}")
91
  ])
92
- # LLM setup
93
- llm, parser = None, None
94
- if GOOGLE_API_KEY:
95
- llm = ChatGoogleGenerativeAI(api_key=GOOGLE_API_KEY, model="gemini-1.5-flash", temperature=1)
96
- parser = StrOutputParser()
97
 
98
  # Helper functions
99
  def format_docs(docs):
 
30
  st.markdown("Ask financial questions about ITC Ltd. based on transcript data, powered by AI.")
31
  st.markdown('</div>', unsafe_allow_html=True)
32
 
33
+ # Load API key
34
+ GOOGLE_API_KEY = st.secrets.get("genai")
 
 
 
35
 
36
  # Initialize Chroma DB
37
  @st.cache_resource
38
+ def initialize_vectorstore(api_key):
39
+ embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
40
  zip_path = "./chroma_db1.zip"
41
  extract_dir = "chroma_db2"
42
  if os.path.exists(zip_path):
 
44
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
45
  zip_ref.extractall(extract_dir)
46
  vectorstore = Chroma(persist_directory=extract_dir, embedding_function=embedding)
 
47
  if vectorstore._collection.count() > 0:
48
  return vectorstore
49
  else:
 
54
  st.error(f"`chroma_db1.zip` not found at {zip_path}")
55
  return None
56
 
 
57
  retriever = None
58
+ vectorstore = None
59
+ llm, parser = None, None
60
 
61
+ if GOOGLE_API_KEY:
62
+ vectorstore = initialize_vectorstore(GOOGLE_API_KEY)
63
+ if vectorstore:
64
+ retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3, "lambda_mult": 1})
65
+ llm = ChatGoogleGenerativeAI(api_key=GOOGLE_API_KEY, model="gemini-1.5-flash", temperature=1)
66
+ parser = StrOutputParser()
67
 
68
  # Prompt template
69
  prompt = ChatPromptTemplate.from_messages([
70
  ("system",
71
  """You are a domain-specific AI financial analyst focused on company-level performance evaluation.
 
72
  Your task is to analyze and respond to user financial queries strictly based on the provided transcript data: {context}.
 
73
  Rules:
74
  1. ONLY extract facts, figures, and insights that are explicitly available in the transcript.
75
  2. If data is missing or partially available, clearly state: "The required data is not available in the current transcript." Then provide a generic but relevant explanation based on standard financial principles.
76
  3. Maintain numerical accuracy and avoid interpretation beyond data boundaries.
77
  4. Prioritize answers relevant to ITC Ltd., but keep response format adaptable to other firms and fiscal years.
78
  5. Clearly present year-wise or metric-wise insights using bullet points or structured formats if applicable.
 
79
  Your goals:
80
  - Ensure 100% fidelity to source transcript.
81
  - Do not assume or hallucinate missing numbers.
82
  - Use clear, reproducible reasoning steps (e.g., show which line items support your conclusion).
83
  - Output should be modular enough to scale across other companies and time periods.
84
+ Respond only to this question from the user."""
85
+ ),
 
86
  ("human", "{question}")
87
  ])
 
 
 
 
 
88
 
89
  # Helper functions
90
  def format_docs(docs):