yashgori20 committed on
Commit
c676ddb
·
verified ·
1 Parent(s): 0061d62

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Assets/financial_index.faiss filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Assets/financial_index.faiss filter=lfs diff=lfs merge=lfs -text
37
+ financial_index.faiss filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import pickle
4
+ import faiss
5
+ import pandas as pd
6
+ from sentence_transformers import SentenceTransformer
7
+ from groq import Groq
8
+
9
# Read the Groq API key from the environment so the secret never lives in
# source control. The key that used to be hard-coded on this line is exposed
# in the repository history and must be revoked and rotated.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail with a readable message instead of a client-construction traceback.
    st.error("GROQ_API_KEY environment variable is not set.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)
12
+
13
# Load the embedding model once per server process. Streamlit re-executes this
# script on every widget interaction, so an uncached constructor call would
# rebuild the model on each rerun.
# show_spinner=False keeps the cached call from emitting any UI element before
# main() has had the chance to call st.set_page_config().
@st.cache_resource(show_spinner=False)
def _load_embedding_model():
    """Return the shared sentence-embedding model used for retrieval queries."""
    return SentenceTransformer('all-MiniLM-L6-v2')


model = _load_embedding_model()

# Paths to your assets folder
# NOTE(review): assets_folder is not referenced anywhere else in the visible
# code; the index and chunk files are opened relative to the working directory.
assets_folder = os.path.join(os.getcwd(), 'assets')
18
+
19
+ # Function to load resources from local storage
20
def load_resources():
    """Load the industry and circular FAISS indexes with their chunk lists.

    All four files are resolved relative to the current working directory.
    If any of them is missing, an error naming the missing files is shown
    and the Streamlit script run is stopped.

    Returns:
        A tuple ``(industry_index, industry_chunks, circular_index,
        circular_chunks)``.
    """
    # Paths to index and chunk files
    industry_index_path = 'industry_index.faiss'
    industry_chunks_path = 'industry_chunks.pkl'
    circular_index_path = 'circular_index.faiss'
    circular_chunks_path = 'circular_chunks.pkl'

    # Report exactly which files are absent so the deployment can be fixed.
    required_paths = [
        industry_index_path,
        industry_chunks_path,
        circular_index_path,
        circular_chunks_path,
    ]
    missing_paths = [path for path in required_paths if not os.path.exists(path)]
    if missing_paths:
        st.error(
            "FAISS indexes and chunk files not found: "
            + ", ".join(missing_paths)
            + ". Please ensure they are present in the application's working directory."
        )
        st.stop()

    # Load FAISS indexes and chunks.
    # NOTE: pickle.load can execute arbitrary code; only ship chunk files that
    # were produced by this project.
    industry_index = faiss.read_index(industry_index_path)
    with open(industry_chunks_path, 'rb') as f:
        industry_chunks = pickle.load(f)
    circular_index = faiss.read_index(circular_index_path)
    with open(circular_chunks_path, 'rb') as f:
        circular_chunks = pickle.load(f)
    return industry_index, industry_chunks, circular_index, circular_chunks


# Prepare data
industry_index, industry_chunks, circular_index, circular_chunks = load_resources()
43
+
44
+ # Function to retrieve relevant chunks
45
def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Return the chunks whose embeddings are nearest to *query*.

    Args:
        query: Text to embed with the module-level ``model``.
        index: Object exposing ``search(vectors, k)`` (a FAISS index here).
        chunks: Sequence indexed by the labels that ``index.search`` returns.
        top_k: Maximum number of neighbours to request.

    Returns:
        A list of at most ``top_k`` entries from ``chunks``, in the order the
        index returned them.
    """
    query_embedding = model.encode([query], convert_to_numpy=True)
    # FAISS operates on float32 vectors; cast explicitly, as the financial
    # retrieval helper in this file already does.
    _, indices = index.search(query_embedding.astype('float32'), top_k)
    # FAISS pads the label row with -1 when it finds fewer than top_k
    # neighbours. Without this filter, chunks[-1] would silently return the
    # last chunk as if it were a match.
    return [chunks[i] for i in indices[0] if i >= 0]
50
+
51
+ # Function for Circular Compliance (Problem Statement 2)
52
# Prompt sent to the model; {context} and {user_query} are filled per request.
_CIRCULAR_PROMPT_TEMPLATE = """
You are an expert RBI compliance analyst. Based on the provided RBI Master Circular on Management of Advances:

{context}

Please analyze the following scenario for compliance:
{user_query}

Provide a detailed compliance analysis with the following structure:

1. Compliance Status:
- Clear statement whether the scenario is compliant or non-compliant
- Level of certainty in the assessment

2. Relevant Circular Details:
- Specific section(s) and paragraph references
- Direct quotes from applicable sections where relevant

3. Detailed Analysis:
- Breakdown of key compliance requirements
- Calculation/numerical analysis if applicable
- Specific points of compliance/non-compliance

4. Additional Considerations:
- Related requirements or obligations
- Monitoring/reporting requirements if applicable

5. Recommendation:
- Clear guidance on what needs to be done for compliance
- Specific steps to address any non-compliance

Please provide definitive guidance based solely on the circular content, avoiding ambiguity or speculation.

Response:
"""


def circular_compliance():
    """Render the Circular Compliance view (Problem Statement 2).

    Reads a scenario from a text area. When the button is pressed, the
    nearest circular chunks are retrieved, inserted into the prompt and sent
    to the chat-completion endpoint; the reply is written to the page.
    """
    st.header("Circular Compliance Assistant")
    user_query = st.text_area("Enter your scenario or question:", key='circular_input')
    if not st.button("Check Compliance", key='circular_button'):
        return
    if not user_query:
        # Tell the user why nothing happened instead of silently ignoring the click.
        st.warning("Please enter a scenario or question.")
        return

    relevant_chunks = retrieve_relevant_chunks(user_query, circular_index, circular_chunks)
    context = "\n".join(relevant_chunks)
    prompt = _CIRCULAR_PROMPT_TEMPLATE.format(context=context, user_query=user_query)

    # Same failure reporting as the Model 1 chat view: a network or API error
    # should surface as a message, not as a traceback page.
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {'role': 'user', 'content': prompt}
            ],
            model="gemma2-9b-it",
            stream=False,
            temperature=0.0
        )
    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.error("Please check your API key and model availability.")
        return

    response = chat_completion.choices[0].message.content.strip()
    st.write(response)
104
+
105
+ # Function for Industry Classification (Problem Statement 3)
106
# Prompt sent to the model; {context} and {user_keywords} are filled per request.
_INDUSTRY_PROMPT_TEMPLATE = """
You are an assistant helping to classify industries based on keywords. Based on the following information:

{context}

User's Keywords:
{user_keywords}

Suggest the most appropriate industry classification codes. Ask any necessary follow-up questions to clarify if needed.

Answer:
"""


def industry_classification():
    """Render the Industry Classification view (Problem Statement 3).

    Reads keywords from a text input. When the button is pressed, the nearest
    industry chunks are retrieved, inserted into the prompt and sent to the
    chat-completion endpoint; the reply is written to the page.
    """
    st.header("Industry Classification Assistant")
    user_keywords = st.text_input("Enter keywords related to the industry:", key='industry_input')
    if not st.button("Get Industry Classification", key='industry_button'):
        return
    if not user_keywords:
        # Tell the user why nothing happened instead of silently ignoring the click.
        st.warning("Please enter at least one keyword.")
        return

    relevant_chunks = retrieve_relevant_chunks(user_keywords, industry_index, industry_chunks)
    context = "\n".join(relevant_chunks)
    prompt = _INDUSTRY_PROMPT_TEMPLATE.format(context=context, user_keywords=user_keywords)

    # Same failure reporting as the Model 1 chat view: a network or API error
    # should surface as a message, not as a traceback page.
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {'role': 'user', 'content': prompt}
            ],
            model="gemma2-9b-it",
            stream=False,
            temperature=0.0
        )
    except Exception as e:
        st.error(f"An error occurred: {e}")
        st.error("Please check your API key and model availability.")
        return

    response = chat_completion.choices[0].message.content.strip()
    st.write(response)
135
+
136
+ # Existing calculation function (Problem Statement 1)
137
def _compute_mpbf(total_current_assets, other_current_liabilities, actual_nwc):
    """Return ``(wcg, minimum_nwc, item_6, item_7, mpbf)`` for the MPBF view.

    ``minimum_nwc`` is 25% of total current assets and ``mpbf`` is the smaller
    of ``item_6`` and ``item_7``.
    """
    working_capital_gap = total_current_assets - other_current_liabilities
    minimum_stipulated_nwc = 0.25 * total_current_assets
    item_6 = working_capital_gap - minimum_stipulated_nwc
    item_7 = working_capital_gap - actual_nwc
    return working_capital_gap, minimum_stipulated_nwc, item_6, item_7, min(item_6, item_7)


def _compute_drawing_power(inventory_total, receivables_total, creditors,
                           inventory_margin=0.25, receivables_margin=0.40,
                           creditors_margin=0.40):
    """Return ``(inventory_advance, receivables_advance, total_a, total_b, dp)``.

    Each input is reduced by its margin; ``dp`` is ``total_a - total_b``.
    """
    inventory_advance = inventory_total * (1 - inventory_margin)
    receivables_advance = receivables_total * (1 - receivables_margin)
    total_b = creditors * (1 - creditors_margin)
    total_a = inventory_advance + receivables_advance
    return inventory_advance, receivables_advance, total_a, total_b, total_a - total_b


def calculations():
    """Render the Calculation Methodology view (Problem Statement 1).

    Offers two calculators, Maximum Permissible Bank Finance and Drawing
    Power. Inputs are collected with number widgets and the results are shown
    when the corresponding button is pressed.
    """
    st.subheader("Calculation Methodology")
    calc_option = st.selectbox("Choose Calculation Method",
                               ("Maximum Permissible Bank Finance (MPBF)", "Drawing Power (DP)"))

    if calc_option == "Maximum Permissible Bank Finance (MPBF)":
        st.header("MPBF Calculation")
        total_current_assets = st.number_input("Total Current Assets (TCA):", min_value=0.0, value=0.0)
        other_current_liabilities = st.number_input("Other Current Liabilities (OCL):", min_value=0.0, value=0.0)
        actual_nwc = st.number_input("Actual/Projected Net Working Capital (NWC):", min_value=0.0, value=0.0)

        if st.button("Calculate MPBF"):
            working_capital_gap, minimum_stipulated_nwc, item_6, item_7, mpbf = _compute_mpbf(
                total_current_assets, other_current_liabilities, actual_nwc
            )

            st.success(f"Working Capital Gap (WCG): {working_capital_gap:.2f}")
            st.success(f"Minimum Stipulated NWC (25% of TCA): {minimum_stipulated_nwc:.2f}")
            st.success(f"Item 6 (WCG - Minimum Stipulated NWC): {item_6:.2f}")
            st.success(f"Item 7 (WCG - Actual NWC): {item_7:.2f}")
            st.success(f"Maximum Permissible Bank Finance (MPBF): {mpbf:.2f}")

    elif calc_option == "Drawing Power (DP)":
        st.header("DP Calculation")

        st.subheader("Inventory Details")
        raw_material = st.number_input("Raw Material:", min_value=0.0, value=0.0)
        consumable_spares = st.number_input("Other Consumable Spares:", min_value=0.0, value=0.0)
        stock_in_process = st.number_input("Stock-in-process:", min_value=0.0, value=0.0)
        finished_goods = st.number_input("Finished Goods:", min_value=0.0, value=0.0)

        st.subheader("Receivables")
        domestic_receivables = st.number_input("Domestic Receivables:", min_value=0.0, value=0.0)
        export_receivables = st.number_input("Export Receivables:", min_value=0.0, value=0.0)

        st.subheader("Creditors")
        creditors = st.number_input("Creditors:", min_value=0.0, value=0.0)

        if st.button("Calculate DP"):
            inventory_total = raw_material + consumable_spares + stock_in_process + finished_goods
            receivables_total = domestic_receivables + export_receivables
            inventory_advance, receivables_advance, total_A, total_B, dp = _compute_drawing_power(
                inventory_total, receivables_total, creditors
            )

            st.success(f"Total Inventory (After Margin): {inventory_advance:.2f}")
            st.success(f"Total Receivables (After Margin): {receivables_advance:.2f}")
            st.success(f"Total (A): {total_A:.2f}")
            st.success(f"Creditors (After Margin): {total_B:.2f}")
            st.success(f"Drawing Power (DP): {dp:.2f}")
195
+
196
+ # Function for Model 1 chat interface
197
def run_model1_chat():
    """Render the Model 1 chat view.

    The conversation is kept in ``st.session_state['chat_history']`` as
    ``(speaker, message)`` tuples, where speaker is ``"User"`` or ``"Model"``.
    On "Send", the whole history is forwarded to the chat-completion endpoint
    so the model can see earlier turns, and the reply is appended.
    """
    st.header("Model 1 Chat Interface")

    if 'chat_history' not in st.session_state:
        st.session_state['chat_history'] = []
    history = st.session_state['chat_history']

    user_input = st.text_input("You:", key="model1_input")

    if st.button("Send", key='model1_send') and user_input:
        history.append(("User", user_input))

        # Forward every stored turn, not only the latest message; otherwise
        # the model has no memory of the conversation the page displays.
        role_by_speaker = {"User": "user", "Model": "assistant"}
        messages = [
            {'role': role_by_speaker[speaker], 'content': message}
            for speaker, message in history
        ]

        try:
            # Get model response
            chat_completion = client.chat.completions.create(
                messages=messages,
                model="gemma2-9b-it",
                stream=False,
                temperature=0.0
            )
            response = chat_completion.choices[0].message.content.strip()
        except Exception as e:
            # Drop the unanswered turn so the stored history keeps strictly
            # alternating user/assistant roles for the next request.
            history.pop()
            st.error(f"An error occurred: {e}")
            st.error("Please check your API key and model availability.")
        else:
            history.append(("Model", response))

    # Display chat history
    for speaker, message in history:
        if speaker == "User":
            st.markdown(f"**You:** {message}")
        else:
            st.markdown(f"**Model 1:** {message}")
231
+
232
+
233
def retrieve_relevant_financial_statements(query, index, statements, model, top_k=10, max_tokens=1500):
    """Return the statements nearest to *query*, capped by a word budget.

    Args:
        query: Text to embed.
        index: Object exposing ``search(vectors, k)`` (a FAISS index here).
        statements: Sequence of dicts, each with a ``'statement'`` string,
            indexed by the labels that ``index.search`` returns.
        model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``.
        top_k: Maximum number of neighbours to request.
        max_tokens: Budget counted in whitespace-separated words; collection
            stops at the first statement that would exceed it.

    Returns:
        A list of statement dicts in the order the index returned them.
    """
    query_embedding = model.encode([query], convert_to_numpy=True)
    _, indices = index.search(query_embedding.astype('float32'), top_k)
    retrieved_statements = []
    total_tokens = 0
    for idx in indices[0]:
        # FAISS pads the label row with -1 when it finds fewer than top_k
        # neighbours. Without this guard, statements[-1] would be returned
        # as if it were a match.
        if idx < 0:
            continue
        entry = statements[idx]
        token_count = len(entry['statement'].split())
        if total_tokens + token_count > max_tokens:
            break
        retrieved_statements.append(entry)
        total_tokens += token_count
    return retrieved_statements
246
+
247
+
248
def _normalize_fiscal_year(raw_year):
    """Return *raw_year* in ``YYYY-YY`` form, e.g. ``"1992"`` -> ``"1992-93"``.

    The suffix wraps at the century, so ``"1999"`` becomes ``"1999-00"``.
    Values already in ``YYYY-YY`` form are returned unchanged.
    """
    if len(raw_year) == 4:
        return f"{raw_year}-{(int(raw_year) + 1) % 100:02d}"
    return raw_year


def _parse_financial_query(user_query, state_names):
    """Extract ``(metric, state, year)`` from free text; each may be ``None``.

    ``metric`` is title-cased for display, ``state`` is the text as typed in
    the query, and ``year`` is normalised to ``YYYY-YY``.
    """
    import re

    # List of possible metrics
    metrics_list = [
        'aggregate expenditure', 'capital expenditure', 'gross fiscal deficits',
        'nominal gsdp series', 'own tax revenues', 'revenue deficits',
        'revenue expenditure', 'social sector expenditure'
    ]
    metric_regex = re.compile(rf"\b({'|'.join(metrics_list)})\b", re.IGNORECASE)
    metric_match = metric_regex.search(user_query)
    query_metric = metric_match.group(1).strip().title() if metric_match else None

    query_state = None
    if state_names:
        # Escape the names so regex metacharacters in the data are matched
        # literally, and try longer names first so a name that is a prefix of
        # another cannot shadow it.
        ordered = sorted(state_names, key=len, reverse=True)
        states_pattern = '|'.join(re.escape(name) for name in ordered)
        state_match = re.search(rf'\b({states_pattern})\b', user_query, re.IGNORECASE)
        if state_match:
            query_state = state_match.group(1).strip()

    # The digit guards stop the pattern from biting into a longer number:
    # "1992-1993" yields "1992", not "1992-19".
    year_match = re.search(r'(?<!\d)(\d{4}(?:-\d{2})?)(?!\d)', user_query)
    query_year = _normalize_fiscal_year(year_match.group(1)) if year_match else None

    return query_metric, query_state, query_year


def model2_financial_data():
    """Render the Financial Data Assistant view (Model 2).

    Loads the pickled financial statements, parses a metric, state and year
    out of the user's question, and shows either the single requested metric
    or a table of every metric stored for that state and year.
    """
    st.header("Financial Data Assistant (Model 2)")

    financial_statements_path = 'financial_statements.pkl'

    # Load statements.
    # The FAISS index that used to be read here was never consulted by this
    # view, so it is no longer loaded on every rerun.
    if not os.path.exists(financial_statements_path):
        st.error("Financial statements data not found.")
        st.stop()
    # NOTE: pickle.load can execute arbitrary code; only ship data files that
    # were produced by this project.
    with open(financial_statements_path, 'rb') as f:
        financial_statements = pickle.load(f)

    # Allow the user to input a query
    user_query = st.text_area("Ask a question about Indian state-wise financial details (1980-2015):", key='model2_input')

    if not (st.button("Get Answer", key='model2_button') and user_query):
        return

    # presumably every record carries string 'state', 'year' and
    # 'metric_type' fields plus a 'value'; verify against the data builder.
    state_names = {s['state'] for s in financial_statements}
    query_metric, query_state, query_year = _parse_financial_query(user_query, state_names)

    if not (query_state and query_year):
        st.write("Could not understand the query. Please specify the state and year.")
        return

    wanted_state = query_state.lower()
    matches = [
        s for s in financial_statements
        if s['state'].lower() == wanted_state and s['year'] == query_year
    ]

    if query_metric:
        # Compare case-insensitively and read the value straight from the
        # record. Looking it up again under the title-cased name misses
        # metrics stored with different casing, such as "GSDP".
        wanted_metric = query_metric.lower()
        record = next(
            (s for s in matches if s['metric_type'].lower() == wanted_metric),
            None,
        )
        if record is None:
            st.write("Data not found for the specified state, year, or metric.")
        elif record['value'] is None:
            st.write(f"{query_metric} data not found for {query_state} in {query_year}.")
        else:
            st.write(f"The {query_metric} of {query_state} in {query_year} is {record['value']}")
        return

    data = {s['metric_type']: s['value'] for s in matches}
    if data:
        # Display all metrics
        st.write(f"Financial data for **{query_state}** in **{query_year}**:")
        df = pd.DataFrame(list(data.items()), columns=['Metric', 'Value'])
        st.table(df)
    else:
        st.write("Data not found for the specified state, year, or metric.")
352
+
353
def main():
    """Configure the page and run the view selected in the radio group."""
    st.set_page_config(page_title="Finance Assistant", page_icon="💸", layout="wide")
    st.title("💸 Finance Assistant")

    # Radio label -> view function; insertion order is the display order.
    views = {
        "Calculation Methodology": calculations,
        "Circular Compliance": circular_compliance,
        "Industry Classification": industry_classification,
        "Model 1": run_model1_chat,
        "Model 2": model2_financial_data,
    }

    option = st.radio(
        "Choose a Functionality",
        tuple(views)
    )

    render_view = views.get(option)
    if render_view is not None:
        render_view()


if __name__ == "__main__":
    main()
circular_chunks.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1368965f16a8b92a1482e82409d36bee91386be3a9bdfcfee3f49416519377
3
+ size 328319
circular_index.faiss ADDED
Binary file (161 kB). View file
 
financial_embeddings.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad42b853679129285bc951a702021a0bab596f6013e34d9fad5b0fbda9b24705
3
+ size 11960960
financial_index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f1980b095d018dfb11c17c5d2b643ce5966655d6744fd21c445ebba262494a0
3
+ size 11960877
financial_statements.json ADDED
The diff for this file is too large to render. See raw diff
 
financial_statements.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb5aca07d9108332093958b7abae64ab2c3de162d292cd76486047df9c85faec
3
+ size 770727
industry_chunks.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:418842c0f448fbe3bf61fe92435da38ca667fde36feafbcc63a858647e3163d8
3
+ size 416898
industry_index.faiss ADDED
Binary file (184 kB). View file