mrfirdauss commited on
Commit
d2ef3e4
·
1 Parent(s): 9236ded

feat: make it factory design pattern

Browse files
fraudTrainData.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3363405c1efbd9862c0b6c4ebebdac1fc9aff175063847c4cda60dc8c50f32
3
+ size 254853611
requirements.txt CHANGED
@@ -3,4 +3,6 @@ pandas
3
  streamlit
4
  matplotlib
5
  pandas
6
- tabulate
 
 
 
3
  streamlit
4
  matplotlib
5
  pandas
6
+ tabulate
7
+ huggingface_hub
8
+ langchain_community
src/FinancialAgentApp.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ import pickle
3
+ from models import ResponseState
4
+ from prompt import REFINERY_PROMPT, FINAL_PROMPT
5
+ from langchain_community.vectorstores import FAISS
6
+ import numpy as np
7
+ import matplotlib.pyplot as plt
8
+ import pandas as pd
9
+ from openai import OpenAI
10
+ import pickle
11
+ import io
12
+
13
class FinancialAgentApp(ABC):
    """Template-method base class for the Streamlit financial chat agent.

    Subclasses must set ``self.client`` (an OpenAI-compatible client) in
    their ``__init__`` and implement the two abstract hooks
    ``__stream_answer__`` and ``handle_context``.
    """

    def __init__(self, st, model_name):
        """Store the streamlit module, load the dataframe, init chat history.

        Args:
            st: The imported ``streamlit`` module (injected for testability).
            model_name: Model identifier passed to the responses API.
        """
        self.st = st
        # NOTE(review): pickle.load of a repo-local file — only safe because
        # the pickle ships with the app; never load untrusted pickles.
        self.df = pickle.load(open("fraudTrainData.pkl", "rb"))

        self.model_name = model_name

        if "messages" not in self.st.session_state:
            self.st.session_state.messages = []

    def render_header(self):
        """Render the page title."""
        self.st.title("Financial Agent")

    def render_messages(self):
        """Render previous chat messages."""
        for message in self.st.session_state.messages:
            with self.st.chat_message(message["role"]):
                self.st.markdown(message["content"])

    @abstractmethod
    def __stream_answer__(self, instructions, input_messages):
        """Yield text deltas of the model response (generator)."""

    def process_prompt(self, prompt):
        """Main pipeline for processing a new user input."""
        self.st.session_state.messages.append({"role": "user", "content": prompt})
        with self.st.chat_message("user"):
            self.st.markdown(prompt)

        # Step 1: run the refinery prompt to decide whether extra context
        # (dataframe code execution and/or PDF retrieval) is required.
        response = self.client.responses.parse(
            model=self.model_name,
            instructions=REFINERY_PROMPT.format(
                df_head=self.df.head().to_markdown(),
                df_columns=self.df.columns.tolist(),
                df_sample=self.df.sample(5).to_markdown()
            ),
            input=[{"role": m["role"], "content": m["content"]} for m in self.st.session_state.messages],
            stream=False,
            text_format=ResponseState
        )

        response_state: ResponseState = response.output_parsed

        # Step 2: branch on whether context is needed.
        if response_state.isNeedContext:
            context_prompt = self.handle_context(response_state)
            self.generate_final_answer(context_prompt)
        else:
            self.display_final_answer(response_state.response)

    def __safe_savefig__(self, *args, **kwargs):
        """Capture the current matplotlib figure into an in-memory PNG buffer.

        Fix: the original signature omitted ``self`` and silently relied on
        the bound instance landing in ``*args``.
        """
        buf = io.BytesIO()
        plt.savefig(buf, format="png")
        buf.seek(0)
        return buf

    @abstractmethod
    def handle_context(self, response_state: ResponseState) -> str:
        """Build additional context (data/PDF) and return it as a prompt string."""

    def generate_final_answer(self, context_prompt: str):
        """Generate and stream the final answer with context."""
        with self.st.chat_message("assistant"):
            # Fix: the original called self.stream_answer, which is defined
            # nowhere (the abstract hook and both subclasses implement
            # __stream_answer__; names with trailing underscores are not
            # name-mangled), so this raised AttributeError at runtime.
            answer = self.st.write_stream(
                self.__stream_answer__(
                    instructions=FINAL_PROMPT,
                    input_messages=[
                        {"role": m["role"], "content": m["content"]}
                        for m in self.st.session_state.messages
                    ] + [{"role": "user", "content": context_prompt}]
                )
            )
        self.st.session_state.messages.append({"role": "assistant", "content": answer})

    def display_final_answer(self, answer: str):
        """Display a non-streamed assistant answer."""
        self.st.session_state.messages.append({"role": "assistant", "content": answer})
        with self.st.chat_message("assistant"):
            self.st.markdown(answer)

    def run(self):
        """Run the app: header, history, then process any new chat input."""
        self.render_header()
        self.render_messages()

        if prompt := self.st.chat_input("What is up?"):
            self.process_prompt(prompt)
103
+
104
+
105
+
106
class HFFinancialRAG(FinancialAgentApp):
    """Variant backed by an OpenAI-compatible HF endpoint plus a local FAISS store."""

    def __init__(self, st, base_url, api_key, model_name='Qwen/Qwen3-4B', vector_id="vs_68bf713eea2c81919ac08298a05d6704", embedding=None):
        """Validate credentials, build the client, and load the FAISS index.

        Raises:
            ValueError: If ``base_url`` or ``api_key`` is missing/empty.
        """
        if not base_url:
            raise ValueError("base_url cannot be None or empty.")
        if not api_key:
            raise ValueError("api_key cannot be None or empty.")
        super().__init__(st, model_name)
        self.client = OpenAI(base_url=base_url, api_key=api_key)
        # NOTE(review): allow_dangerous_deserialization unpickles the index —
        # only load FAISS stores produced by this project.
        self.vector_db = FAISS.load_local(vector_id, embedding, allow_dangerous_deserialization=True)

    def handle_context(self, response_state: ResponseState) -> str:
        """Handle additional context (data, PDF, etc.).

        Fix: renamed from ``__handle_context__`` — the base class declares the
        abstract hook as ``handle_context`` and calls it by that name, so the
        original name left the abstract method unimplemented and the class
        uninstantiable.
        """
        context_prompt = ""
        if response_state.contextType in ("data", "both"):
            # SECURITY: executes model-generated code against the dataframe;
            # acceptable only for trusted/demo use.
            local_scope = {"df": self.df, "np": np, "pd": pd, "plt": plt, "savefig": self.__safe_savefig__}
            exec(response_state.code, {}, local_scope)

            fig = plt.gcf()
            if fig.get_axes():  # a chart was generated
                with self.st.chat_message("assistant"):
                    self.st.pyplot(fig)
                plt.close(fig)

            context_prompt = "## CONTEXT DATAFRAME.\n"
            context_prompt += str(local_scope.get("result", ""))

        if response_state.contextType in ("pdf", "both"):
            context_prompt += "## CONTEXT PDF.\n"
            results = self.vector_db.similarity_search(response_state.retriverKey, k=5)
            for i, doc in enumerate(results, 1):
                context_prompt += f"### Document {i}\n{doc.page_content}\n"
        return context_prompt

    def __stream_answer__(self, instructions, input_messages):
        """Yield text deltas from the OpenAI-compatible responses stream."""
        response_stream = self.client.responses.create(
            model=self.model_name,
            instructions=instructions,
            input=input_messages,
            stream=True
        )
        for chunk in response_stream:
            if chunk.type == 'response.output_text.delta':
                yield chunk.delta
150
+
151
+
152
class OpenAIFinancialRAG(FinancialAgentApp):
    """Variant backed by the hosted OpenAI API with server-side file_search."""

    def __init__(self, st, model_name="gpt-5-mini-2025-08-07"):
        """Build the OpenAI client (credentials come from the environment)."""
        super().__init__(st, model_name)
        # Fix: original assigned to self.clien (typo), so __stream_answer__'s
        # use of self.client raised AttributeError.
        self.client = OpenAI()

    def __stream_answer__(self, instructions, input_messages):
        """Yield text deltas; PDF retrieval is delegated to the file_search tool."""
        response_stream = self.client.responses.create(
            model=self.model_name,
            instructions=instructions,
            input=input_messages,
            stream=True,
            tools=[{
                "type": "file_search",
                "vector_store_ids": ['vs_68bf713eea2c81919ac08298a05d6704']
            }]
        )
        for chunk in response_stream:
            if chunk.type == 'response.output_text.delta':
                yield chunk.delta

    def handle_context(self, response_state: ResponseState) -> str:
        """Handle additional context (data, PDF, etc.).

        Fix: renamed from ``__handle_context__`` — the base class declares and
        calls the abstract hook ``handle_context``, so the original name left
        the class abstract and uninstantiable.
        """
        context_prompt = ""
        if response_state.contextType in ("data", "both"):
            # SECURITY: executes model-generated code against the dataframe;
            # acceptable only for trusted/demo use.
            local_scope = {"df": self.df, "np": np, "pd": pd, "plt": plt, "savefig": self.__safe_savefig__}
            exec(response_state.code, {}, local_scope)

            fig = plt.gcf()
            if fig.get_axes():  # a chart was generated
                with self.st.chat_message("assistant"):
                    self.st.pyplot(fig)
                plt.close(fig)

            context_prompt = "## CONTEXT DATAFRAME.\n"
            context_prompt += str(local_scope.get("result", ""))

        # PDF context is handled by the file_search tool in __stream_answer__,
        # so there is no local retrieval branch here.

        return context_prompt
src/streamlit_app.py CHANGED
@@ -1,131 +1,14 @@
1
- from models import ResponseState
2
- from prompt import REFINERY_PROMPT, FINAL_PROMPT
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- import pandas as pd
6
- import streamlit as st
7
- from openai import OpenAI
8
- import pickle
9
- import io
10
 
11
- class FinancialAgentApp:
12
- def __init__(self):
13
- self.client = OpenAI()
14
- self.df = pickle.load(open("fraudTrainData.pkl", "rb"))
15
- self.model_name = "gpt-5-mini-2025-08-07"
16
-
17
- if "messages" not in st.session_state:
18
- st.session_state.messages = []
19
- st.session_state["openai_model"] = self.model_name
20
-
21
- def render_header(self):
22
- st.title("Financial Agent")
23
-
24
- def render_messages(self):
25
- """Render previous chat messages."""
26
- for message in st.session_state.messages:
27
- with st.chat_message(message["role"]):
28
- st.markdown(message["content"])
29
-
30
- def stream_answer(self, instructions, input_messages):
31
- """Stream OpenAI response as a generator."""
32
- response_stream = self.client.responses.create(
33
- model=self.model_name,
34
- instructions=instructions,
35
- input=input_messages,
36
- stream=True,
37
- tools=[{
38
- "type": "file_search",
39
- "vector_store_ids": ['vs_68bf713eea2c81919ac08298a05d6704']
40
- }]
41
- )
42
- for chunk in response_stream:
43
- if chunk.type == 'response.output_text.delta':
44
- yield chunk.delta
45
-
46
- def process_prompt(self, prompt):
47
- """Main pipeline for processing a new user input."""
48
- st.session_state.messages.append({"role": "user", "content": prompt})
49
- with st.chat_message("user"):
50
- st.markdown(prompt)
51
-
52
- # Step 1: Run refinery prompt
53
- response = self.client.responses.parse(
54
- model=self.model_name,
55
- instructions=REFINERY_PROMPT.format(
56
- df_head=self.df.head().to_markdown(),
57
- df_columns=self.df.columns.tolist(),
58
- df_sample=self.df.sample(5).to_markdown()
59
- ),
60
- input=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages],
61
- stream=False,
62
- text_format=ResponseState
63
- )
64
-
65
- response_state: ResponseState = response.output_parsed
66
-
67
- # Step 2: Check if context is needed
68
- if response_state.isNeedContext:
69
- context_prompt = self.handle_context(response_state)
70
- self.generate_final_answer(context_prompt)
71
- else:
72
- self.display_final_answer(response_state.response)
73
- def __safe_savefig__(*args, **kwargs):
74
- buf = io.BytesIO()
75
- plt.savefig(buf, format="png")
76
- buf.seek(0)
77
- return buf
78
-
79
- def handle_context(self, response_state: ResponseState) -> str:
80
- """Handle additional context (data, PDF, etc.)."""
81
- context_prompt = ""
82
- if response_state.contextType in ("data", "both"):
83
- local_scope = {"df": self.df, "np": np, "pd": pd, "plt": plt, "savefig": self.__safe_savefig__}
84
- exec(response_state.code, {}, local_scope)
85
-
86
- fig = plt.gcf()
87
- if fig.get_axes(): # if a chart was generated
88
- with st.chat_message("assistant"):
89
- st.pyplot(fig)
90
- plt.close(fig)
91
-
92
- context_prompt = "## CONTEXT DATAFRAME.\n"
93
- context_prompt += str(local_scope.get("result", ""))
94
-
95
- # Placeholder for PDF or other context handling
96
- # elif response_state.contextType in ("pdf", "both"):
97
- # context_prompt = "Provide the relevant information from the PDF documents."
98
-
99
- return context_prompt
100
-
101
- def generate_final_answer(self, context_prompt: str):
102
- """Generate and stream the final answer with context."""
103
- with st.chat_message("assistant"):
104
- answer = st.write_stream(
105
- self.stream_answer(
106
- instructions=FINAL_PROMPT,
107
- input_messages=[
108
- {"role": m["role"], "content": m["content"]}
109
- for m in st.session_state.messages
110
- ] + [{"role": "user", "content": context_prompt}]
111
- )
112
- )
113
- st.session_state.messages.append({"role": "assistant", "content": answer})
114
-
115
- def display_final_answer(self, answer: str):
116
- """Display a non-streamed assistant answer."""
117
- st.session_state.messages.append({"role": "assistant", "content": answer})
118
- with st.chat_message("assistant"):
119
- st.markdown(answer)
120
-
121
- def run(self):
122
- """Run the app."""
123
- self.render_header()
124
- self.render_messages()
125
-
126
- if prompt := st.chat_input("What is up?"):
127
- self.process_prompt(prompt)
128
 
129
  if __name__ == "__main__":
130
- app = FinancialAgentApp()
 
 
 
 
 
 
131
  app.run()
 
1
from huggingface_hub import hf_hub_download
from FinancialAgentApp import HFFinancialRAG, OpenAIFinancialRAG
import os

import streamlit as st


if __name__ == "__main__":
    # Download the FAISS index from the dataset repo. NOTE(review): only
    # HFFinancialRAG reads a local FAISS index; for OpenAIFinancialRAG the
    # vector store lives server-side, so this download is kept for parity —
    # confirm it is still needed here.
    index_path = hf_hub_download(
        repo_id="mrfirdauss/FaissBhatlaBook",
        filename="vs_68bf713eea2c81919ac08298a05d6704/index.faiss",
        repo_type="dataset"
    )
    # Fix: OpenAIFinancialRAG.__init__ requires the streamlit module as its
    # first argument; the original called it with no arguments (TypeError),
    # and this file never imported streamlit.
    app = OpenAIFinancialRAG(st)

    app.run()
vs_68bf713eea2c81919ac08298a05d6704/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ed1be49bcb8d019522a1838992eaad2c3fd5f8ed62c4da9b6c8cee3f17bfc78
3
+ size 69695