jsds003 committed on
Commit
2728d04
·
1 Parent(s): 27a10b6

Removed all content to test deployment

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +0 -292
src/streamlit_app.py CHANGED
@@ -5,304 +5,12 @@ from pygwalker.api.streamlit import StreamlitRenderer
5
  import re
6
  from typing import List, Any
7
 
8
@st.cache_resource
def getPipeline():
    """Build the text-generation pipeline shared by the agents in this file.

    Decorated with ``st.cache_resource`` so the object is created through
    Streamlit's resource cache rather than on every call.

    Returns:
        Whatever ``pipeline("text-generation", ...)`` returns for the
        ``nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1`` model.
        NOTE(review): ``pipeline`` is imported outside this view; presumably
        it is ``transformers.pipeline`` - confirm.
    """
    return pipeline("text-generation", model="nvidia/Llama-3.1-Nemotron-Nano-4B-v1.1")


@st.cache_resource
def get_pyg_renderer(df: pd.DataFrame):
    """Create the ``StreamlitRenderer`` for ``df``.

    Args:
        df: The DataFrame to explore.

    Returns:
        A ``StreamlitRenderer`` wrapping ``df``.
    """
    # Build from the argument rather than from st.session_state.df: the
    # parameter was previously ignored, which tied this helper to hidden
    # global state and let the cached value disagree with its own argument.
    return StreamlitRenderer(df)


# Module-level handle used by every agent function that calls the model.
pipe = getPipeline()
18
-
19
def FileSummaryHelper(df: pd.DataFrame) -> str:
    """Gather baseline information about the dataset.

    Args:
        df: The dataset to summarise.

    Returns:
        A multi-line text block listing each column with its dtype and
        percentage of missing values, followed by the total row count and
        the number of duplicated rows.
    """
    # DataFrame.items() yields one (label, Series) pair per column, so two
    # columns sharing a label are both reported. `df[label]` would return a
    # DataFrame in that case, which has no `.dtype`.
    colSummaries = [
        f"'{col}' | Data Type: {values.dtype} | Missing Percentage: {values.isna().mean()*100:.2f}%"
        for col, values in df.items()
    ]
    colTypesAndNulls = "\n".join(colSummaries)

    # keep=False marks every member of a duplicate group, not only the repeats.
    duplicateVals = df.duplicated(keep=False).sum()
    totalVals = len(df)

    return f"""
    The columns of the data have the following datatypes and missing value percentages:
    {colTypesAndNulls}

    The dataset has {totalVals} total rows.

    The dataset has {duplicateVals} duplicated rows.
    """
39
-
40
def FileDescriptionAgent(userDesc: str, df: pd.DataFrame) -> str:
    """Generate a short description of the file's contents.

    Builds a prompt from the column names, the output of
    ``FileSummaryHelper(df)`` and the optional user description, then asks
    the module-level ``pipe`` for a 2-3 sentence summary.

    Args:
        userDesc: Optional free-text description supplied by the user. An
            empty or falsy value is left out of the prompt.
        df: The dataset to describe.

    Returns:
        The ``generated_text`` of the first item returned by ``pipe``.
    """
    userDesc = "" if not userDesc else "I have described the dataset as follows: " + userDesc
    fileSummary = FileSummaryHelper(df)

    # Column labels are not guaranteed to be strings (integers, tuples...),
    # and str.join raises TypeError on anything else.
    columnList = ', '.join(map(str, df.columns))

    prompt = f""" You are given a DataFrame `df` with columns: {columnList}
    {fileSummary}
    {userDesc}

    Qualitatively describe the dataset in 2-3 concise sentences. Your response must only include the description with no explanations before or after."""

    messages = [
        {"role": "system", "content": "detailed thinking off. You are an insightful Data Analyst."},
        {"role": "user", "content": prompt},
    ]

    response = pipe(messages, temperature=0.2, max_new_tokens=1024, return_full_text=False)[0]['generated_text']

    return response
61
-
62
# A list marker is a run of digits followed by a period, where the run is not
# the fractional part of a number (not preceded by a digit or period) and the
# period is not a decimal point (not followed by a digit). "1. " and "2."
# split; "3.5" and "v1.1" do not.
_LIST_MARKER = re.compile(r'(?<![\d.])\d+\.(?!\d)\s*')


def _split_numbered_list(text: str) -> List[str]:
    """Split a numbered list ("1. a, 2. b") into its non-empty items."""
    # The prompt asks for "1. question1, 2. question2, ...", so items may end
    # with the separating comma; strip it along with surrounding whitespace.
    items = (part.strip(" \t\r\n,") for part in _LIST_MARKER.split(text))
    return [item for item in items if item]


def AnlaysisQuestionAgent(summary: str):
    """Ask the model for analytical questions about a summarised dataset.

    Args:
        summary: Text description of the dataset, sent as the user message.

    Returns:
        The questions parsed from the model's numbered list, as a list of
        strings. The prompt requests three, but the list holds however many
        items the response actually contained (possibly none).
    """
    messages = [
        {"role": "system", "content": \
        """detailed thinking off. You are an inquisitive Data Analyst.
        Given the following summary of a dataset, create a list of 3 analytical questions, following these rules:

        Rules
        -----
        1. The questions must be answerable through simple Pandas operations with only the given data.
        2. Your response must only include the three questions in a numbered list. Do not include explanations or caveats before or after.
        3. Ensure the output list is formated: 1. question1, 2. question2, 3. question3
        """},
        {"role": "user", "content": summary},
    ]

    response = pipe(messages, temperature=0.2, max_new_tokens=1024, return_full_text=False)[0]['generated_text']

    return _split_numbered_list(response)
85
-
86
def CodeGeneratorTool(cols: List[str], query: str) -> str:
    """Build the prompt asking the LLM for pandas-only code (no plotting).

    Args:
        cols: Column names of the DataFrame the generated code will see.
        query: The question the generated code must answer.

    Returns:
        The prompt text, including the rules the snippet must follow
        (operate on ``df``, store the answer in ``result``, single fenced
        block).
    """
    column_list = ', '.join(cols)
    instructions = f"""
    Given DataFrame `df` with columns: {column_list}
    Write Python code (pandas **only**, no plotting) to answer:
    "{query}"

    Rules
    -----
    1. Use pandas operations on `df` only.
    2. Assign the final result to `result`.
    3. Wrap the snippet in a single ```python code fence (no extra prose).
    """
    return instructions
100
-
101
def CodeExecutionHelper(code: str, df: pd.DataFrame):
    """Execute the generated code, returning the result or an error message.

    Args:
        code: Python source to run. It can refer to ``pd`` and ``df`` and is
            expected to assign its answer to ``result``.
        df: The DataFrame exposed to the code as ``df``.

    Returns:
        The value bound to ``result`` after execution, ``None`` if the code
        never assigned it, or the string ``"Error executing code: <exc>"``
        if execution raised.
    """
    # SECURITY NOTE(review): `code` comes from a language model and runs with
    # the full privileges of this process. exec() is not a sandbox; consider
    # isolating this step before exposing the app to untrusted users.
    env = {"pd": pd, "df": df}
    try:
        # Use ONE namespace as both globals and locals. With separate dicts
        # (exec(code, {}, env)) the names `df`/`pd` live only in the locals,
        # so any lambda or function defined by the snippet - e.g. inside
        # df.apply(...) - looks them up in the empty globals and fails with
        # NameError.
        exec(code, env)
        return env.get("result", None)
    except Exception as exc:
        # Deliberately broad: any failure of the generated code is reported
        # back as text so the caller can show or explain it.
        return f"Error executing code: {exc}"
110
-
111
def CodeExtractorHelper(text: str) -> str:
    """Return the contents of the first fenced python block in ``text``.

    Args:
        text: Model output that may contain a python code fence.

    Returns:
        The stripped text between the first opening python fence and the
        next closing fence, or ``""`` when either fence is missing.
    """
    opening_fence = "```python"
    closing_fence = "```"

    _, found_open, after_open = text.partition(opening_fence)
    if not found_open:
        return ""

    snippet, found_close, _ = after_open.partition(closing_fence)
    if not found_close:
        return ""

    return snippet.strip()
122
-
123
def ToolSelectorAgent(query: str, df: pd.DataFrame) -> str:
    """Ask the model for pandas code that answers the user's query.

    Builds the user prompt with ``CodeGeneratorTool``, sends it to ``pipe``
    together with a system prompt, and extracts the code from the reply
    with ``CodeExtractorHelper``.

    Args:
        query: The user's question about the data.
        df: The dataset; only its column names are sent to the model.

    Returns:
        The code found in the first python fence of the response, or ``""``
        when the response contains no properly closed python fence.
    """
    prompt = CodeGeneratorTool(df.columns.tolist(), query)

    # Built with adjacent string literals: a backslash continuation *inside*
    # a string literal keeps the next source line's indentation as part of
    # the prompt text.
    systemPrompt = (
        "detailed thinking off. You are a Python data-analysis expert who writes clean, efficient code. "
        "Solve the given problem with optimal pandas operations. Be concise and focused. "
        "Your response must contain ONLY a properly-closed ```python code block with no explanations before or after. "
        "Ensure your solution is correct, handles edge cases, and follows best practices for data analysis."
    )

    messages = [
        {"role": "system", "content": systemPrompt},
        {"role": "user", "content": prompt},
    ]

    response = pipe(messages, temperature=0.2, max_new_tokens=1024, return_full_text=False)[0]['generated_text']
    return CodeExtractorHelper(response)
139
-
140
def ReasoningPromptGenerator(query: str, result: Any) -> str:
    """Build the prompt asking the model to explain a query result.

    Args:
        query: The question the user asked.
        result: The value produced for that question, or the
            ``"Error executing code..."`` string from a failed run.

    Returns:
        The prompt text containing the query and a description of the result.
    """
    failed = isinstance(result, str) and result.startswith("Error executing code")

    # Error messages are passed through whole. Any other result is
    # stringified and cut to its first 300 characters - presumably to keep
    # the prompt small; the original code left the reason as an open question.
    summary = result if failed else str(result)[:300]

    return f"""
    The user asked: "{query}".
    The result value is: {summary}
    Explain in 2-3 concise sentences what this tells about the data (no mention of charts)."""
155
-
156
def ReasoningAgent(query: str, result: Any):
    """Run the reasoning prompt and return the explanation and the thinking.

    Args:
        query: The question the user asked.
        result: The value (or error string) produced for that question.

    Returns:
        A ``(response, thinking)`` pair of strings. When the model output
        contains ``</think>``, ``thinking`` is the text before the first
        occurrence and ``response`` the text after it. Otherwise
        ``response`` is the whole output and ``thinking`` is ``""``.
    """
    prompt = ReasoningPromptGenerator(query, result)

    messages = [
        {"role": "system", "content": "detailed thinking on. You are an insightful data analyst"},
        {"role": "user", "content": prompt},
    ]

    response = pipe(messages, temperature=0.2, max_new_tokens=1024, return_full_text=False)[0]['generated_text']

    # Always bind `thinking`: it used to be assigned only inside the branch
    # below, so a reply without "</think>" raised UnboundLocalError on return.
    thinking = ""
    if "</think>" in response:
        thinking, response = response.split("</think>", 1)
        # Drop the opening tag if present, so only the reasoning text is
        # returned. Presumably the model emits "<think>" before it - confirm.
        thinking = thinking.replace("<think>", "", 1)
    return response, thinking
175
-
176
def ResponseBuilderTool(question: str) -> str:
    """Answer ``question`` and package the answer as chat-message markup.

    Generates code with ``ToolSelectorAgent``, runs it against
    ``st.session_state.df`` with ``CodeExecutionHelper``, asks
    ``ReasoningAgent`` to explain the result, and assembles the pieces.

    Args:
        question: The user's question about the loaded data.

    Returns:
        A string holding a result header, an optional collapsed "Reasoning"
        section, the explanation text, and a collapsed "View code" section.
        It contains raw HTML.
    """
    # Function-scope import: `html` is needed only here.
    import html

    code = ToolSelectorAgent(question, st.session_state.df)
    result = CodeExecutionHelper(code, st.session_state.df)
    reasoning_txt, raw_thinking = ReasoningAgent(question, result)
    reasoning_txt = reasoning_txt.replace("`", "")

    # Build assistant response
    if isinstance(result, pd.DataFrame):
        header = f"Result: {len(result)} rows"
    elif isinstance(result, pd.Series):
        header = "Result series"
    else:
        header = f"Result: {result}"

    # Show only reasoning thinking in Model Thinking (collapsed by default).
    # Escaped because it is inserted into HTML: characters such as "<" or "&"
    # in the model's text would otherwise be parsed as markup.
    thinking_html = ""
    if raw_thinking:
        thinking_html = (
            '<details class="thinking">'
            '<summary>🧠 Reasoning</summary>'
            f'<pre>{html.escape(raw_thinking)}</pre>'
            '</details>'
        )

    # Code accordion. Escaped for the same reason: generated pandas code
    # routinely contains "<", ">" and "&" (e.g. `df[df.a < 3]`).
    code_html = (
        '<details class="code">'
        '<summary>View code</summary>'
        '<pre><code class="language-python">'
        f'{html.escape(code)}'
        '</code></pre>'
        '</details>'
    )

    # Combine thinking, explanation, and code accordion
    return f"{header}\n\n{thinking_html}{reasoning_txt}\n\n{code_html}"
211
-
212
-
213
def main():
    """Streamlit App

    Lets the user upload a CSV, shows a model-written summary and a data
    explorer, and hosts a sidebar chat about the data.

    The sidebar is driven by ``st.session_state.num_question_asked``:
    ``0`` offers suggested questions plus a free-text box, ``1`` answers the
    first question, and any larger value is the ongoing chat.
    """

    st.set_page_config(layout="wide")
    st.title("Analytics Agent")

    file = st.file_uploader("Choose CSV", type=["csv"])

    if file:
        if ("df" not in st.session_state) or (st.session_state.get("current_file") != file.name):
            st.session_state.df = pd.read_csv(file)
            st.session_state.current_file = file.name

            # A different file invalidates everything derived from the old
            # one. Without this reset the suggested questions (guarded by a
            # "not in session_state" check below) and the chat history would
            # still refer to the previous dataset.
            if "analsyis_questions" in st.session_state:
                del st.session_state["analsyis_questions"]
            st.session_state.first_question = ""
            st.session_state.num_question_asked = 0
            st.session_state.messages = []

            with st.spinner("Summarizing..."):
                st.session_state.file_summary = FileDescriptionAgent("", st.session_state.df)

        st.markdown("### Data Summary:")
        st.text(st.session_state.file_summary)

        pygApp = get_pyg_renderer(st.session_state.df)
        pygApp.explorer(default_tab="data")

        # CSS comments use /* ... */; a "#" comment is not valid inside a
        # style sheet.
        st.markdown(
            """
            <style>
                section[data-testid="stSidebar"] {
                    width: 500px !important; /* Set the width to your desired value */
                }
            </style>
            """,
            unsafe_allow_html=True,
        )

        with st.sidebar:
            st.markdown("## Analysis Discussion:")

            if "first_question" not in st.session_state:
                st.session_state.first_question = ""

            if "num_question_asked" not in st.session_state:
                st.session_state.num_question_asked = 0

            if "messages" not in st.session_state:
                st.session_state.messages = []

            if st.session_state.num_question_asked == 0:
                with st.spinner("Preparing Anlaysis..."):
                    if "analsyis_questions" not in st.session_state:
                        st.session_state.analsyis_questions = AnlaysisQuestionAgent(st.session_state.file_summary)

                # Offer up to three suggestions. Looping (instead of indexing
                # [0], [1], [2]) tolerates a model reply with fewer than
                # three questions, and the explicit key keeps buttons with
                # identical labels from colliding.
                suggestionClicked = False
                with st.container():
                    for idx, question in enumerate(st.session_state.analsyis_questions[:3]):
                        if st.button(question, key=f"suggested_question_{idx}"):
                            st.session_state.first_question = question
                            suggestionClicked = True

                # Free text takes precedence over a suggestion, as before.
                chat = st.chat_input("Something else...")
                if chat:
                    st.session_state.first_question = chat

                if suggestionClicked or chat:
                    st.session_state.num_question_asked += 1
                    st.session_state.messages.append({"role": "user", "content": st.session_state.first_question})
                    st.rerun()

            elif st.session_state.num_question_asked == 1:
                with st.container():
                    for msg in st.session_state.messages:
                        with st.chat_message(msg["role"]):
                            st.markdown(msg["content"], unsafe_allow_html=True)
                with st.spinner("Working …"):
                    st.session_state.messages.append({
                        "role": "assistant",
                        "content": ResponseBuilderTool(st.session_state.first_question)
                    })
                st.session_state.num_question_asked += 1
                st.rerun()

            else:
                with st.container():
                    for msg in st.session_state.messages:
                        with st.chat_message(msg["role"]):
                            st.markdown(msg["content"], unsafe_allow_html=True)
                if user_q := st.chat_input("Ask about your data…"):
                    st.session_state.messages.append({"role": "user", "content": user_q})
                    with st.spinner("Working …"):
                        st.session_state.messages.append({
                            "role": "assistant",
                            "content": ResponseBuilderTool(user_q)
                        })
                    st.session_state.num_question_asked += 1
                    st.rerun()


if __name__ == "__main__":
    main()
308
 
 
5
  import re
6
  from typing import List, Any
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
def main():
    """Streamlit App

    Post-commit version of the entry point: sets a wide page layout and
    shows the page title; nothing else is rendered.
    """
    page_title = "Analytics Agent"

    st.set_page_config(layout="wide")
    st.title(page_title)


if __name__ == "__main__":
    main()
16