cryogenic22 committed on
Commit
2b2cad6
·
verified ·
1 Parent(s): 2a4697e

Update agents/analytics_agent.py

Browse files
Files changed (1) hide show
  1. agents/analytics_agent.py +203 -91
agents/analytics_agent.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import json
3
  import pandas as pd
4
  import numpy as np
 
5
  from typing import Dict, List, Any, Tuple, Optional
6
  from pydantic import BaseModel, Field
7
  from langchain_anthropic import ChatAnthropic
@@ -11,6 +12,11 @@ import re
11
  import matplotlib.pyplot as plt
12
  import seaborn as sns
13
  from io import StringIO
 
 
 
 
 
14
 
15
  class AnalysisRequest(BaseModel):
16
  """Structure for an analysis request"""
@@ -125,6 +131,8 @@ Please generate Python code to perform this analysis.
125
 
126
  # In-memory storage for analysis artifacts
127
  self.analysis_artifacts = {}
 
 
128
 
129
  def extract_python_from_response(self, response: str) -> str:
130
  """Extract Python code from LLM response"""
@@ -139,6 +147,7 @@ Please generate Python code to perform this analysis.
139
  return python_match.group(1).strip()
140
 
141
  # If all else fails, return empty string
 
142
  return ""
143
 
144
  def extract_insights_from_code_output(self, output: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], Dict[str, float], float]:
@@ -149,104 +158,207 @@ Please generate Python code to perform this analysis.
149
 
150
  return insights, attribution, confidence
151
 
152
- def perform_analysis(self, request: AnalysisRequest, data_sources: Dict[str, Any]) -> AnalysisResult:
153
- """Perform analysis based on request and return results"""
154
- print(f"Analytics Agent: Performing {request.analysis_type} analysis - {request.description}")
155
-
156
- # Format data sources description for the prompt
157
- data_sources_desc = ""
158
- for source_id, source in data_sources.items():
159
- df = source.content
160
- data_sources_desc += f"Data source '{source_id}' ({source.name}):\n"
161
- data_sources_desc += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
162
- data_sources_desc += f"- Columns: {', '.join(df.columns)}\n"
163
- data_sources_desc += f"- Sample data:\n{df.head(3).to_string()}\n\n"
164
 
165
- # Format the request for the prompt
166
- request_data = {
167
- "description": request.description,
168
- "data_sources": data_sources_desc,
169
- "analysis_type": request.analysis_type,
170
- "parameters": json.dumps(request.parameters, indent=2) if request.parameters else "None",
171
- "purpose": request.purpose
172
- }
173
 
174
- # Generate analysis code
175
- response = self.analysis_chain.invoke(request_data)
176
 
177
- # Extract Python code
178
- python_code = self.extract_python_from_response(response)
179
 
180
- # Execute analysis (with safety checks)
181
- insights = []
182
- attribution = {}
183
- confidence = 0.0
184
- visualizations = []
185
- metrics = {}
 
 
 
 
 
186
 
187
- if not python_code:
188
- print("Warning: No analysis code generated.")
189
- else:
190
- try:
191
- # Prepare data sources for the analysis
192
- analysis_data_sources = {src_id: src.content for src_id, src in data_sources.items()}
193
-
194
- # Create a local namespace with access to pandas, numpy, etc.
195
- local_namespace = {
196
- "pd": pd,
197
- "np": np,
198
- "plt": plt,
199
- "sns": sns,
200
- "data_sources": analysis_data_sources
201
- }
202
 
203
- # Capture print outputs
204
- original_stdout = sys.stdout
205
- sys.stdout = mystdout = StringIO()
206
-
207
- # Execute the code
208
- exec(python_code, local_namespace)
209
-
210
- # Restore stdout
211
- sys.stdout = original_stdout
212
- print_output = mystdout.getvalue()
213
-
214
- # Look for a run_analysis function and execute it
215
- if "run_analysis" in local_namespace:
216
- analysis_output = local_namespace["run_analysis"](analysis_data_sources)
217
- if isinstance(analysis_output, dict):
218
- insights = analysis_output.get("insights", [])
219
- attribution = analysis_output.get("attribution", {})
220
- confidence = analysis_output.get("confidence", 0.0)
221
- metrics = analysis_output.get("metrics", {})
222
- visualizations = analysis_output.get("visualizations", [])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
- # Store any figures in the local namespace as base64 encoded images
225
- for var_name, var_value in local_namespace.items():
226
- if isinstance(var_value, plt.Figure):
227
- fig_filename = f"figure_{request.request_id}_{var_name}.png"
228
- var_value.savefig(fig_filename)
229
- self.analysis_artifacts[fig_filename] = fig_filename
230
- visualizations.append(fig_filename)
231
-
232
- except Exception as e:
233
- print(f"Analysis execution error: {e}")
234
-
235
- # Create analysis result
236
- result = AnalysisResult(
237
- result_id=f"analysis_{request.request_id}",
238
- name=f"Analysis of {request.description}",
239
- description=request.description,
240
- analysis_type=request.analysis_type,
241
- code=python_code,
242
- visualizations=visualizations,
243
- insights=insights,
244
- metrics=metrics,
245
- attribution=attribution,
246
- confidence=confidence
247
- )
248
-
249
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  # For testing
252
  if __name__ == "__main__":
 
2
  import json
3
  import pandas as pd
4
  import numpy as np
5
+ import sys
6
  from typing import Dict, List, Any, Tuple, Optional
7
  from pydantic import BaseModel, Field
8
  from langchain_anthropic import ChatAnthropic
 
12
  import matplotlib.pyplot as plt
13
  import seaborn as sns
14
  from io import StringIO
15
+ import logging
16
+
17
+ # Set up logging
18
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
19
+ logger = logging.getLogger("analytics_agent")
20
 
21
  class AnalysisRequest(BaseModel):
22
  """Structure for an analysis request"""
 
131
 
132
  # In-memory storage for analysis artifacts
133
  self.analysis_artifacts = {}
134
+
135
+ logger.info("Analytics Agent initialized successfully")
136
 
137
  def extract_python_from_response(self, response: str) -> str:
138
  """Extract Python code from LLM response"""
 
147
  return python_match.group(1).strip()
148
 
149
  # If all else fails, return empty string
150
+ logger.warning("No code block found in response")
151
  return ""
152
 
153
  def extract_insights_from_code_output(self, output: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], Dict[str, float], float]:
 
158
 
159
  return insights, attribution, confidence
160
 
161
+ def generate_default_analysis(self, request: AnalysisRequest, data_sources: Dict[str, Any]) -> Dict[str, Any]:
162
+ """Generate a default analysis output when code execution fails"""
163
+ logger.info(f"Generating default analysis for {request.description}")
 
 
 
 
 
 
 
 
 
164
 
165
+ # Create default insights based on request description
166
+ insights = [
167
+ {
168
+ "finding": f"Analysis of {request.description}",
169
+ "details": "Default analysis created due to execution issues",
170
+ "impact": "Recommend manual investigation"
171
+ }
172
+ ]
173
 
174
+ # Create default attribution
175
+ attribution = {"unknown_factors": 1.0}
176
 
177
+ # Default metrics
178
+ metrics = {"analysis_completion": 0.0}
179
 
180
+ return {
181
+ "insights": insights,
182
+ "attribution": attribution,
183
+ "metrics": metrics,
184
+ "visualizations": [],
185
+ "confidence": 0.5
186
+ }
187
+
188
+ def perform_analysis(self, request: AnalysisRequest, data_sources: Dict[str, Any]) -> AnalysisResult:
189
+ """Perform analysis based on request and return results"""
190
+ logger.info(f"Analytics Agent: Performing {request.analysis_type} analysis - {request.description}")
191
 
192
+ try:
193
+ # Format data sources description for the prompt
194
+ data_sources_desc = ""
195
+ for source_id, source in data_sources.items():
196
+ if not hasattr(source, 'content') or source.content is None:
197
+ logger.warning(f"Data source {source_id} has no content attribute or content is None")
198
+ continue
 
 
 
 
 
 
 
 
199
 
200
+ df = source.content
201
+ data_sources_desc += f"Data source '{source_id}' ({source.name}):\n"
202
+ data_sources_desc += f"- Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
203
+ data_sources_desc += f"- Columns: {', '.join(df.columns)}\n"
204
+ data_sources_desc += f"- Sample data:\n{df.head(3).to_string()}\n\n"
205
+
206
+ # Format the request for the prompt
207
+ request_data = {
208
+ "description": request.description,
209
+ "data_sources": data_sources_desc,
210
+ "analysis_type": request.analysis_type,
211
+ "parameters": json.dumps(request.parameters, indent=2) if request.parameters else "None",
212
+ "purpose": request.purpose
213
+ }
214
+
215
+ # Generate analysis code
216
+ logger.info("Generating analysis code")
217
+ response = self.analysis_chain.invoke(request_data)
218
+
219
+ # Extract Python code
220
+ python_code = self.extract_python_from_response(response)
221
+
222
+ # Initialize default values
223
+ insights = []
224
+ attribution = {}
225
+ confidence = 0.0
226
+ visualizations = []
227
+ metrics = {}
228
+
229
+ if not python_code:
230
+ logger.warning("No analysis code generated. Using default analysis.")
231
+ default_analysis = self.generate_default_analysis(request, data_sources)
232
+ insights = default_analysis["insights"]
233
+ attribution = default_analysis["attribution"]
234
+ confidence = default_analysis["confidence"]
235
+ metrics = default_analysis["metrics"]
236
+ else:
237
+ try:
238
+ # Prepare data sources for the analysis
239
+ analysis_data_sources = {}
240
+ for src_id, src in data_sources.items():
241
+ if hasattr(src, 'content') and src.content is not None:
242
+ analysis_data_sources[src_id] = src.content
243
+
244
+ if not analysis_data_sources:
245
+ logger.warning("No valid data sources found for analysis")
246
+ default_analysis = self.generate_default_analysis(request, data_sources)
247
+ insights = default_analysis["insights"]
248
+ attribution = default_analysis["attribution"]
249
+ confidence = default_analysis["confidence"]
250
+ metrics = default_analysis["metrics"]
251
+ else:
252
+ # Create a local namespace with access to pandas, numpy, etc.
253
+ local_namespace = {
254
+ "pd": pd,
255
+ "np": np,
256
+ "plt": plt,
257
+ "sns": sns,
258
+ "data_sources": analysis_data_sources
259
+ }
260
 
261
+ # Capture print outputs
262
+ stdout_backup = sys.stdout
263
+ sys.stdout = mystdout = StringIO()
264
+
265
+ # Execute the code
266
+ logger.info("Executing analysis code")
267
+ exec(python_code, local_namespace)
268
+
269
+ # Restore stdout
270
+ sys.stdout = stdout_backup
271
+ print_output = mystdout.getvalue()
272
+ logger.debug(f"Code execution output: {print_output}")
273
+
274
+ # Look for a run_analysis function and execute it
275
+ if "run_analysis" in local_namespace:
276
+ logger.info("Running analysis function")
277
+ analysis_output = local_namespace["run_analysis"](analysis_data_sources)
278
+
279
+ if isinstance(analysis_output, dict):
280
+ insights = analysis_output.get("insights", [])
281
+ attribution = analysis_output.get("attribution", {})
282
+ confidence = analysis_output.get("confidence", 0.0)
283
+ metrics = analysis_output.get("metrics", {})
284
+ visualizations = analysis_output.get("visualizations", [])
285
+
286
+ # Store any figures in the local namespace as base64 encoded images
287
+ for var_name, var_value in local_namespace.items():
288
+ if isinstance(var_value, plt.Figure):
289
+ fig_filename = f"figure_{request.request_id}_{var_name}.png"
290
+ var_value.savefig(fig_filename)
291
+ self.analysis_artifacts[fig_filename] = fig_filename
292
+ visualizations.append(fig_filename)
293
+ else:
294
+ logger.warning(f"run_analysis returned non-dict type: {type(analysis_output)}")
295
+ default_analysis = self.generate_default_analysis(request, data_sources)
296
+ insights = default_analysis["insights"]
297
+ attribution = default_analysis["attribution"]
298
+ confidence = default_analysis["confidence"]
299
+ metrics = default_analysis["metrics"]
300
+ else:
301
+ logger.warning("No run_analysis function found in generated code")
302
+ # Generate a minimal default analysis
303
+ default_analysis = self.generate_default_analysis(request, data_sources)
304
+ insights = default_analysis["insights"]
305
+ attribution = default_analysis["attribution"]
306
+ confidence = default_analysis["confidence"]
307
+ metrics = default_analysis["metrics"]
308
+
309
+ except Exception as e:
310
+ logger.error(f"Analysis execution error: {e}", exc_info=True)
311
+ logger.error(f"Python code that failed: {python_code}")
312
+
313
+ # Generate a minimal default analysis on execution failure
314
+ default_analysis = self.generate_default_analysis(request, data_sources)
315
+ insights = default_analysis["insights"]
316
+ attribution = default_analysis["attribution"]
317
+ confidence = default_analysis["confidence"]
318
+ metrics = default_analysis["metrics"]
319
+
320
+ # Ensure we have at least one insight
321
+ if not insights:
322
+ insights = [{"finding": "No specific insights found", "details": "Analysis completed but no significant patterns were identified", "impact": "No immediate action required"}]
323
+
324
+ # Ensure we have attribution
325
+ if not attribution:
326
+ attribution = {"unattributed_factors": 1.0}
327
+
328
+ # Create analysis result
329
+ result = AnalysisResult(
330
+ result_id=f"analysis_{request.request_id}",
331
+ name=f"Analysis of {request.description}",
332
+ description=request.description,
333
+ analysis_type=request.analysis_type,
334
+ code=python_code,
335
+ visualizations=visualizations,
336
+ insights=insights,
337
+ metrics=metrics,
338
+ attribution=attribution,
339
+ confidence=confidence
340
+ )
341
+
342
+ logger.info(f"Analysis for {request.description} completed successfully")
343
+ return result
344
+
345
+ except Exception as e:
346
+ logger.error(f"Error in perform_analysis: {e}", exc_info=True)
347
+
348
+ # Create a fallback analysis result on error
349
+ default_analysis = self.generate_default_analysis(request, data_sources)
350
+
351
+ return AnalysisResult(
352
+ result_id=f"analysis_{request.request_id}",
353
+ name=f"Analysis of {request.description} (Error)",
354
+ description=request.description,
355
+ analysis_type=request.analysis_type,
356
+ code="# Error during analysis",
357
+ insights=default_analysis["insights"],
358
+ metrics=default_analysis["metrics"],
359
+ attribution=default_analysis["attribution"],
360
+ confidence=default_analysis["confidence"]
361
+ )
362
 
363
  # For testing
364
  if __name__ == "__main__":