Aka18 committed on
Commit
014c08f
·
verified ·
1 Parent(s): 3038e0c

Update data_analysis_agent.py

Browse files
Files changed (1) hide show
  1. data_analysis_agent.py +143 -45
data_analysis_agent.py CHANGED
@@ -518,55 +518,153 @@ class DataAnalysisAgent:
518
  return state
519
 
520
  def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
521
- """Main method to analyze a dataset"""
522
- logger.info(f"Starting analysis of dataset: {dataset_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
 
524
  try:
525
- # Load dataset
526
- if dataset_path.endswith('.csv'):
527
- df = pd.read_csv(dataset_path)
528
- elif dataset_path.endswith(('.xlsx', '.xls')):
529
- df = pd.read_excel(dataset_path)
530
- elif dataset_path.endswith('.json'):
531
- df = pd.read_json(dataset_path)
 
 
 
 
 
 
 
532
  else:
533
- raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")
534
-
535
- # Initialize state with all required fields
536
- initial_state = AnalysisState(
537
- dataset=df,
538
- dataset_info={},
539
- column_analysis={},
540
- insights=[],
541
- visualizations=[],
542
- recommendations=[],
543
- current_step="",
544
- error_messages=[]
545
- )
546
-
547
- # Run the workflow
548
- final_state = self.workflow.invoke(initial_state)
549
-
550
- # Prepare results
551
- results = {
552
- "dataset_info": final_state.get("dataset_info", {}),
553
- "column_analysis": final_state.get("column_analysis", {}),
554
- "insights": final_state.get("insights", []),
555
- "visualizations": final_state.get("visualizations", []),
556
- "recommendations": final_state.get("recommendations", []),
557
- "analysis_timestamp": datetime.now().isoformat(),
558
- "errors": final_state.get("error_messages", [])
559
- }
560
-
561
- # Generate summary report
562
- self._generate_report(results, dataset_path)
563
-
564
- logger.info("Analysis completed successfully!")
565
- return results
566
-
567
  except Exception as e:
568
- logger.error(f"Error in dataset analysis: {str(e)}")
569
- return {"error": str(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
 
571
  def _generate_report(self, results: Dict[str, Any], dataset_path: str):
572
  """Generate a comprehensive analysis report"""
 
518
  return state
519
 
520
  def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
521
+ """Diagnostic version to identify the exact failure point"""
522
+ logger.info(f"πŸ” Starting analysis of dataset: {dataset_path}")
523
+
524
+ try:
525
+ # Load dataset
526
+ logger.info("πŸ“ Loading dataset...")
527
+ if dataset_path.endswith('.csv'):
528
+ df = pd.read_csv(dataset_path)
529
+ elif dataset_path.endswith(('.xlsx', '.xls')):
530
+ df = pd.read_excel(dataset_path)
531
+ elif dataset_path.endswith('.json'):
532
+ df = pd.read_json(dataset_path)
533
+ else:
534
+ raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")
535
+
536
+ logger.info(f"βœ… Dataset loaded: {df.shape}")
537
+
538
+ # Initialize state with all required fields
539
+ initial_state = AnalysisState(
540
+ dataset=df,
541
+ dataset_info={},
542
+ column_analysis={},
543
+ insights=[],
544
+ visualizations=[],
545
+ recommendations=[],
546
+ current_step="",
547
+ error_messages=[]
548
+ )
549
+
550
+ logger.info("πŸš€ Starting workflow execution...")
551
+
552
+ # Test each step individually to find the failure point
553
+ try:
554
+ logger.info("πŸ“Š Step 1: Data profiling...")
555
+ state = self._profile_dataset(initial_state)
556
+ logger.info(f"βœ… Data profiling completed. Info keys: {list(state.get('dataset_info', {}).keys())}")
557
+ except Exception as e:
558
+ logger.error(f"❌ Data profiling failed: {str(e)}")
559
+ state = initial_state
560
+ state['error_messages'] = [f"Data profiling failed: {str(e)}"]
561
 
562
  try:
563
+ logger.info("πŸ” Step 2: Column analysis...")
564
+ state = self._analyze_columns(state)
565
+ logger.info(f"βœ… Column analysis completed. Columns analyzed: {len(state.get('column_analysis', {}))}")
566
+ except Exception as e:
567
+ logger.error(f"❌ Column analysis failed: {str(e)}")
568
+ state['error_messages'].append(f"Column analysis failed: {str(e)}")
569
+
570
+ try:
571
+ logger.info("πŸ’‘ Step 3: Generating insights...")
572
+ state = self._generate_insights(state)
573
+ insights_count = len(state.get('insights', []))
574
+ logger.info(f"βœ… Insights generation completed. Generated: {insights_count} insights")
575
+ if insights_count > 0:
576
+ logger.info(f"First insight: {state['insights'][0][:100]}...")
577
  else:
578
+ logger.warning("⚠️ No insights were generated!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
579
  except Exception as e:
580
+ logger.error(f"❌ Insights generation failed: {str(e)}")
581
+ state['error_messages'].append(f"Insights generation failed: {str(e)}")
582
+ # Add fallback insights
583
+ state['insights'] = [
584
+ "Basic dataset analysis completed",
585
+ f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns",
586
+ "Manual review recommended for detailed insights"
587
+ ]
588
+
589
+ try:
590
+ logger.info("πŸ“ˆ Step 4: Planning visualizations...")
591
+ state = self._plan_visualizations(state)
592
+ viz_count = len(state.get('visualizations', []))
593
+ logger.info(f"βœ… Visualization planning completed. Planned: {viz_count} visualizations")
594
+ except Exception as e:
595
+ logger.error(f"❌ Visualization planning failed: {str(e)}")
596
+ state['error_messages'].append(f"Visualization planning failed: {str(e)}")
597
+
598
+ try:
599
+ logger.info("🎨 Step 5: Creating charts...")
600
+ state = self._create_charts(state)
601
+ logger.info("βœ… Chart creation completed")
602
+ except Exception as e:
603
+ logger.error(f"❌ Chart creation failed: {str(e)}")
604
+ state['error_messages'].append(f"Chart creation failed: {str(e)}")
605
+
606
+ try:
607
+ logger.info("🎯 Step 6: Generating recommendations...")
608
+ state = self._generate_recommendations(state)
609
+ rec_count = len(state.get('recommendations', []))
610
+ logger.info(f"βœ… Recommendations generation completed. Generated: {rec_count} recommendations")
611
+ if rec_count > 0:
612
+ logger.info(f"First recommendation: {state['recommendations'][0][:100]}...")
613
+ else:
614
+ logger.warning("⚠️ No recommendations were generated!")
615
+ except Exception as e:
616
+ logger.error(f"❌ Recommendations generation failed: {str(e)}")
617
+ state['error_messages'].append(f"Recommendations generation failed: {str(e)}")
618
+ # Add fallback recommendations
619
+ state['recommendations'] = [
620
+ "Conduct detailed data quality assessment",
621
+ "Implement data monitoring processes",
622
+ "Consider advanced analytics for business insights",
623
+ "Review data collection and validation procedures"
624
+ ]
625
+
626
+ # Clean up temp file
627
+ if os.path.exists(dataset_path):
628
+ try:
629
+ os.remove(dataset_path)
630
+ logger.info("🧹 Temporary file cleaned up")
631
+ except:
632
+ pass
633
+
634
+ # Prepare results
635
+ results = {
636
+ "dataset_info": state.get("dataset_info", {}),
637
+ "column_analysis": state.get("column_analysis", {}),
638
+ "insights": state.get("insights", []),
639
+ "visualizations": state.get("visualizations", []),
640
+ "recommendations": state.get("recommendations", []),
641
+ "analysis_timestamp": datetime.now().isoformat(),
642
+ "errors": state.get("error_messages", [])
643
+ }
644
+
645
+ # Log final results
646
+ logger.info(f"πŸŽ‰ Analysis completed!")
647
+ logger.info(f"πŸ“Š Final results: {len(results['insights'])} insights, {len(results['recommendations'])} recommendations")
648
+ logger.info(f"❌ Errors encountered: {len(results['errors'])}")
649
+
650
+ for error in results['errors']:
651
+ logger.error(f"Error details: {error}")
652
+
653
+ return results
654
+
655
+ except Exception as e:
656
+ logger.error(f"πŸ’₯ Critical analysis failure: {str(e)}")
657
+ logger.error(traceback.format_exc())
658
+ return {
659
+ "error": str(e),
660
+ "dataset_info": {},
661
+ "insights": [f"Analysis failed: {str(e)}"],
662
+ "recommendations": ["Please check logs and try again"],
663
+ "visualizations": [],
664
+ "column_analysis": {},
665
+ "analysis_timestamp": datetime.now().isoformat(),
666
+ "errors": [str(e)]
667
+ }
668
 
669
  def _generate_report(self, results: Dict[str, Any], dataset_path: str):
670
  """Generate a comprehensive analysis report"""