Aka18 committed on
Commit
73ebf19
Β·
verified Β·
1 Parent(s): 014c08f

Update data_analysis_agent.py

Browse files
Files changed (1) hide show
  1. data_analysis_agent.py +148 -147
data_analysis_agent.py CHANGED
@@ -7,6 +7,7 @@ import plotly.express as px
7
  import plotly.graph_objects as go
8
  from plotly.subplots import make_subplots
9
  import warnings
 
10
  warnings.filterwarnings('ignore')
11
 
12
  from typing import Dict, List, Any, Optional, TypedDict
@@ -19,7 +20,6 @@ from langgraph.graph import StateGraph, END
19
  from langchain_groq import ChatGroq
20
  from langchain_core.messages import HumanMessage, SystemMessage
21
  from langchain_core.prompts import ChatPromptTemplate
22
- os.environ['GROQ_TIMEOUT'] = '60'
23
 
24
  # Configure logging
25
  logging.basicConfig(level=logging.INFO)
@@ -39,12 +39,13 @@ class AnalysisState(TypedDict):
39
  class DataAnalysisAgent:
40
  def __init__(self, groq_api_key: str, model_name: str = "llama3-70b-8192"):
41
  """Initialize the Data Analysis Agent"""
42
- # Fixed: Use correct model name format
43
  self.llm = ChatGroq(
44
  groq_api_key=groq_api_key,
45
- model_name=model_name, # Fixed: Use standard model names
46
  temperature=0.1,
47
- max_tokens=2000
 
48
  )
49
 
50
  # Set up the analysis workflow graph
@@ -518,153 +519,153 @@ class DataAnalysisAgent:
518
  return state
519
 
520
  def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
521
- """Diagnostic version to identify the exact failure point"""
522
- logger.info(f"πŸ” Starting analysis of dataset: {dataset_path}")
523
-
524
- try:
525
- # Load dataset
526
- logger.info("πŸ“ Loading dataset...")
527
- if dataset_path.endswith('.csv'):
528
- df = pd.read_csv(dataset_path)
529
- elif dataset_path.endswith(('.xlsx', '.xls')):
530
- df = pd.read_excel(dataset_path)
531
- elif dataset_path.endswith('.json'):
532
- df = pd.read_json(dataset_path)
533
- else:
534
- raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")
535
-
536
- logger.info(f"βœ… Dataset loaded: {df.shape}")
537
 
538
- # Initialize state with all required fields
539
- initial_state = AnalysisState(
540
- dataset=df,
541
- dataset_info={},
542
- column_analysis={},
543
- insights=[],
544
- visualizations=[],
545
- recommendations=[],
546
- current_step="",
547
- error_messages=[]
548
- )
549
-
550
- logger.info("πŸš€ Starting workflow execution...")
551
-
552
- # Test each step individually to find the failure point
553
  try:
554
- logger.info("πŸ“Š Step 1: Data profiling...")
555
- state = self._profile_dataset(initial_state)
556
- logger.info(f"βœ… Data profiling completed. Info keys: {list(state.get('dataset_info', {}).keys())}")
557
- except Exception as e:
558
- logger.error(f"❌ Data profiling failed: {str(e)}")
559
- state = initial_state
560
- state['error_messages'] = [f"Data profiling failed: {str(e)}"]
561
-
562
- try:
563
- logger.info("πŸ” Step 2: Column analysis...")
564
- state = self._analyze_columns(state)
565
- logger.info(f"βœ… Column analysis completed. Columns analyzed: {len(state.get('column_analysis', {}))}")
566
- except Exception as e:
567
- logger.error(f"❌ Column analysis failed: {str(e)}")
568
- state['error_messages'].append(f"Column analysis failed: {str(e)}")
569
-
570
- try:
571
- logger.info("πŸ’‘ Step 3: Generating insights...")
572
- state = self._generate_insights(state)
573
- insights_count = len(state.get('insights', []))
574
- logger.info(f"βœ… Insights generation completed. Generated: {insights_count} insights")
575
- if insights_count > 0:
576
- logger.info(f"First insight: {state['insights'][0][:100]}...")
577
  else:
578
- logger.warning("⚠️ No insights were generated!")
579
- except Exception as e:
580
- logger.error(f"❌ Insights generation failed: {str(e)}")
581
- state['error_messages'].append(f"Insights generation failed: {str(e)}")
582
- # Add fallback insights
583
- state['insights'] = [
584
- "Basic dataset analysis completed",
585
- f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns",
586
- "Manual review recommended for detailed insights"
587
- ]
588
-
589
- try:
590
- logger.info("πŸ“ˆ Step 4: Planning visualizations...")
591
- state = self._plan_visualizations(state)
592
- viz_count = len(state.get('visualizations', []))
593
- logger.info(f"βœ… Visualization planning completed. Planned: {viz_count} visualizations")
594
- except Exception as e:
595
- logger.error(f"❌ Visualization planning failed: {str(e)}")
596
- state['error_messages'].append(f"Visualization planning failed: {str(e)}")
597
-
598
- try:
599
- logger.info("🎨 Step 5: Creating charts...")
600
- state = self._create_charts(state)
601
- logger.info("βœ… Chart creation completed")
602
- except Exception as e:
603
- logger.error(f"❌ Chart creation failed: {str(e)}")
604
- state['error_messages'].append(f"Chart creation failed: {str(e)}")
605
-
606
- try:
607
- logger.info("🎯 Step 6: Generating recommendations...")
608
- state = self._generate_recommendations(state)
609
- rec_count = len(state.get('recommendations', []))
610
- logger.info(f"βœ… Recommendations generation completed. Generated: {rec_count} recommendations")
611
- if rec_count > 0:
612
- logger.info(f"First recommendation: {state['recommendations'][0][:100]}...")
613
- else:
614
- logger.warning("⚠️ No recommendations were generated!")
615
- except Exception as e:
616
- logger.error(f"❌ Recommendations generation failed: {str(e)}")
617
- state['error_messages'].append(f"Recommendations generation failed: {str(e)}")
618
- # Add fallback recommendations
619
- state['recommendations'] = [
620
- "Conduct detailed data quality assessment",
621
- "Implement data monitoring processes",
622
- "Consider advanced analytics for business insights",
623
- "Review data collection and validation procedures"
624
- ]
625
-
626
- # Clean up temp file
627
- if os.path.exists(dataset_path):
628
  try:
629
- os.remove(dataset_path)
630
- logger.info("🧹 Temporary file cleaned up")
631
- except:
632
- pass
633
-
634
- # Prepare results
635
- results = {
636
- "dataset_info": state.get("dataset_info", {}),
637
- "column_analysis": state.get("column_analysis", {}),
638
- "insights": state.get("insights", []),
639
- "visualizations": state.get("visualizations", []),
640
- "recommendations": state.get("recommendations", []),
641
- "analysis_timestamp": datetime.now().isoformat(),
642
- "errors": state.get("error_messages", [])
643
- }
644
-
645
- # Log final results
646
- logger.info(f"πŸŽ‰ Analysis completed!")
647
- logger.info(f"πŸ“Š Final results: {len(results['insights'])} insights, {len(results['recommendations'])} recommendations")
648
- logger.info(f"❌ Errors encountered: {len(results['errors'])}")
649
-
650
- for error in results['errors']:
651
- logger.error(f"Error details: {error}")
652
-
653
- return results
654
-
655
- except Exception as e:
656
- logger.error(f"πŸ’₯ Critical analysis failure: {str(e)}")
657
- logger.error(traceback.format_exc())
658
- return {
659
- "error": str(e),
660
- "dataset_info": {},
661
- "insights": [f"Analysis failed: {str(e)}"],
662
- "recommendations": ["Please check logs and try again"],
663
- "visualizations": [],
664
- "column_analysis": {},
665
- "analysis_timestamp": datetime.now().isoformat(),
666
- "errors": [str(e)]
667
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
  def _generate_report(self, results: Dict[str, Any], dataset_path: str):
670
  """Generate a comprehensive analysis report"""
 
7
  import plotly.graph_objects as go
8
  from plotly.subplots import make_subplots
9
  import warnings
10
+ import traceback
11
  warnings.filterwarnings('ignore')
12
 
13
  from typing import Dict, List, Any, Optional, TypedDict
 
20
  from langchain_groq import ChatGroq
21
  from langchain_core.messages import HumanMessage, SystemMessage
22
  from langchain_core.prompts import ChatPromptTemplate
 
23
 
24
  # Configure logging
25
  logging.basicConfig(level=logging.INFO)
 
39
  class DataAnalysisAgent:
40
  def __init__(self, groq_api_key: str, model_name: str = "llama3-70b-8192"):
41
  """Initialize the Data Analysis Agent"""
42
+ # Fixed: Use correct model name format and add timeout
43
  self.llm = ChatGroq(
44
  groq_api_key=groq_api_key,
45
+ model_name=model_name,
46
  temperature=0.1,
47
+ max_tokens=2000,
48
+ timeout=60 # Added timeout for container environments
49
  )
50
 
51
  # Set up the analysis workflow graph
 
519
  return state
520
 
521
  def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
522
+ """DIAGNOSTIC VERSION: Main method to analyze a dataset with detailed logging"""
523
+ logger.info(f"πŸ” Starting analysis of dataset: {dataset_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  try:
526
+ # Load dataset
527
+ logger.info("πŸ“ Loading dataset...")
528
+ if dataset_path.endswith('.csv'):
529
+ df = pd.read_csv(dataset_path)
530
+ elif dataset_path.endswith(('.xlsx', '.xls')):
531
+ df = pd.read_excel(dataset_path)
532
+ elif dataset_path.endswith('.json'):
533
+ df = pd.read_json(dataset_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
  else:
535
+ raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")
536
+
537
+ logger.info(f"βœ… Dataset loaded: {df.shape}")
538
+
539
+ # Initialize state with all required fields
540
+ initial_state = AnalysisState(
541
+ dataset=df,
542
+ dataset_info={},
543
+ column_analysis={},
544
+ insights=[],
545
+ visualizations=[],
546
+ recommendations=[],
547
+ current_step="",
548
+ error_messages=[]
549
+ )
550
+
551
+ logger.info("πŸš€ Starting workflow execution...")
552
+
553
+ # Test each step individually to find the failure point
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
554
  try:
555
+ logger.info("πŸ“Š Step 1: Data profiling...")
556
+ state = self._profile_dataset(initial_state)
557
+ logger.info(f"βœ… Data profiling completed. Info keys: {list(state.get('dataset_info', {}).keys())}")
558
+ except Exception as e:
559
+ logger.error(f"❌ Data profiling failed: {str(e)}")
560
+ state = initial_state
561
+ state['error_messages'] = [f"Data profiling failed: {str(e)}"]
562
+
563
+ try:
564
+ logger.info("πŸ” Step 2: Column analysis...")
565
+ state = self._analyze_columns(state)
566
+ logger.info(f"βœ… Column analysis completed. Columns analyzed: {len(state.get('column_analysis', {}))}")
567
+ except Exception as e:
568
+ logger.error(f"❌ Column analysis failed: {str(e)}")
569
+ state['error_messages'].append(f"Column analysis failed: {str(e)}")
570
+
571
+ try:
572
+ logger.info("πŸ’‘ Step 3: Generating insights...")
573
+ state = self._generate_insights(state)
574
+ insights_count = len(state.get('insights', []))
575
+ logger.info(f"βœ… Insights generation completed. Generated: {insights_count} insights")
576
+ if insights_count > 0:
577
+ logger.info(f"First insight: {state['insights'][0][:100]}...")
578
+ else:
579
+ logger.warning("⚠️ No insights were generated!")
580
+ except Exception as e:
581
+ logger.error(f"❌ Insights generation failed: {str(e)}")
582
+ state['error_messages'].append(f"Insights generation failed: {str(e)}")
583
+ # Add fallback insights
584
+ state['insights'] = [
585
+ "Basic dataset analysis completed",
586
+ f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns",
587
+ "Manual review recommended for detailed insights"
588
+ ]
589
+
590
+ try:
591
+ logger.info("πŸ“ˆ Step 4: Planning visualizations...")
592
+ state = self._plan_visualizations(state)
593
+ viz_count = len(state.get('visualizations', []))
594
+ logger.info(f"βœ… Visualization planning completed. Planned: {viz_count} visualizations")
595
+ except Exception as e:
596
+ logger.error(f"❌ Visualization planning failed: {str(e)}")
597
+ state['error_messages'].append(f"Visualization planning failed: {str(e)}")
598
+
599
+ try:
600
+ logger.info("🎨 Step 5: Creating charts...")
601
+ state = self._create_charts(state)
602
+ logger.info("βœ… Chart creation completed")
603
+ except Exception as e:
604
+ logger.error(f"❌ Chart creation failed: {str(e)}")
605
+ state['error_messages'].append(f"Chart creation failed: {str(e)}")
606
+
607
+ try:
608
+ logger.info("🎯 Step 6: Generating recommendations...")
609
+ state = self._generate_recommendations(state)
610
+ rec_count = len(state.get('recommendations', []))
611
+ logger.info(f"βœ… Recommendations generation completed. Generated: {rec_count} recommendations")
612
+ if rec_count > 0:
613
+ logger.info(f"First recommendation: {state['recommendations'][0][:100]}...")
614
+ else:
615
+ logger.warning("⚠️ No recommendations were generated!")
616
+ except Exception as e:
617
+ logger.error(f"❌ Recommendations generation failed: {str(e)}")
618
+ state['error_messages'].append(f"Recommendations generation failed: {str(e)}")
619
+ # Add fallback recommendations
620
+ state['recommendations'] = [
621
+ "Conduct detailed data quality assessment",
622
+ "Implement data monitoring processes",
623
+ "Consider advanced analytics for business insights",
624
+ "Review data collection and validation procedures"
625
+ ]
626
+
627
+ # Clean up temp file
628
+ if os.path.exists(dataset_path):
629
+ try:
630
+ os.remove(dataset_path)
631
+ logger.info("🧹 Temporary file cleaned up")
632
+ except:
633
+ pass
634
+
635
+ # Prepare results
636
+ results = {
637
+ "dataset_info": state.get("dataset_info", {}),
638
+ "column_analysis": state.get("column_analysis", {}),
639
+ "insights": state.get("insights", []),
640
+ "visualizations": state.get("visualizations", []),
641
+ "recommendations": state.get("recommendations", []),
642
+ "analysis_timestamp": datetime.now().isoformat(),
643
+ "errors": state.get("error_messages", [])
644
+ }
645
+
646
+ # Log final results
647
+ logger.info(f"πŸŽ‰ Analysis completed!")
648
+ logger.info(f"πŸ“Š Final results: {len(results['insights'])} insights, {len(results['recommendations'])} recommendations")
649
+ logger.info(f"❌ Errors encountered: {len(results['errors'])}")
650
+
651
+ for error in results['errors']:
652
+ logger.error(f"Error details: {error}")
653
+
654
+ return results
655
+
656
+ except Exception as e:
657
+ logger.error(f"πŸ’₯ Critical analysis failure: {str(e)}")
658
+ logger.error(traceback.format_exc())
659
+ return {
660
+ "error": str(e),
661
+ "dataset_info": {},
662
+ "insights": [f"Analysis failed: {str(e)}"],
663
+ "recommendations": ["Please check logs and try again"],
664
+ "visualizations": [],
665
+ "column_analysis": {},
666
+ "analysis_timestamp": datetime.now().isoformat(),
667
+ "errors": [str(e)]
668
+ }
669
 
670
  def _generate_report(self, results: Dict[str, Any], dataset_path: str):
671
  """Generate a comprehensive analysis report"""