Update data_analysis_agent.py
Browse files- data_analysis_agent.py +143 -45
data_analysis_agent.py
CHANGED
|
@@ -518,55 +518,153 @@ class DataAnalysisAgent:
|
|
| 518 |
return state
|
| 519 |
|
| 520 |
def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
|
| 521 |
-
"""
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
|
| 524 |
try:
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
else:
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
# Initialize state with all required fields
|
| 536 |
-
initial_state = AnalysisState(
|
| 537 |
-
dataset=df,
|
| 538 |
-
dataset_info={},
|
| 539 |
-
column_analysis={},
|
| 540 |
-
insights=[],
|
| 541 |
-
visualizations=[],
|
| 542 |
-
recommendations=[],
|
| 543 |
-
current_step="",
|
| 544 |
-
error_messages=[]
|
| 545 |
-
)
|
| 546 |
-
|
| 547 |
-
# Run the workflow
|
| 548 |
-
final_state = self.workflow.invoke(initial_state)
|
| 549 |
-
|
| 550 |
-
# Prepare results
|
| 551 |
-
results = {
|
| 552 |
-
"dataset_info": final_state.get("dataset_info", {}),
|
| 553 |
-
"column_analysis": final_state.get("column_analysis", {}),
|
| 554 |
-
"insights": final_state.get("insights", []),
|
| 555 |
-
"visualizations": final_state.get("visualizations", []),
|
| 556 |
-
"recommendations": final_state.get("recommendations", []),
|
| 557 |
-
"analysis_timestamp": datetime.now().isoformat(),
|
| 558 |
-
"errors": final_state.get("error_messages", [])
|
| 559 |
-
}
|
| 560 |
-
|
| 561 |
-
# Generate summary report
|
| 562 |
-
self._generate_report(results, dataset_path)
|
| 563 |
-
|
| 564 |
-
logger.info("Analysis completed successfully!")
|
| 565 |
-
return results
|
| 566 |
-
|
| 567 |
except Exception as e:
|
| 568 |
-
logger.error(f"
|
| 569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
|
| 571 |
def _generate_report(self, results: Dict[str, Any], dataset_path: str):
|
| 572 |
"""Generate a comprehensive analysis report"""
|
|
|
|
| 518 |
return state
|
| 519 |
|
| 520 |
def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
    """Run the full analysis pipeline on a dataset file, stage by stage.

    Diagnostic variant: each pipeline stage (profiling, column analysis,
    insight generation, visualization planning, chart creation,
    recommendations) runs inside its own try/except so a failure in one
    stage is logged, recorded in ``error_messages``, and the remaining
    stages still execute — making the exact failure point identifiable.

    Args:
        dataset_path: Path to a CSV, Excel (.xlsx/.xls), or JSON file.
            NOTE: the file is treated as temporary and is deleted after
            the analysis completes.

    Returns:
        Dict with keys "dataset_info", "column_analysis", "insights",
        "visualizations", "recommendations", "analysis_timestamp", and
        "errors". On a critical (unrecoverable) failure the dict also
        contains an "error" key and the other fields hold fallback values.

    Raises:
        Nothing — all exceptions are caught and reported via the result.
    """
    logger.info(f"Starting analysis of dataset: {dataset_path}")

    try:
        # --- Load dataset, dispatching on file extension -------------------
        logger.info("Loading dataset...")
        if dataset_path.endswith('.csv'):
            df = pd.read_csv(dataset_path)
        elif dataset_path.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(dataset_path)
        elif dataset_path.endswith('.json'):
            df = pd.read_json(dataset_path)
        else:
            raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")

        logger.info(f"Dataset loaded: {df.shape}")

        # Initialize state with all required fields so later stages can
        # safely .get()/.append() without key checks.
        initial_state = AnalysisState(
            dataset=df,
            dataset_info={},
            column_analysis={},
            insights=[],
            visualizations=[],
            recommendations=[],
            current_step="",
            error_messages=[]
        )

        logger.info("Starting workflow execution...")

        # Run each stage individually (instead of one workflow.invoke) so a
        # single failing stage does not abort the whole analysis.
        try:
            logger.info("Step 1: Data profiling...")
            state = self._profile_dataset(initial_state)
            logger.info(f"Data profiling completed. Info keys: {list(state.get('dataset_info', {}).keys())}")
        except Exception as e:
            logger.error(f"Data profiling failed: {str(e)}")
            # Fall back to the initial state so later stages still have
            # a well-formed state to operate on.
            state = initial_state
            state['error_messages'] = [f"Data profiling failed: {str(e)}"]

        try:
            logger.info("Step 2: Column analysis...")
            state = self._analyze_columns(state)
            logger.info(f"Column analysis completed. Columns analyzed: {len(state.get('column_analysis', {}))}")
        except Exception as e:
            logger.error(f"Column analysis failed: {str(e)}")
            state['error_messages'].append(f"Column analysis failed: {str(e)}")

        try:
            logger.info("Step 3: Generating insights...")
            state = self._generate_insights(state)
            insights_count = len(state.get('insights', []))
            logger.info(f"Insights generation completed. Generated: {insights_count} insights")
            if insights_count > 0:
                logger.info(f"First insight: {state['insights'][0][:100]}...")
            else:
                logger.warning("No insights were generated!")
        except Exception as e:
            logger.error(f"Insights generation failed: {str(e)}")
            state['error_messages'].append(f"Insights generation failed: {str(e)}")
            # Add fallback insights so the report is never empty.
            state['insights'] = [
                "Basic dataset analysis completed",
                f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns",
                "Manual review recommended for detailed insights"
            ]

        try:
            logger.info("Step 4: Planning visualizations...")
            state = self._plan_visualizations(state)
            viz_count = len(state.get('visualizations', []))
            logger.info(f"Visualization planning completed. Planned: {viz_count} visualizations")
        except Exception as e:
            logger.error(f"Visualization planning failed: {str(e)}")
            state['error_messages'].append(f"Visualization planning failed: {str(e)}")

        try:
            logger.info("Step 5: Creating charts...")
            state = self._create_charts(state)
            logger.info("Chart creation completed")
        except Exception as e:
            logger.error(f"Chart creation failed: {str(e)}")
            state['error_messages'].append(f"Chart creation failed: {str(e)}")

        try:
            logger.info("Step 6: Generating recommendations...")
            state = self._generate_recommendations(state)
            rec_count = len(state.get('recommendations', []))
            logger.info(f"Recommendations generation completed. Generated: {rec_count} recommendations")
            if rec_count > 0:
                logger.info(f"First recommendation: {state['recommendations'][0][:100]}...")
            else:
                logger.warning("No recommendations were generated!")
        except Exception as e:
            logger.error(f"Recommendations generation failed: {str(e)}")
            state['error_messages'].append(f"Recommendations generation failed: {str(e)}")
            # Add fallback recommendations so the report is never empty.
            state['recommendations'] = [
                "Conduct detailed data quality assessment",
                "Implement data monitoring processes",
                "Consider advanced analytics for business insights",
                "Review data collection and validation procedures"
            ]

        # Clean up temp file. Narrow except: only swallow filesystem
        # errors, and log them instead of hiding them completely.
        if os.path.exists(dataset_path):
            try:
                os.remove(dataset_path)
                logger.info("Temporary file cleaned up")
            except OSError as cleanup_err:
                logger.warning(f"Could not remove temporary file: {cleanup_err}")

        # Prepare results from whatever the stages managed to produce.
        results = {
            "dataset_info": state.get("dataset_info", {}),
            "column_analysis": state.get("column_analysis", {}),
            "insights": state.get("insights", []),
            "visualizations": state.get("visualizations", []),
            "recommendations": state.get("recommendations", []),
            "analysis_timestamp": datetime.now().isoformat(),
            "errors": state.get("error_messages", [])
        }

        # Log final results summary.
        logger.info("Analysis completed!")
        logger.info(f"Final results: {len(results['insights'])} insights, {len(results['recommendations'])} recommendations")
        logger.info(f"Errors encountered: {len(results['errors'])}")

        for error in results['errors']:
            logger.error(f"Error details: {error}")

        return results

    except Exception as e:
        # Critical failure (e.g. file unreadable, unsupported format):
        # log the full traceback and return a well-formed fallback result.
        logger.error(f"Critical analysis failure: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "error": str(e),
            "dataset_info": {},
            "insights": [f"Analysis failed: {str(e)}"],
            "recommendations": ["Please check logs and try again"],
            "visualizations": [],
            "column_analysis": {},
            "analysis_timestamp": datetime.now().isoformat(),
            "errors": [str(e)]
        }
| 668 |
|
| 669 |
def _generate_report(self, results: Dict[str, Any], dataset_path: str):
|
| 670 |
"""Generate a comprehensive analysis report"""
|