Update data_analysis_agent.py
Browse files- data_analysis_agent.py +143 -45
data_analysis_agent.py
CHANGED
|
@@ -518,55 +518,153 @@ class DataAnalysisAgent:
|
|
| 518 |
return state
|
| 519 |
|
| 520 |
def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
|
| 521 |
-
"""
|
| 522 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
|
| 524 |
try:
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
else:
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
# Initialize state with all required fields
|
| 536 |
-
initial_state = AnalysisState(
|
| 537 |
-
dataset=df,
|
| 538 |
-
dataset_info={},
|
| 539 |
-
column_analysis={},
|
| 540 |
-
insights=[],
|
| 541 |
-
visualizations=[],
|
| 542 |
-
recommendations=[],
|
| 543 |
-
current_step="",
|
| 544 |
-
error_messages=[]
|
| 545 |
-
)
|
| 546 |
-
|
| 547 |
-
# Run the workflow
|
| 548 |
-
final_state = self.workflow.invoke(initial_state)
|
| 549 |
-
|
| 550 |
-
# Prepare results
|
| 551 |
-
results = {
|
| 552 |
-
"dataset_info": final_state.get("dataset_info", {}),
|
| 553 |
-
"column_analysis": final_state.get("column_analysis", {}),
|
| 554 |
-
"insights": final_state.get("insights", []),
|
| 555 |
-
"visualizations": final_state.get("visualizations", []),
|
| 556 |
-
"recommendations": final_state.get("recommendations", []),
|
| 557 |
-
"analysis_timestamp": datetime.now().isoformat(),
|
| 558 |
-
"errors": final_state.get("error_messages", [])
|
| 559 |
-
}
|
| 560 |
-
|
| 561 |
-
# Generate summary report
|
| 562 |
-
self._generate_report(results, dataset_path)
|
| 563 |
-
|
| 564 |
-
logger.info("Analysis completed successfully!")
|
| 565 |
-
return results
|
| 566 |
-
|
| 567 |
except Exception as e:
|
| 568 |
-
logger.error(f"
|
| 569 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
|
| 571 |
def _generate_report(self, results: Dict[str, Any], dataset_path: str):
|
| 572 |
"""Generate a comprehensive analysis report"""
|
|
|
|
| 518 |
return state
|
| 519 |
|
| 520 |
def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
    """Run the full analysis pipeline on a dataset file, stage by stage.

    Diagnostic variant: each pipeline stage (profiling, column analysis,
    insight generation, visualization planning, chart creation,
    recommendations) runs inside its own try/except so a failure in one
    stage is logged, recorded in ``error_messages``, and the remaining
    stages still execute — making the exact failure point identifiable.

    Args:
        dataset_path: Path to a CSV, Excel (.xlsx/.xls), or JSON file.
            NOTE: the file is treated as temporary and is deleted after
            the analysis completes.

    Returns:
        Dict with keys "dataset_info", "column_analysis", "insights",
        "visualizations", "recommendations", "analysis_timestamp", and
        "errors". On a critical (unrecoverable) failure the dict also
        contains an "error" key and the other fields hold fallback values.

    Raises:
        Nothing — all exceptions are caught and reported via the result.
    """
    logger.info(f"Starting analysis of dataset: {dataset_path}")

    try:
        # --- Load dataset, dispatching on file extension -------------------
        logger.info("Loading dataset...")
        if dataset_path.endswith('.csv'):
            df = pd.read_csv(dataset_path)
        elif dataset_path.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(dataset_path)
        elif dataset_path.endswith('.json'):
            df = pd.read_json(dataset_path)
        else:
            raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")

        logger.info(f"Dataset loaded: {df.shape}")

        # Initialize state with all required fields so later stages can
        # safely .get()/.append() without key checks.
        initial_state = AnalysisState(
            dataset=df,
            dataset_info={},
            column_analysis={},
            insights=[],
            visualizations=[],
            recommendations=[],
            current_step="",
            error_messages=[]
        )

        logger.info("Starting workflow execution...")

        # Run each stage individually (instead of one workflow.invoke) so a
        # single failing stage does not abort the whole analysis.
        try:
            logger.info("Step 1: Data profiling...")
            state = self._profile_dataset(initial_state)
            logger.info(f"Data profiling completed. Info keys: {list(state.get('dataset_info', {}).keys())}")
        except Exception as e:
            logger.error(f"Data profiling failed: {str(e)}")
            # Fall back to the initial state so later stages still have
            # a well-formed state to operate on.
            state = initial_state
            state['error_messages'] = [f"Data profiling failed: {str(e)}"]

        try:
            logger.info("Step 2: Column analysis...")
            state = self._analyze_columns(state)
            logger.info(f"Column analysis completed. Columns analyzed: {len(state.get('column_analysis', {}))}")
        except Exception as e:
            logger.error(f"Column analysis failed: {str(e)}")
            state['error_messages'].append(f"Column analysis failed: {str(e)}")

        try:
            logger.info("Step 3: Generating insights...")
            state = self._generate_insights(state)
            insights_count = len(state.get('insights', []))
            logger.info(f"Insights generation completed. Generated: {insights_count} insights")
            if insights_count > 0:
                logger.info(f"First insight: {state['insights'][0][:100]}...")
            else:
                logger.warning("No insights were generated!")
        except Exception as e:
            logger.error(f"Insights generation failed: {str(e)}")
            state['error_messages'].append(f"Insights generation failed: {str(e)}")
            # Add fallback insights so the report is never empty.
            state['insights'] = [
                "Basic dataset analysis completed",
                f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns",
                "Manual review recommended for detailed insights"
            ]

        try:
            logger.info("Step 4: Planning visualizations...")
            state = self._plan_visualizations(state)
            viz_count = len(state.get('visualizations', []))
            logger.info(f"Visualization planning completed. Planned: {viz_count} visualizations")
        except Exception as e:
            logger.error(f"Visualization planning failed: {str(e)}")
            state['error_messages'].append(f"Visualization planning failed: {str(e)}")

        try:
            logger.info("Step 5: Creating charts...")
            state = self._create_charts(state)
            logger.info("Chart creation completed")
        except Exception as e:
            logger.error(f"Chart creation failed: {str(e)}")
            state['error_messages'].append(f"Chart creation failed: {str(e)}")

        try:
            logger.info("Step 6: Generating recommendations...")
            state = self._generate_recommendations(state)
            rec_count = len(state.get('recommendations', []))
            logger.info(f"Recommendations generation completed. Generated: {rec_count} recommendations")
            if rec_count > 0:
                logger.info(f"First recommendation: {state['recommendations'][0][:100]}...")
            else:
                logger.warning("No recommendations were generated!")
        except Exception as e:
            logger.error(f"Recommendations generation failed: {str(e)}")
            state['error_messages'].append(f"Recommendations generation failed: {str(e)}")
            # Add fallback recommendations so the report is never empty.
            state['recommendations'] = [
                "Conduct detailed data quality assessment",
                "Implement data monitoring processes",
                "Consider advanced analytics for business insights",
                "Review data collection and validation procedures"
            ]

        # Clean up temp file. Narrow except: only swallow filesystem
        # errors, and log them instead of hiding them completely.
        if os.path.exists(dataset_path):
            try:
                os.remove(dataset_path)
                logger.info("Temporary file cleaned up")
            except OSError as cleanup_err:
                logger.warning(f"Could not remove temporary file: {cleanup_err}")

        # Prepare results from whatever the stages managed to produce.
        results = {
            "dataset_info": state.get("dataset_info", {}),
            "column_analysis": state.get("column_analysis", {}),
            "insights": state.get("insights", []),
            "visualizations": state.get("visualizations", []),
            "recommendations": state.get("recommendations", []),
            "analysis_timestamp": datetime.now().isoformat(),
            "errors": state.get("error_messages", [])
        }

        # Log final results summary.
        logger.info("Analysis completed!")
        logger.info(f"Final results: {len(results['insights'])} insights, {len(results['recommendations'])} recommendations")
        logger.info(f"Errors encountered: {len(results['errors'])}")

        for error in results['errors']:
            logger.error(f"Error details: {error}")

        return results

    except Exception as e:
        # Critical failure (e.g. file unreadable, unsupported format):
        # log the full traceback and return a well-formed fallback result.
        logger.error(f"Critical analysis failure: {str(e)}")
        logger.error(traceback.format_exc())
        return {
            "error": str(e),
            "dataset_info": {},
            "insights": [f"Analysis failed: {str(e)}"],
            "recommendations": ["Please check logs and try again"],
            "visualizations": [],
            "column_analysis": {},
            "analysis_timestamp": datetime.now().isoformat(),
            "errors": [str(e)]
        }
| 668 |
|
| 669 |
def _generate_report(self, results: Dict[str, Any], dataset_path: str):
|
| 670 |
"""Generate a comprehensive analysis report"""
|