Spaces:

Aka18
/

AIDA

Sleeping

App Files Community

Aka18 committed on Jul 6, 2025

Commit

73ebf19

verified ·

1 Parent(s): 014c08f

Update data_analysis_agent.py

Browse files

Files changed (1) hide show

data_analysis_agent.py +148 -147

data_analysis_agent.py CHANGED Viewed

@@ -7,6 +7,7 @@ import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 import warnings
 warnings.filterwarnings('ignore')
 from typing import Dict, List, Any, Optional, TypedDict
@@ -19,7 +20,6 @@ from langgraph.graph import StateGraph, END
 from langchain_groq import ChatGroq
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.prompts import ChatPromptTemplate
-os.environ['GROQ_TIMEOUT'] = '60'
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -39,12 +39,13 @@ class AnalysisState(TypedDict):
 class DataAnalysisAgent:
     def __init__(self, groq_api_key: str, model_name: str = "llama3-70b-8192"):
         """Initialize the Data Analysis Agent"""
-        # Fixed: Use correct model name format
         self.llm = ChatGroq(
             groq_api_key=groq_api_key,
-            model_name=model_name,  # Fixed: Use standard model names
             temperature=0.1,
-            max_tokens=2000
         )
         # Set up the analysis workflow graph
@@ -518,153 +519,153 @@ class DataAnalysisAgent:
         return state
     def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
-        """Diagnostic version to identify the exact failure point"""
-    logger.info(f"🔍 Starting analysis of dataset: {dataset_path}")
-    try:
-        # Load dataset
-        logger.info("📁 Loading dataset...")
-        if dataset_path.endswith('.csv'):
-            df = pd.read_csv(dataset_path)
-        elif dataset_path.endswith(('.xlsx', '.xls')):
-            df = pd.read_excel(dataset_path)
-        elif dataset_path.endswith('.json'):
-            df = pd.read_json(dataset_path)
-        else:
-            raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")
-        logger.info(f"✅ Dataset loaded: {df.shape}")
-        # Initialize state with all required fields
-        initial_state = AnalysisState(
-            dataset=df,
-            dataset_info={},
-            column_analysis={},
-            insights=[],
-            visualizations=[],
-            recommendations=[],
-            current_step="",
-            error_messages=[]
-        )
-        logger.info("🚀 Starting workflow execution...")
-        # Test each step individually to find the failure point
         try:
-            logger.info("📊 Step 1: Data profiling...")
-            state = self._profile_dataset(initial_state)
-            logger.info(f"✅ Data profiling completed. Info keys: {list(state.get('dataset_info', {}).keys())}")
-        except Exception as e:
-            logger.error(f"❌ Data profiling failed: {str(e)}")
-            state = initial_state
-            state['error_messages'] = [f"Data profiling failed: {str(e)}"]
-        try:
-            logger.info("🔍 Step 2: Column analysis...")
-            state = self._analyze_columns(state)
-            logger.info(f"✅ Column analysis completed. Columns analyzed: {len(state.get('column_analysis', {}))}")
-        except Exception as e:
-            logger.error(f"❌ Column analysis failed: {str(e)}")
-            state['error_messages'].append(f"Column analysis failed: {str(e)}")
-        try:
-            logger.info("💡 Step 3: Generating insights...")
-            state = self._generate_insights(state)
-            insights_count = len(state.get('insights', []))
-            logger.info(f"✅ Insights generation completed. Generated: {insights_count} insights")
-            if insights_count > 0:
-                logger.info(f"First insight: {state['insights'][0][:100]}...")
             else:
-                logger.warning("⚠️ No insights were generated!")
-        except Exception as e:
-            logger.error(f"❌ Insights generation failed: {str(e)}")
-            state['error_messages'].append(f"Insights generation failed: {str(e)}")
-            # Add fallback insights
-            state['insights'] = [
-                "Basic dataset analysis completed",
-                f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns",
-                "Manual review recommended for detailed insights"
-            ]
-        try:
-            logger.info("📈 Step 4: Planning visualizations...")
-            state = self._plan_visualizations(state)
-            viz_count = len(state.get('visualizations', []))
-            logger.info(f"✅ Visualization planning completed. Planned: {viz_count} visualizations")
-        except Exception as e:
-            logger.error(f"❌ Visualization planning failed: {str(e)}")
-            state['error_messages'].append(f"Visualization planning failed: {str(e)}")
-        try:
-            logger.info("🎨 Step 5: Creating charts...")
-            state = self._create_charts(state)
-            logger.info("✅ Chart creation completed")
-        except Exception as e:
-            logger.error(f"❌ Chart creation failed: {str(e)}")
-            state['error_messages'].append(f"Chart creation failed: {str(e)}")
-        try:
-            logger.info("🎯 Step 6: Generating recommendations...")
-            state = self._generate_recommendations(state)
-            rec_count = len(state.get('recommendations', []))
-            logger.info(f"✅ Recommendations generation completed. Generated: {rec_count} recommendations")
-            if rec_count > 0:
-                logger.info(f"First recommendation: {state['recommendations'][0][:100]}...")
-            else:
-                logger.warning("⚠️ No recommendations were generated!")
-        except Exception as e:
-            logger.error(f"❌ Recommendations generation failed: {str(e)}")
-            state['error_messages'].append(f"Recommendations generation failed: {str(e)}")
-            # Add fallback recommendations
-            state['recommendations'] = [
-                "Conduct detailed data quality assessment",
-                "Implement data monitoring processes",
-                "Consider advanced analytics for business insights",
-                "Review data collection and validation procedures"
-            ]
-        # Clean up temp file
-        if os.path.exists(dataset_path):
             try:
-                os.remove(dataset_path)
-                logger.info("🧹 Temporary file cleaned up")
-            except:
-                pass
-        # Prepare results
-        results = {
-            "dataset_info": state.get("dataset_info", {}),
-            "column_analysis": state.get("column_analysis", {}),
-            "insights": state.get("insights", []),
-            "visualizations": state.get("visualizations", []),
-            "recommendations": state.get("recommendations", []),
-            "analysis_timestamp": datetime.now().isoformat(),
-            "errors": state.get("error_messages", [])
-        }
-        # Log final results
-        logger.info(f"🎉 Analysis completed!")
-        logger.info(f"📊 Final results: {len(results['insights'])} insights, {len(results['recommendations'])} recommendations")
-        logger.info(f"❌ Errors encountered: {len(results['errors'])}")
-        for error in results['errors']:
-            logger.error(f"Error details: {error}")
-        return results
-    except Exception as e:
-        logger.error(f"💥 Critical analysis failure: {str(e)}")
-        logger.error(traceback.format_exc())
-        return {
-            "error": str(e),
-            "dataset_info": {},
-            "insights": [f"Analysis failed: {str(e)}"],
-            "recommendations": ["Please check logs and try again"],
-            "visualizations": [],
-            "column_analysis": {},
-            "analysis_timestamp": datetime.now().isoformat(),
-            "errors": [str(e)]
-        }
     def _generate_report(self, results: Dict[str, Any], dataset_path: str):
         """Generate a comprehensive analysis report"""

 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 import warnings
+import traceback
 warnings.filterwarnings('ignore')
 from typing import Dict, List, Any, Optional, TypedDict
 from langchain_groq import ChatGroq
 from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_core.prompts import ChatPromptTemplate
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 class DataAnalysisAgent:
     def __init__(self, groq_api_key: str, model_name: str = "llama3-70b-8192"):
         """Initialize the Data Analysis Agent"""
+        # Fixed: Use correct model name format and add timeout
         self.llm = ChatGroq(
             groq_api_key=groq_api_key,
+            model_name=model_name,
             temperature=0.1,
+            max_tokens=2000,
+            timeout=60  # Added timeout for container environments
         )
         # Set up the analysis workflow graph
         return state
     def analyze_dataset(self, dataset_path: str) -> Dict[str, Any]:
+        """DIAGNOSTIC VERSION: Main method to analyze a dataset with detailed logging"""
+        logger.info(f"🔍 Starting analysis of dataset: {dataset_path}")
         try:
+            # Load dataset
+            logger.info("📁 Loading dataset...")
+            if dataset_path.endswith('.csv'):
+                df = pd.read_csv(dataset_path)
+            elif dataset_path.endswith(('.xlsx', '.xls')):
+                df = pd.read_excel(dataset_path)
+            elif dataset_path.endswith('.json'):
+                df = pd.read_json(dataset_path)
             else:
+                raise ValueError("Unsupported file format. Use CSV, Excel, or JSON.")
+            logger.info(f"✅ Dataset loaded: {df.shape}")
+            # Initialize state with all required fields
+            initial_state = AnalysisState(
+                dataset=df,
+                dataset_info={},
+                column_analysis={},
+                insights=[],
+                visualizations=[],
+                recommendations=[],
+                current_step="",
+                error_messages=[]
+            )
+            logger.info("🚀 Starting workflow execution...")
+            # Test each step individually to find the failure point
             try:
+                logger.info("📊 Step 1: Data profiling...")
+                state = self._profile_dataset(initial_state)
+                logger.info(f"✅ Data profiling completed. Info keys: {list(state.get('dataset_info', {}).keys())}")
+            except Exception as e:
+                logger.error(f"❌ Data profiling failed: {str(e)}")
+                state = initial_state
+                state['error_messages'] = [f"Data profiling failed: {str(e)}"]
+            try:
+                logger.info("🔍 Step 2: Column analysis...")
+                state = self._analyze_columns(state)
+                logger.info(f"✅ Column analysis completed. Columns analyzed: {len(state.get('column_analysis', {}))}")
+            except Exception as e:
+                logger.error(f"❌ Column analysis failed: {str(e)}")
+                state['error_messages'].append(f"Column analysis failed: {str(e)}")
+            try:
+                logger.info("💡 Step 3: Generating insights...")
+                state = self._generate_insights(state)
+                insights_count = len(state.get('insights', []))
+                logger.info(f"✅ Insights generation completed. Generated: {insights_count} insights")
+                if insights_count > 0:
+                    logger.info(f"First insight: {state['insights'][0][:100]}...")
+                else:
+                    logger.warning("⚠️ No insights were generated!")
+            except Exception as e:
+                logger.error(f"❌ Insights generation failed: {str(e)}")
+                state['error_messages'].append(f"Insights generation failed: {str(e)}")
+                # Add fallback insights
+                state['insights'] = [
+                    "Basic dataset analysis completed",
+                    f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns",
+                    "Manual review recommended for detailed insights"
+                ]
+            try:
+                logger.info("📈 Step 4: Planning visualizations...")
+                state = self._plan_visualizations(state)
+                viz_count = len(state.get('visualizations', []))
+                logger.info(f"✅ Visualization planning completed. Planned: {viz_count} visualizations")
+            except Exception as e:
+                logger.error(f"❌ Visualization planning failed: {str(e)}")
+                state['error_messages'].append(f"Visualization planning failed: {str(e)}")
+            try:
+                logger.info("🎨 Step 5: Creating charts...")
+                state = self._create_charts(state)
+                logger.info("✅ Chart creation completed")
+            except Exception as e:
+                logger.error(f"❌ Chart creation failed: {str(e)}")
+                state['error_messages'].append(f"Chart creation failed: {str(e)}")
+            try:
+                logger.info("🎯 Step 6: Generating recommendations...")
+                state = self._generate_recommendations(state)
+                rec_count = len(state.get('recommendations', []))
+                logger.info(f"✅ Recommendations generation completed. Generated: {rec_count} recommendations")
+                if rec_count > 0:
+                    logger.info(f"First recommendation: {state['recommendations'][0][:100]}...")
+                else:
+                    logger.warning("⚠️ No recommendations were generated!")
+            except Exception as e:
+                logger.error(f"❌ Recommendations generation failed: {str(e)}")
+                state['error_messages'].append(f"Recommendations generation failed: {str(e)}")
+                # Add fallback recommendations
+                state['recommendations'] = [
+                    "Conduct detailed data quality assessment",
+                    "Implement data monitoring processes",
+                    "Consider advanced analytics for business insights",
+                    "Review data collection and validation procedures"
+                ]
+            # Clean up temp file
+            if os.path.exists(dataset_path):
+                try:
+                    os.remove(dataset_path)
+                    logger.info("🧹 Temporary file cleaned up")
+                except:
+                    pass
+            # Prepare results
+            results = {
+                "dataset_info": state.get("dataset_info", {}),
+                "column_analysis": state.get("column_analysis", {}),
+                "insights": state.get("insights", []),
+                "visualizations": state.get("visualizations", []),
+                "recommendations": state.get("recommendations", []),
+                "analysis_timestamp": datetime.now().isoformat(),
+                "errors": state.get("error_messages", [])
+            }
+            # Log final results
+            logger.info(f"🎉 Analysis completed!")
+            logger.info(f"📊 Final results: {len(results['insights'])} insights, {len(results['recommendations'])} recommendations")
+            logger.info(f"❌ Errors encountered: {len(results['errors'])}")
+            for error in results['errors']:
+                logger.error(f"Error details: {error}")
+            return results
+        except Exception as e:
+            logger.error(f"💥 Critical analysis failure: {str(e)}")
+            logger.error(traceback.format_exc())
+            return {
+                "error": str(e),
+                "dataset_info": {},
+                "insights": [f"Analysis failed: {str(e)}"],
+                "recommendations": ["Please check logs and try again"],
+                "visualizations": [],
+                "column_analysis": {},
+                "analysis_timestamp": datetime.now().isoformat(),
+                "errors": [str(e)]
+            }
     def _generate_report(self, results: Dict[str, Any], dataset_path: str):
         """Generate a comprehensive analysis report"""