jzou19950715 committed on
Commit
829203b
·
verified ·
1 Parent(s): cce2b52

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +190 -80
tools.py CHANGED
@@ -4,14 +4,49 @@ Integrates smolagents, GPT-4, and interactive Plotly visualizations.
4
  """
5
 
6
  import json
 
7
  import os
8
- from dataclasses import dataclass
9
- from pathlib import Path
10
- from typing import Any, Dict, List, Optional, Union, Tuple
11
  from datetime import datetime
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
 
13
  import gradio as gr
14
  import pandas as pd
 
15
  from smolagents import CodeAgent, LiteLLMModel
16
 
17
  from tools import (
@@ -22,13 +57,26 @@ from tools import (
22
  validate_dataframe,
23
  get_numeric_columns,
24
  get_temporal_columns,
25
- AnalysisError
26
  )
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  # Constants
29
  SUPPORTED_FILE_TYPES = [".csv", ".xlsx", ".xls"]
30
  DEFAULT_MODEL = "gpt-4o-mini"
31
  HISTORY_FILE = "analysis_history.json"
 
32
 
33
  @dataclass
34
  class VisualizationConfig:
@@ -38,48 +86,54 @@ class VisualizationConfig:
38
  template: str = "plotly_white"
39
  show_grid: bool = True
40
  interactive: bool = True
41
-
42
- class DataPreprocessor:
43
- """Handles data preprocessing and validation."""
44
 
45
- @staticmethod
46
- def preprocess_dataframe(df: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, Any]]:
47
- """Preprocess the dataframe and return metadata."""
48
- # First validate the dataframe
49
- is_valid, error_msg = validate_dataframe(df)
50
- if not is_valid:
51
- raise ValueError(error_msg)
52
-
53
- metadata = {
54
- "original_shape": df.shape,
55
- "missing_values": df.isnull().sum().to_dict(),
56
- "dtypes": df.dtypes.astype(str).to_dict(),
57
- "numeric_columns": get_numeric_columns(df),
58
- "categorical_columns": df.select_dtypes(include=['object']).columns.tolist(),
59
- "temporal_columns": get_temporal_columns(df)
60
- }
61
-
62
- # Handle missing values
63
- df = df.fillna(method='ffill').fillna(method='bfill')
64
-
65
- return df, metadata
66
 
67
  class AnalysisHistory:
68
  """Manages analysis history and persistence."""
69
 
70
  def __init__(self, history_file: str = HISTORY_FILE):
71
- self.history_file = history_file
72
- self.history = self._load_history()
73
 
74
  def _load_history(self) -> List[Dict]:
75
- if os.path.exists(self.history_file):
 
76
  try:
77
- with open(self.history_file, 'r') as f:
78
  return json.load(f)
79
- except:
 
 
 
 
80
  return []
81
  return []
82
-
 
 
 
 
 
 
 
 
83
  def add_entry(self, query: str, result: str) -> None:
84
  """Add new analysis entry to history."""
85
  entry = {
@@ -88,10 +142,8 @@ class AnalysisHistory:
88
  'result': result
89
  }
90
  self.history.append(entry)
91
-
92
- with open(self.history_file, 'w') as f:
93
- json.dump(self.history, f)
94
-
95
  def get_recent_analyses(self, limit: int = 5) -> List[Dict]:
96
  """Get recent analysis entries."""
97
  return sorted(
@@ -99,16 +151,56 @@ class AnalysisHistory:
99
  key=lambda x: x['timestamp'],
100
  reverse=True
101
  )[:limit]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  class DataAnalysisAssistant:
104
  """Enhanced data analysis assistant with visualization capabilities."""
105
 
106
  def __init__(self, api_key: str):
 
 
 
107
  self.model = LiteLLMModel(
108
  model_id=DEFAULT_MODEL,
109
  api_key=api_key
110
  )
111
  self.history = AnalysisHistory()
 
112
 
113
  self.agent = CodeAgent(
114
  model=self.model,
@@ -123,18 +215,16 @@ class DataAnalysisAssistant:
123
  'seaborn', 'scipy', 'statsmodels'
124
  ],
125
  )
126
-
 
127
  def analyze(self, df: pd.DataFrame, query: str) -> str:
128
  """Perform analysis with interactive visualizations."""
129
- try:
130
- df, metadata = DataPreprocessor.preprocess_dataframe(df)
131
- context = self._create_analysis_context(df, metadata, query)
132
- response = self.agent.run(context, additional_args={"df": df})
133
- self.history.add_entry(query, str(response))
134
- return self._format_results(response)
135
- except Exception as e:
136
- return f"Analysis failed: {str(e)}"
137
-
138
  def _create_analysis_context(self, df: pd.DataFrame, metadata: Dict, query: str) -> str:
139
  """Create detailed context for analysis."""
140
  tools_description = """
@@ -172,22 +262,28 @@ class DataAnalysisAssistant:
172
  """Format analysis results with visualizations."""
173
  return f'<div class="analysis-text">{response}</div>'
174
 
 
175
  def process_file(file: gr.File) -> Optional[pd.DataFrame]:
176
  """Process uploaded file into DataFrame."""
177
  if not file:
178
- return None
179
-
 
 
 
 
 
 
 
180
  try:
181
- file_path = Path(file.name)
182
  if file_path.suffix == '.csv':
183
  return pd.read_csv(file_path)
184
- elif file_path.suffix in ('.xlsx', '.xls'):
185
  return pd.read_excel(file_path)
186
- else:
187
- raise ValueError(f"Unsupported file type: {file_path.suffix}")
188
  except Exception as e:
189
- raise RuntimeError(f"Error reading file: {str(e)}")
190
 
 
191
  def analyze_data(
192
  file: gr.File,
193
  query: str,
@@ -195,23 +291,19 @@ def analyze_data(
195
  ) -> str:
196
  """Main analysis function for Gradio interface."""
197
  if not api_key:
198
- return "Error: Please provide an API key"
199
-
200
  if not file:
201
- return "Error: Please upload a data file"
202
-
203
- try:
204
- df = process_file(file)
205
- if df is None:
206
- return "Error: Could not process file"
207
-
208
- assistant = DataAnalysisAssistant(api_key)
209
- return assistant.analyze(df, query)
210
-
211
- except Exception as e:
212
- return f"Error: {str(e)}"
213
 
214
- def create_interface():
215
  """Create enhanced Gradio interface."""
216
  css = """
217
  .plot-container {
@@ -225,12 +317,15 @@ def create_interface():
225
  .analysis-text {
226
  margin: 20px 0;
227
  line-height: 1.6;
 
228
  }
229
  .error {
230
- color: red;
 
231
  padding: 10px;
232
  margin: 10px 0;
233
- border-left: 4px solid red;
 
234
  }
235
  """
236
 
@@ -242,7 +337,7 @@ def create_interface():
242
 
243
  **Features:**
244
  - Interactive Plotly visualizations
245
- - gpt-4o-mini powered analysis
246
  - Time series analysis
247
  - Statistical insights
248
  - Natural language queries
@@ -279,16 +374,31 @@ def create_interface():
279
 
280
  gr.Examples(
281
  examples=[
282
- [None, "Show trends over time with interactive visualizations"],
283
- [None, "Create a comprehensive analysis of relationships between variables"],
284
- [None, "Analyze distributions and statistical patterns"],
285
- [None, "Generate financial metrics and performance indicators"],
286
  ],
287
- inputs=[file, query]
288
  )
289
 
290
  return interface
291
 
292
  if __name__ == "__main__":
293
- interface = create_interface()
294
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
 
6
  import json
7
+ import logging
8
  import os
9
+ import sys
10
+ import subprocess
11
+ from dataclasses import dataclass, asdict
12
  from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Any, Dict, List, Optional, Tuple, Union
15
+ from functools import wraps
16
+
17
+ # Set up logging
18
+ logging.basicConfig(
19
+ level=logging.INFO,
20
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
21
+ )
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Auto-install required packages
25
+ def install_missing_packages():
26
+ required_packages = [
27
+ 'gradio',
28
+ 'pandas',
29
+ 'smolagents',
30
+ 'plotly',
31
+ 'numpy',
32
+ 'scikit-learn',
33
+ 'seaborn',
34
+ 'openpyxl' # For Excel support
35
+ ]
36
+
37
+ for package in required_packages:
38
+ try:
39
+ __import__(package)
40
+ except ImportError:
41
+ logger.info(f"Installing {package}...")
42
+ subprocess.check_call([sys.executable, "-m", "pip", "install", package])
43
+
44
+ install_missing_packages()
45
 
46
+ # Now import the installed packages
47
  import gradio as gr
48
  import pandas as pd
49
+ import numpy as np
50
  from smolagents import CodeAgent, LiteLLMModel
51
 
52
  from tools import (
 
57
  validate_dataframe,
58
  get_numeric_columns,
59
  get_temporal_columns,
 
60
  )
61
 
62
# Custom Exceptions
class AnalysisError(Exception):
    """Base exception for analysis errors."""


class DataValidationError(AnalysisError):
    """Exception for data validation errors."""


class APIKeyError(AnalysisError):
    """Exception for API key related errors."""
74
+
75
  # Constants
76
  SUPPORTED_FILE_TYPES = [".csv", ".xlsx", ".xls"]
77
  DEFAULT_MODEL = "gpt-4o-mini"
78
  HISTORY_FILE = "analysis_history.json"
79
+ MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB
80
 
81
  @dataclass
82
  class VisualizationConfig:
 
86
  template: str = "plotly_white"
87
  show_grid: bool = True
88
  interactive: bool = True
 
 
 
89
 
90
+ def to_dict(self) -> Dict[str, Any]:
91
+ """Convert config to dictionary."""
92
+ return asdict(self)
93
+
94
def error_handler(func):
    """Decorator for handling errors gracefully."""

    @wraps(func)
    def safe_call(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except AnalysisError as exc:
            # Known, expected failures: log and surface the same message.
            message = f"Analysis error: {str(exc)}"
            logger.error(message)
            return message
        except Exception as exc:
            # Anything else: log the traceback, return a generic message.
            logger.exception("Unexpected error occurred")
            return f"An unexpected error occurred: {str(exc)}"

    return safe_call
 
 
 
 
107
 
108
class AnalysisHistory:
    """Manages analysis history and persistence."""

    def __init__(self, history_file: str = HISTORY_FILE):
        self.history_file = Path(history_file)
        self.history: List[Dict] = self._load_history()

    def _load_history(self) -> List[Dict]:
        """Load history from file with error handling."""
        # Guard clause: nothing persisted yet.
        if not self.history_file.exists():
            return []
        try:
            with self.history_file.open('r') as f:
                return json.load(f)
        except json.JSONDecodeError as e:
            logger.error(f"Error loading history file: {e}")
        except Exception as e:
            logger.exception("Unexpected error loading history")
        return []

    def _save_history(self) -> None:
        """Save history to file with error handling."""
        try:
            with self.history_file.open('w') as f:
                json.dump(self.history, f, indent=2)
        except Exception as e:
            # Best-effort persistence: never let a save failure break analysis.
            logger.error(f"Error saving history: {e}")

    def add_entry(self, query: str, result: str) -> None:
        """Add new analysis entry to history."""
        # NOTE(review): the 'timestamp'/'query' keys were elided in the diff
        # view and are reconstructed from get_recent_analyses' sort key —
        # confirm against the original source.
        entry = {
            'timestamp': datetime.now().isoformat(),
            'query': query,
            'result': result
        }
        self.history.append(entry)
        self._save_history()

    def get_recent_analyses(self, limit: int = 5) -> List[Dict]:
        """Get recent analysis entries."""
        newest_first = sorted(self.history, key=lambda x: x['timestamp'], reverse=True)
        return newest_first[:limit]

    def clear_history(self) -> None:
        """Clear analysis history."""
        self.history = []
        self._save_history()
159
+
160
class DataPreprocessor:
    """Handles data preprocessing and validation."""

    @staticmethod
    def preprocess_dataframe(df: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """Preprocess the dataframe and return metadata.

        Args:
            df: Input data to validate and clean.

        Returns:
            A ``(cleaned_df, metadata)`` tuple; metadata describes the
            ORIGINAL frame (shape, dtypes, missing values, column roles).

        Raises:
            DataValidationError: If the frame is empty or fails validation.
        """
        if df.empty:
            raise DataValidationError("DataFrame is empty")

        # First validate the dataframe
        is_valid, error_msg = validate_dataframe(df)
        if not is_valid:
            raise DataValidationError(error_msg)

        # Generate metadata
        metadata = {
            "original_shape": df.shape,
            "missing_values": df.isnull().sum().to_dict(),
            "dtypes": df.dtypes.astype(str).to_dict(),
            "numeric_columns": get_numeric_columns(df),
            "categorical_columns": df.select_dtypes(include=['object']).columns.tolist(),
            "temporal_columns": get_temporal_columns(df),
            "memory_usage": df.memory_usage(deep=True).sum() / (1024 * 1024)  # MB
        }

        # Handle missing values. fillna(method='ffill'/'bfill') is deprecated
        # (removed in pandas 3.x); .ffill()/.bfill() are the supported
        # equivalents and already return a new frame, so no explicit .copy()
        # is needed to avoid mutating the caller's data.
        df = df.ffill().bfill()

        return df, metadata
190
 
191
  class DataAnalysisAssistant:
192
  """Enhanced data analysis assistant with visualization capabilities."""
193
 
194
  def __init__(self, api_key: str):
195
+ if not api_key:
196
+ raise APIKeyError("API key is required")
197
+
198
  self.model = LiteLLMModel(
199
  model_id=DEFAULT_MODEL,
200
  api_key=api_key
201
  )
202
  self.history = AnalysisHistory()
203
+ self.viz_config = VisualizationConfig()
204
 
205
  self.agent = CodeAgent(
206
  model=self.model,
 
215
  'seaborn', 'scipy', 'statsmodels'
216
  ],
217
  )
218
+
219
+ @error_handler
220
  def analyze(self, df: pd.DataFrame, query: str) -> str:
221
  """Perform analysis with interactive visualizations."""
222
+ df, metadata = DataPreprocessor.preprocess_dataframe(df)
223
+ context = self._create_analysis_context(df, metadata, query)
224
+ response = self.agent.run(context, additional_args={"df": df})
225
+ self.history.add_entry(query, str(response))
226
+ return self._format_results(response)
227
+
 
 
 
228
  def _create_analysis_context(self, df: pd.DataFrame, metadata: Dict, query: str) -> str:
229
  """Create detailed context for analysis."""
230
  tools_description = """
 
262
  """Format analysis results with visualizations."""
263
  return f'<div class="analysis-text">{response}</div>'
264
 
265
@error_handler
def process_file(file: gr.File) -> Optional[pd.DataFrame]:
    """Process uploaded file into DataFrame.

    Args:
        file: Gradio file wrapper; ``file.name`` is a path on disk.

    Returns:
        The parsed DataFrame (error_handler converts raised errors into
        error-message strings for the UI).

    Raises:
        DataValidationError: If no file is given, it is too large, has an
            unsupported extension, or cannot be parsed.
    """
    if not file:
        raise DataValidationError("No file provided")

    file_path = Path(file.name)
    if file_path.stat().st_size > MAX_FILE_SIZE:
        raise DataValidationError(f"File size exceeds maximum limit of {MAX_FILE_SIZE/1024/1024}MB")

    # Compare case-insensitively so ".CSV"/".XLSX" uploads are accepted too.
    suffix = file_path.suffix.lower()
    if suffix not in SUPPORTED_FILE_TYPES:
        raise DataValidationError(f"Unsupported file type: {file_path.suffix}")

    try:
        if suffix == '.csv':
            return pd.read_csv(file_path)
        else:  # .xlsx or .xls
            return pd.read_excel(file_path)
    except Exception as e:
        # Chain the original parse error so the root cause stays visible.
        raise DataValidationError(f"Error reading file: {str(e)}") from e
285
 
286
@error_handler
def analyze_data(
    file: gr.File,
    query: str,
    api_key: str
) -> str:
    """Main analysis function for Gradio interface.

    Returns the formatted analysis HTML, or an error-message string
    (produced by the error_handler decorator when a validation step raises).
    """
    if not api_key:
        raise APIKeyError("Please provide an API key")

    if not file:
        raise DataValidationError("Please upload a data file")

    df = process_file(file)
    # process_file is itself wrapped in error_handler, so on failure it
    # returns an error *string* instead of raising. The original `df is None`
    # check missed that and passed the string on to the assistant; treat
    # anything that is not a DataFrame as a failed load.
    if not isinstance(df, pd.DataFrame):
        raise DataValidationError(df if isinstance(df, str) else "Could not process file")

    assistant = DataAnalysisAssistant(api_key)
    return assistant.analyze(df, query)
 
 
 
 
305
 
306
+ def create_interface() -> gr.Blocks:
307
  """Create enhanced Gradio interface."""
308
  css = """
309
  .plot-container {
 
317
  .analysis-text {
318
  margin: 20px 0;
319
  line-height: 1.6;
320
+ font-size: 16px;
321
  }
322
  .error {
323
+ color: #721c24;
324
+ background-color: #f8d7da;
325
  padding: 10px;
326
  margin: 10px 0;
327
+ border-left: 4px solid #f5c6cb;
328
+ border-radius: 4px;
329
  }
330
  """
331
 
 
337
 
338
  **Features:**
339
  - Interactive Plotly visualizations
340
+ - GPT-4 powered analysis
341
  - Time series analysis
342
  - Statistical insights
343
  - Natural language queries
 
374
 
375
  gr.Examples(
376
  examples=[
377
+ [None, "Show trends over time with interactive visualizations", None],
378
+ [None, "Create a comprehensive analysis of relationships between variables", None],
379
+ [None, "Analyze distributions and statistical patterns", None],
380
+ [None, "Generate financial metrics and performance indicators", None],
381
  ],
382
+ inputs=[file, query, api_key]
383
  )
384
 
385
  return interface
386
 
387
if __name__ == "__main__":
    # Reconfigure logging for production. logging.basicConfig is a silent
    # no-op once the root logger has handlers — and it was already configured
    # at import time above — so force=True is required for this file handler
    # to actually take effect.
    logging.basicConfig(
        filename='analysis_assistant.log',
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        force=True
    )

    try:
        interface = create_interface()
        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=True
        )
    except Exception:
        # Log the full traceback, then re-raise so the process exits nonzero.
        logger.exception("Failed to launch interface")
        raise