jzou19950715 commited on
Commit
ca192a7
·
verified ·
1 Parent(s): 47e9852

Delete tools.py

Browse files
Files changed (1) hide show
  1. tools.py +0 -404
tools.py DELETED
@@ -1,404 +0,0 @@
1
- """
2
- Advanced Data Analysis Assistant with Interactive Visualizations
3
- Integrates smolagents, GPT-4, and interactive Plotly visualizations.
4
- """
5
-
6
- import json
7
- import logging
8
- import os
9
- import sys
10
- import subprocess
11
- from dataclasses import dataclass, asdict
12
- from datetime import datetime
13
- from pathlib import Path
14
- from typing import Any, Dict, List, Optional, Tuple, Union
15
- from functools import wraps
16
-
17
# Set up logging
# NOTE(review): this runs at import time and attaches a stream handler to the
# root logger; a later basicConfig call (e.g. in the __main__ guard) is a
# silent no-op unless it passes force=True.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
23
-
24
# Auto-install required packages
def install_missing_packages():
    """Install any required third-party package that is not importable.

    Maps each pip distribution name to the module name used for the import
    probe. The two differ for scikit-learn (imported as ``sklearn``); the
    original code probed ``__import__('scikit-learn')``, which always raises
    ImportError and therefore forced a pip reinstall on every startup.
    """
    # pip name -> importable module name
    required_packages = {
        'gradio': 'gradio',
        'pandas': 'pandas',
        'smolagents': 'smolagents',
        'plotly': 'plotly',
        'numpy': 'numpy',
        'scikit-learn': 'sklearn',
        'seaborn': 'seaborn',
        'openpyxl': 'openpyxl',  # For Excel support
    }

    for pip_name, module_name in required_packages.items():
        try:
            __import__(module_name)
        except ImportError:
            logger.info(f"Installing {pip_name}...")
            subprocess.check_call([sys.executable, "-m", "pip", "install", pip_name])

install_missing_packages()
45
-
46
- # Now import the installed packages
47
- import gradio as gr
48
- import pandas as pd
49
- import numpy as np
50
- from smolagents import CodeAgent, LiteLLMModel
51
-
52
- from tools import (
53
- create_time_series_plot,
54
- create_correlation_heatmap,
55
- create_statistical_summary,
56
- detect_outliers,
57
- validate_dataframe,
58
- get_numeric_columns,
59
- get_temporal_columns,
60
- )
61
-
62
# Custom Exceptions
class AnalysisError(Exception):
    """Root of this module's exception hierarchy; all domain errors derive from it."""
66
-
67
class DataValidationError(AnalysisError):
    """Raised when an uploaded file or DataFrame fails validation."""
70
-
71
class APIKeyError(AnalysisError):
    """Raised when the OpenAI API key is missing or otherwise unusable."""
74
-
75
# Constants
# File extensions accepted by the uploader (checked in process_file).
SUPPORTED_FILE_TYPES = [".csv", ".xlsx", ".xls"]
# LiteLLM model identifier used by DataAnalysisAssistant.
DEFAULT_MODEL = "gpt-4o-mini"
# JSON file where AnalysisHistory persists past analyses.
HISTORY_FILE = "analysis_history.json"
# Upload size cap in bytes.
MAX_FILE_SIZE = 100 * 1024 * 1024  # 100MB
80
-
81
@dataclass
class VisualizationConfig:
    """Default Plotly rendering options shared by the assistant's charts."""

    width: int = 800                # figure width in pixels
    height: int = 500               # figure height in pixels
    template: str = "plotly_white"  # Plotly layout template name
    show_grid: bool = True          # whether axis grid lines are drawn
    interactive: bool = True        # interactive vs. static rendering

    def to_dict(self) -> Dict[str, Any]:
        """Return the configuration as a plain field-name -> value dict."""
        return asdict(self)
93
-
94
def error_handler(func):
    """Decorator that converts exceptions raised by *func* into error strings.

    Domain errors (AnalysisError subclasses) are logged and reported as
    analysis errors; anything else is logged with a traceback and reported
    as unexpected. On success the wrapped function's result passes through
    untouched.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except AnalysisError as exc:
            logger.error(f"Analysis error: {str(exc)}")
            return f"Analysis error: {str(exc)}"
        except Exception as exc:
            logger.exception("Unexpected error occurred")
            return f"An unexpected error occurred: {str(exc)}"
    return wrapper
107
-
108
class AnalysisHistory:
    """Persists (timestamp, query, result) records as a JSON list on disk."""

    def __init__(self, history_file: str = HISTORY_FILE):
        self.history_file = Path(history_file)
        self.history: List[Dict] = self._load_history()

    def _load_history(self) -> List[Dict]:
        """Read history from disk; any failure yields an empty history."""
        if not self.history_file.exists():
            return []
        try:
            with self.history_file.open('r') as fh:
                return json.load(fh)
        except json.JSONDecodeError as exc:
            logger.error(f"Error loading history file: {exc}")
        except Exception:
            logger.exception("Unexpected error loading history")
        return []

    def _save_history(self) -> None:
        """Write history to disk, logging (never raising) on failure."""
        try:
            with self.history_file.open('w') as fh:
                json.dump(self.history, fh, indent=2)
        except Exception as exc:
            logger.error(f"Error saving history: {exc}")

    def add_entry(self, query: str, result: str) -> None:
        """Append a timestamped record and persist immediately."""
        self.history.append({
            'timestamp': datetime.now().isoformat(),
            'query': query,
            'result': result,
        })
        self._save_history()

    def get_recent_analyses(self, limit: int = 5) -> List[Dict]:
        """Return up to *limit* entries, newest first."""
        ordered = sorted(self.history, key=lambda entry: entry['timestamp'], reverse=True)
        return ordered[:limit]

    def clear_history(self) -> None:
        """Drop all entries and persist the now-empty list."""
        self.history = []
        self._save_history()
159
-
160
class DataPreprocessor:
    """Handles data preprocessing and validation."""

    @staticmethod
    def preprocess_dataframe(df: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """Validate *df*, collect metadata, and fill missing values.

        Returns:
            A tuple of (filled copy of df, metadata dict). Metadata describes
            the raw, pre-fill data (shape, NaN counts, dtypes, column groups,
            memory usage in MB).

        Raises:
            DataValidationError: if the DataFrame is empty or fails
                validate_dataframe.
        """
        if df.empty:
            raise DataValidationError("DataFrame is empty")

        # First validate the dataframe
        is_valid, error_msg = validate_dataframe(df)
        if not is_valid:
            raise DataValidationError(error_msg)

        # Generate metadata (computed before filling so NaN counts are real)
        metadata = {
            "original_shape": df.shape,
            "missing_values": df.isnull().sum().to_dict(),
            "dtypes": df.dtypes.astype(str).to_dict(),
            "numeric_columns": get_numeric_columns(df),
            "categorical_columns": df.select_dtypes(include=['object']).columns.tolist(),
            "temporal_columns": get_temporal_columns(df),
            "memory_usage": df.memory_usage(deep=True).sum() / (1024 * 1024)  # MB
        }

        # Handle missing values: forward-fill, then back-fill leading NaNs.
        # fillna(method=...) is deprecated since pandas 2.1 — use ffill/bfill.
        df = df.copy()  # Avoid modifying original
        df = df.ffill().bfill()

        return df, metadata
190
-
191
class DataAnalysisAssistant:
    """Enhanced data analysis assistant with visualization capabilities.

    Wraps a smolagents CodeAgent around a LiteLLM model and the plotting
    tools imported at the top of this file.
    """

    def __init__(self, api_key: str):
        # Raises APIKeyError when no key is supplied; the key is forwarded
        # verbatim to LiteLLM.
        if not api_key:
            raise APIKeyError("API key is required")

        self.model = LiteLLMModel(
            model_id=DEFAULT_MODEL,
            api_key=api_key
        )
        # Per-assistant persistent history and default chart settings.
        self.history = AnalysisHistory()
        self.viz_config = VisualizationConfig()

        # Agent equipped with the four analysis tools; the extra imports are
        # the modules the agent-generated code is permitted to use.
        self.agent = CodeAgent(
            model=self.model,
            tools=[
                create_time_series_plot,
                create_correlation_heatmap,
                create_statistical_summary,
                detect_outliers
            ],
            additional_authorized_imports=[
                'pandas', 'numpy', 'plotly.express', 'plotly.graph_objects',
                'seaborn', 'scipy', 'statsmodels'
            ],
        )

    @error_handler
    def analyze(self, df: pd.DataFrame, query: str) -> str:
        """Perform analysis with interactive visualizations.

        Preprocesses *df*, runs the agent on a generated prompt (the frame is
        passed to the agent as 'df'), records the query/response pair in
        history, and returns the HTML-wrapped response. error_handler turns
        exceptions into error strings.
        """
        df, metadata = DataPreprocessor.preprocess_dataframe(df)
        context = self._create_analysis_context(df, metadata, query)
        response = self.agent.run(context, additional_args={"df": df})
        self.history.add_entry(query, str(response))
        return self._format_results(response)

    def _create_analysis_context(self, df: pd.DataFrame, metadata: Dict, query: str) -> str:
        """Create detailed context (prompt text) for analysis.

        Combines DataFrame metadata, the tool catalogue, the user query and
        usage guidelines into one prompt string.
        """
        tools_description = """
        Available analysis tools:
        - create_time_series_plot: Create interactive time series visualizations
        - create_correlation_heatmap: Generate correlation analysis with heatmap
        - create_statistical_summary: Compute statistical summaries with visualizations
        - detect_outliers: Identify and visualize outliers
        """

        return f"""
        Analyze the following data with interactive visualizations.

        DataFrame Information:
        - Shape: {metadata['original_shape']}
        - Numeric columns: {', '.join(metadata['numeric_columns'])}
        - Categorical columns: {', '.join(metadata['categorical_columns'])}
        - Temporal columns: {', '.join(metadata['temporal_columns'])}

        {tools_description}

        User Query: {query}

        Guidelines:
        1. Use the provided analysis tools for visualizations
        2. Include clear titles and labels
        3. Handle errors gracefully
        4. Chain multiple analyses when needed
        5. Provide insights along with visualizations

        The DataFrame is available as 'df'.
        """

    def _format_results(self, response: str) -> str:
        """Format analysis results with visualizations.

        Wraps the agent response in the div styled by the interface CSS.
        """
        return f'<div class="analysis-text">{response}</div>'
264
-
265
@error_handler
def process_file(file: gr.File) -> Optional[pd.DataFrame]:
    """Process an uploaded file into a DataFrame.

    Validates presence, size (MAX_FILE_SIZE) and extension
    (SUPPORTED_FILE_TYPES) before reading with pandas.

    Raises:
        DataValidationError: for a missing, oversized, unsupported,
            or unreadable file.
    """
    if not file:
        raise DataValidationError("No file provided")

    file_path = Path(file.name)
    if file_path.stat().st_size > MAX_FILE_SIZE:
        raise DataValidationError(f"File size exceeds maximum limit of {MAX_FILE_SIZE/1024/1024}MB")

    # Compare extensions case-insensitively so e.g. "DATA.CSV" is accepted.
    suffix = file_path.suffix.lower()
    if suffix not in SUPPORTED_FILE_TYPES:
        raise DataValidationError(f"Unsupported file type: {file_path.suffix}")

    try:
        if suffix == '.csv':
            return pd.read_csv(file_path)
        else:  # .xlsx or .xls
            return pd.read_excel(file_path)
    except Exception as e:
        # Chain the underlying pandas error for easier debugging.
        raise DataValidationError(f"Error reading file: {str(e)}") from e
285
-
286
@error_handler
def analyze_data(
    file: gr.File,
    query: str,
    api_key: str,
) -> str:
    """Main analysis function for the Gradio interface.

    Validates the inputs, loads the uploaded file into a DataFrame, and
    delegates the analysis to a freshly constructed DataAnalysisAssistant.
    error_handler converts any raised error into a user-facing string.
    """
    if not api_key:
        raise APIKeyError("Please provide an API key")
    if not file:
        raise DataValidationError("Please upload a data file")

    frame = process_file(file)
    if frame is None:
        raise DataValidationError("Could not process file")

    return DataAnalysisAssistant(api_key).analyze(frame, query)
305
-
306
def create_interface() -> gr.Blocks:
    """Create enhanced Gradio interface.

    Builds the upload/query/API-key form, wires the Analyze button to
    analyze_data, and registers example queries. Returns the (unlaunched)
    Blocks app.
    """
    # Inline CSS: styles for plot containers, the analysis text emitted by
    # DataAnalysisAssistant._format_results, and error boxes.
    css = """
    .plot-container {
        margin: 20px 0;
        padding: 15px;
        border: 1px solid #e0e0e0;
        border-radius: 8px;
        background: white;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    }
    .analysis-text {
        margin: 20px 0;
        line-height: 1.6;
        font-size: 16px;
    }
    .error {
        color: #721c24;
        background-color: #f8d7da;
        padding: 10px;
        margin: 10px 0;
        border-left: 4px solid #f5c6cb;
        border-radius: 4px;
    }
    """

    with gr.Blocks(css=css) as interface:
        # Header / feature summary shown above the form.
        gr.Markdown("""
        # Advanced Data Analysis Assistant

        Upload your data and get AI-powered analysis with interactive visualizations.

        **Features:**
        - Interactive Plotly visualizations
        - GPT-4 powered analysis
        - Time series analysis
        - Statistical insights
        - Natural language queries

        **Required:** OpenAI API key
        """)

        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                file = gr.File(
                    label="Upload Data File",
                    file_types=SUPPORTED_FILE_TYPES
                )
                query = gr.Textbox(
                    label="What would you like to analyze?",
                    placeholder="e.g., Analyze trends and patterns in the data with interactive visualizations",
                    lines=3
                )
                api_key = gr.Textbox(
                    label="OpenAI API Key",
                    placeholder="Your API key",
                    type="password"  # mask the key in the UI
                )
                analyze_btn = gr.Button("Analyze")

            # Right column: HTML output from analyze_data.
            with gr.Column():
                output = gr.HTML(label="Analysis Results")

        # Button click runs the full pipeline.
        analyze_btn.click(
            analyze_data,
            inputs=[file, query, api_key],
            outputs=output
        )

        # Example prompts; file and API key must still be supplied by the user.
        gr.Examples(
            examples=[
                [None, "Show trends over time with interactive visualizations", None],
                [None, "Create a comprehensive analysis of relationships between variables", None],
                [None, "Analyze distributions and statistical patterns", None],
                [None, "Generate financial metrics and performance indicators", None],
            ],
            inputs=[file, query, api_key]
        )

    return interface
386
-
387
- if __name__ == "__main__":
388
- # Configure logging for production
389
- logging.basicConfig(
390
- filename='analysis_assistant.log',
391
- level=logging.INFO,
392
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
393
- )
394
-
395
- try:
396
- interface = create_interface()
397
- interface.launch(
398
- server_name="0.0.0.0",
399
- server_port=7860,
400
- share=True
401
- )
402
- except Exception as e:
403
- logger.exception("Failed to launch interface")
404
- raise