Spaces:

jzou19950715
/

Huggingface_AI_Agent_Function_Testing_2

Configuration error

App Files Community

jzou19950715 committed on Jan 24, 2025

Commit

356d9f3

verified ·

1 Parent(s): 54d2c65

Update app.py

Browse files

Files changed (1) hide show

app.py +176 -279

app.py CHANGED Viewed

@@ -1,283 +1,207 @@
 import base64
 import io
 import os
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, List, Optional, Union
 import gradio as gr
-import matplotlib.pyplot as plt
-import numpy as np
 import pandas as pd
-import seaborn as sns
 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-from litellm import completion
-class CodeEnvironment:
-    """Safe environment for executing code with data analysis capabilities"""
-    def __init__(self):
-        # Initialize libraries in globals
-        self.globals = {
-            'pd': pd,
-            'np': np,
-            'plt': plt,
-            'sns': sns,
-            'px': px,
-            'go': go,
-            'make_subplots': make_subplots
-        }
-        self.locals = {}
-    def execute(self, code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
-        """Execute code and capture both static and interactive outputs"""
-        if df is not None:
-            self.globals['df'] = df
-        # Capture output
-        output_buffer = io.StringIO()
-        # Redirect stdout to capture print statements
-        import sys
-        sys.stdout = output_buffer
-        result = {
-            'output': '',
-            'figures': [],      # For matplotlib figures
-            'plotly_html': [],  # For Plotly figures
-            'error': None
-        }
-        try:
-            # Execute code
-            exec(code, self.globals, self.locals)
-            # Capture matplotlib figures
-            for i in plt.get_fignums():
-                fig = plt.figure(i)
-                buf = io.BytesIO()
-                fig.savefig(buf, format='png')
-                buf.seek(0)
-                img_str = base64.b64encode(buf.read()).decode()
-                result['figures'].append(f"data:image/png;base64,{img_str}")
-                plt.close(fig)
-            # Capture Plotly figures
-            if 'fig' in self.locals:
-                if isinstance(self.locals['fig'], (go.Figure, px.Figure)):
-                    # Convert Plotly figure to HTML
-                    html = self.locals['fig'].to_html(
-                        include_plotlyjs=True,
-                        full_html=False,
-                        config={'displayModeBar': True}
-                    )
-                    result['plotly_html'].append(html)
-            # Get printed output
-            result['output'] = output_buffer.getvalue()
-        except Exception as e:
-            result['error'] = str(e)
-        finally:
-            # Reset stdout
-            sys.stdout = sys.__stdout__
-            output_buffer.close()
-        return result
-@dataclass
-class Tool:
-    """Tool for data analysis"""
-    name: str
-    description: str
-    func: Callable
-class AnalysisAgent:
-    """Agent that can analyze data and execute code"""
-    def __init__(
-        self,
-        model_id: str = "gpt-4o-mini",
-        temperature: float = 0.7,
-    ):
-        self.model_id = model_id
-        self.temperature = temperature
-        self.tools: List[Tool] = []
-        self.code_env = CodeEnvironment()
-    def run(self, prompt: str, df: pd.DataFrame = None) -> str:
-        """Run analysis with code execution"""
-        messages = [
-            {"role": "system", "content": self._get_system_prompt()},
-            {"role": "user", "content": prompt}
-        ]
-        try:
-            # Get response from model
-            response = completion(
-                model=self.model_id,
-                messages=messages,
-                temperature=self.temperature,
-            )
-            analysis = response.choices[0].message.content
-            # Extract code blocks
-            code_blocks = self._extract_code(analysis)
-            # Execute code and capture results
-            results = []
-            for code in code_blocks:
-                result = self.code_env.execute(code, df)
-                if result['error']:
-                    results.append(f"Error executing code: {result['error']}")
-                else:
-                    # Add output text
-                    if result['output']:
-                        results.append(result['output'])
-                    # Add Plotly interactive visualizations
-                    for html in result['plotly_html']:
-                        results.append(f'<div class="plot-container">{html}</div>')
-                    # Add static matplotlib figures as fallback
-                    for fig in result['figures']:
-                        results.append(f'<img src="{fig}" style="max-width: 100%; height: auto;">')
-            # Combine analysis and results
-            return f'<div class="analysis-text">{analysis}</div>' + "\n\n" + "\n".join(results)
-        except Exception as e:
-            return f"Error: {str(e)}"
-    def _get_system_prompt(self) -> str:
-        """Get system prompt with tools and capabilities"""
-        tools_desc = "\n".join([
-            f"- {tool.name}: {tool.description}"
-            for tool in self.tools
-        ])
-        return """You are a data analysis assistant with interactive visualization capabilities.
-When analyzing data, use Plotly for interactive visualizations. Here are examples:
-```python
-# Create interactive scatter plot
-import plotly.express as px
-fig = px.scatter(df, x='Date', y='Salary', color='Title')
-fig.show()  # This will be captured and displayed
-# Create interactive box plot
-fig = px.box(df, x='Title', y='Salary')
-fig.show()
-# Create interactive time series
-fig = px.line(df, x='Date', y='Salary', color='Title')
-fig.show()
-```
-Remember to:
-1. Always store Plotly figures in a variable named 'fig'
-2. Use fig.show() to display the plot
-3. Create clear labels and titles
-4. Include hover information
-5. Use colors effectively
-For static visualizations, you can still use matplotlib:
-```python
-import matplotlib.pyplot as plt
-plt.figure(figsize=(10, 6))
-plt.plot(df['Date'], df['Salary'])
-plt.show()
-```
-"""
-    @staticmethod
-    def _extract_code(text: str) -> List[str]:
-        """Extract Python code blocks from markdown"""
-        import re
-        pattern = r'```python\n(.*?)```'
-        return re.findall(pattern, text, re.DOTALL)
 def process_file(file: gr.File) -> Optional[pd.DataFrame]:
-    """Process uploaded file into DataFrame"""
     if not file:
         return None
     try:
-        if file.name.endswith('.csv'):
-            return pd.read_csv(file.name)
-        elif file.name.endswith(('.xlsx', '.xls')):
-            return pd.read_excel(file.name)
     except Exception as e:
-        print(f"Error reading file: {str(e)}")
-    return None
-def analyze_data(
-    file: gr.File,
-    query: str,
-    api_key: str,
-    temperature: float = 0.7,
-) -> str:
-    """Process user request and generate enhanced analysis"""
     if not api_key:
-        return "Error: Please provide an API key."
     if not file:
-        return "Error: Please upload a file."
     try:
-        # Set up environment
-        os.environ["OPENAI_API_KEY"] = api_key
-        # Create agent
-        agent = AnalysisAgent(
-            model_id="gpt-4o-mini",
-            temperature=temperature
-        )
-        # Process file
         df = process_file(file)
         if df is None:
-            return "Error: Could not process file."
-        # Build context
-        file_info = f"""
-        File: {file.name}
-        Shape: {df.shape}
-        Columns: {', '.join(df.columns)}
-        Column Types:
-        {chr(10).join([f'- {col}: {dtype}' for col, dtype in df.dtypes.items()])}
-        """
-        # Run analysis
-        prompt = f"""
-        {file_info}
-        The data is loaded in a pandas DataFrame called 'df'.
-        User request: {query}
-        Please analyze the data and provide:
-        1. Key insights and findings
-        2. Interactive visualizations where appropriate
-        3. Statistical summaries when relevant
-        4. Clear explanations of patterns and trends
-        """
-        return agent.run(prompt, df=df)
     except Exception as e:
-        return f"Error occurred: {str(e)}"
 def create_interface():
-    """Create enhanced Gradio interface"""
     css = """
     .plot-container {
         margin: 20px 0;
@@ -285,70 +209,43 @@ def create_interface():
         border: 1px solid #e0e0e0;
         border-radius: 8px;
         background: white;
-        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     }
     """
-    with gr.Blocks(title="AI Data Analysis Assistant", css=css) as interface:
         gr.Markdown("""
-        # AI Data Analysis Assistant
-        Upload your data file and get AI-powered analysis with interactive visualizations.
-        **Features:**
-        - Interactive data visualization
-        - Statistical analysis
-        - Machine learning capabilities
-        - Natural language interaction
-        **Note**: Requires your own OpenAI API key.
         """)
         with gr.Row():
             with gr.Column():
                 file = gr.File(
                     label="Upload Data File",
-                    file_types=[".csv", ".xlsx", ".xls"]
                 )
                 query = gr.Textbox(
                     label="What would you like to analyze?",
-                    placeholder="e.g., Create interactive visualizations showing relationships between variables",
                     lines=3
                 )
                 api_key = gr.Textbox(
-                    label="API Key (Required)",
-                    placeholder="Your API key",
                     type="password"
                 )
-                temperature = gr.Slider(
-                    label="Temperature",
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=0.7,
-                    step=0.1
-                )
                 analyze_btn = gr.Button("Analyze")
             with gr.Column():
-                output = gr.HTML(label="Output")  # Changed to HTML for interactive plots
         analyze_btn.click(
             analyze_data,
-            inputs=[file, query, api_key, temperature],
             outputs=output
         )
-        gr.Examples(
-            examples=[
-                [None, "Create interactive visualizations showing relationships between variables"],
-                [None, "Show the distribution of values with interactive plots"],
-                [None, "Create an interactive correlation analysis"],
-                [None, "Show trends over time with interactive charts"],
-                [None, "Generate a comprehensive analysis with multiple visualizations"],
-            ],
-            inputs=[file, query]
-        )
     return interface
 if __name__ == "__main__":

+"""
+Enhanced Data Analysis Assistant using smolagents for more powerful analysis capabilities.
+"""
 import base64
 import io
 import os
 from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Union
+from pathlib import Path
 import gradio as gr
 import pandas as pd
+import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
+import matplotlib.pyplot as plt
+import seaborn as sns
+from smolagents import CodeAgent, tool
+# Constants
+SUPPORTED_FILE_TYPES = [".csv", ".xlsx", ".xls"]
+DEFAULT_MODEL = "gpt-4o-mini"
+@tool
+def create_plotly_visualization(df: pd.DataFrame, plot_type: str, x: str, y: str,
+                              color: Optional[str] = None, title: Optional[str] = None) -> str:
+    """Create an interactive Plotly visualization.
+    Args:
+        df: DataFrame to visualize
+        plot_type: Type of plot (scatter, line, bar, box)
+        x: Column for x-axis
+        y: Column for y-axis
+        color: Optional column for color encoding
+        title: Optional plot title
+    Returns:
+        HTML string of the plot
+    """
+    if plot_type == "scatter":
+        fig = px.scatter(df, x=x, y=y, color=color, title=title)
+    elif plot_type == "line":
+        fig = px.line(df, x=x, y=y, color=color, title=title)
+    elif plot_type == "bar":
+        fig = px.bar(df, x=x, y=y, color=color, title=title)
+    elif plot_type == "box":
+        fig = px.box(df, x=x, y=y, color=color, title=title)
+    else:
+        raise ValueError(f"Unsupported plot type: {plot_type}")
+    return fig.to_html(include_plotlyjs=True, full_html=False)
+@tool
+def calculate_statistics(df: pd.DataFrame, columns: List[str]) -> Dict[str, Any]:
+    """Calculate basic statistics for specified columns.
+    Args:
+        df: DataFrame to analyze
+        columns: List of columns to analyze
+    Returns:
+        Dictionary of statistics
+    """
+    stats = {}
+    for col in columns:
+        if pd.api.types.is_numeric_dtype(df[col]):
+            stats[col] = {
+                "mean": df[col].mean(),
+                "median": df[col].median(),
+                "std": df[col].std(),
+                "min": df[col].min(),
+                "max": df[col].max(),
+                "missing": df[col].isna().sum()
+            }
+    return stats
+@tool
+def correlation_analysis(df: pd.DataFrame, threshold: float = 0.5) -> str:
+    """Generate correlation analysis with interactive heatmap.
+    Args:
+        df: DataFrame to analyze
+        threshold: Correlation threshold to highlight
+    Returns:
+        HTML string of the correlation heatmap
+    """
+    numeric_df = df.select_dtypes(include=[np.number])
+    corr = numeric_df.corr()
+    fig = go.Figure(data=go.Heatmap(
+        z=corr,
+        x=corr.columns,
+        y=corr.columns,
+        colorscale='RdBu',
+    ))
+    fig.update_layout(
+        title="Correlation Heatmap",
+        height=600,
+    )
+    return fig.to_html(include_plotlyjs=True, full_html=False)
+class DataAnalysisAssistant:
+    """Enhanced data analysis assistant using smolagents."""
+    def __init__(self, api_key: str, model_id: str = DEFAULT_MODEL):
+        """Initialize the assistant with API key and model."""
+        os.environ["OPENAI_API_KEY"] = api_key
+        self.agent = CodeAgent(
+            tools=[
+                create_plotly_visualization,
+                calculate_statistics,
+                correlation_analysis
+            ],
+            model=model_id,
+            additional_authorized_imports=[
+                "pandas",
+                "numpy",
+                "plotly.express",
+                "plotly.graph_objects",
+                "seaborn",
+            ]
+        )
+    def analyze(self, df: pd.DataFrame, query: str) -> str:
+        """Run analysis using the agent.
+        Args:
+            df: DataFrame to analyze
+            query: User's analysis request
+        Returns:
+            HTML string containing analysis and visualizations
+        """
+        context = f"""
+        Available DataFrame (as 'df'):
+        - Shape: {df.shape}
+        - Columns: {', '.join(df.columns)}
+        - Data Types:
+        {chr(10).join([f'  • {col}: {dtype}' for col, dtype in df.dtypes.items()])}
+        User Query: {query}
+        Please provide:
+        1. Data insights and findings
+        2. Interactive visualizations where appropriate
+        3. Statistical analysis
+        4. Clear explanations
+        You can use these tools:
+        - create_plotly_visualization: Creates interactive Plotly plots
+        - calculate_statistics: Provides statistical summaries
+        - correlation_analysis: Generates correlation heatmaps
+        """
+        try:
+            result = self.agent.run(context, additional_args={"df": df})
+            return str(result)
+        except Exception as e:
+            return f"Analysis failed: {str(e)}"
 def process_file(file: gr.File) -> Optional[pd.DataFrame]:
+    """Process uploaded file into DataFrame."""
     if not file:
         return None
     try:
+        file_path = Path(file.name)
+        if file_path.suffix == '.csv':
+            return pd.read_csv(file_path)
+        elif file_path.suffix in ('.xlsx', '.xls'):
+            return pd.read_excel(file_path)
+        else:
+            raise ValueError(f"Unsupported file type: {file_path.suffix}")
     except Exception as e:
+        raise RuntimeError(f"Error reading file: {str(e)}")
+def analyze_data(file: gr.File, query: str, api_key: str) -> str:
+    """Main analysis function for Gradio interface."""
     if not api_key:
+        return "Error: Please provide an API key"
     if not file:
+        return "Error: Please upload a data file"
     try:
         df = process_file(file)
         if df is None:
+            return "Error: Could not process file"
+        assistant = DataAnalysisAssistant(api_key)
+        return assistant.analyze(df, query)
     except Exception as e:
+        return f"Error: {str(e)}"
 def create_interface():
+    """Create Gradio interface."""
     css = """
     .plot-container {
         margin: 20px 0;
         border: 1px solid #e0e0e0;
         border-radius: 8px;
         background: white;
     }
     """
+    with gr.Blocks(css=css) as interface:
         gr.Markdown("""
+        # Enhanced Data Analysis Assistant
+        Powered by smolagents for more intelligent analysis
         """)
         with gr.Row():
             with gr.Column():
                 file = gr.File(
                     label="Upload Data File",
+                    file_types=SUPPORTED_FILE_TYPES
                 )
                 query = gr.Textbox(
                     label="What would you like to analyze?",
+                    placeholder="e.g., Show relationships between variables with interactive plots",
                     lines=3
                 )
                 api_key = gr.Textbox(
+                    label="API Key",
+                    placeholder="Your OpenAI API key",
                     type="password"
                 )
                 analyze_btn = gr.Button("Analyze")
             with gr.Column():
+                output = gr.HTML(label="Analysis Results")
         analyze_btn.click(
             analyze_data,
+            inputs=[file, query, api_key],
             outputs=output
         )
     return interface
 if __name__ == "__main__":