Spaces:

jzou19950715
/

Huggingface_AI_Agent_Function_Testing_2

Configuration error

App Files Files Community

jzou19950715 commited on Jan 24, 2025

Commit

38571e1

verified ·

1 Parent(s): 5e23ce2

Update app.py

Browse files

Files changed (1) hide show

app.py +296 -335

app.py CHANGED Viewed

@@ -1,325 +1,297 @@
-import os
-from typing import List, Optional, Tuple, Dict, Any
 import base64
 import io
 import gradio as gr
-import pandas as pd
 import numpy as np
 import plotly.graph_objects as go
 from litellm import completion
-class DataAnalyzer:
-    """Handles data analysis and visualization"""
     def __init__(self):
-        self.data: Optional[pd.DataFrame] = None
-        self.width = 800
-        self.height = 500
-    def create_histogram(self, column: str, bins: int = 30, title: str = "") -> str:
-        """Create histogram with Plotly"""
-        if self.data is None:
-            raise ValueError("No data loaded")
-        fig = go.Figure()
-        fig.add_trace(go.Histogram(
-            x=self.data[column],
-            nbinsx=bins,
-            name=column
-        ))
-        fig.update_layout(
-            title=title,
-            xaxis_title=column,
-            yaxis_title="Count",
-            width=self.width,
-            height=self.height,
-            template="plotly_white"
-        )
-        return fig.to_html(include_plotlyjs=True, full_html=False)
-    def create_scatter(self, x_col: str, y_col: str, color_col: Optional[str] = None,
-                      title: str = "") -> str:
-        """Create scatter plot with Plotly"""
-        if self.data is None:
-            raise ValueError("No data loaded")
-        fig = go.Figure()
-        if color_col:
-            for category in self.data[color_col].unique():
-                mask = self.data[color_col] == category
-                fig.add_trace(go.Scatter(
-                    x=self.data[mask][x_col],
-                    y=self.data[mask][y_col],
-                    mode='markers',
-                    name=str(category),
-                    text=self.data[mask][color_col]
-                ))
-        else:
-            fig.add_trace(go.Scatter(
-                x=self.data[x_col],
-                y=self.data[y_col],
-                mode='markers'
-            ))
-        fig.update_layout(
-            title=title,
-            xaxis_title=x_col,
-            yaxis_title=y_col,
-            width=self.width,
-            height=self.height,
-            template="plotly_white",
-            hovermode='closest'
-        )
-        return fig.to_html(include_plotlyjs=True, full_html=False)
-    def create_box(self, x_col: str, y_col: str, title: str = "") -> str:
-        """Create box plot with Plotly"""
-        if self.data is None:
-            raise ValueError("No data loaded")
-        fig = go.Figure()
-        for category in self.data[x_col].unique():
-            fig.add_trace(go.Box(
-                y=self.data[self.data[x_col] == category][y_col],
-                name=str(category),
-                boxpoints='all',
-                jitter=0.3,
-                pointpos=-1.8
-            ))
-        fig.update_layout(
-            title=title,
-            yaxis_title=y_col,
-            xaxis_title=x_col,
-            width=self.width,
-            height=self.height,
-            template="plotly_white",
-            showlegend=False
-        )
-        return fig.to_html(include_plotlyjs=True, full_html=False)
-    def create_line(self, x_col: str, y_col: str, color_col: Optional[str] = None,
-                    title: str = "") -> str:
-        """Create line plot with Plotly"""
-        if self.data is None:
-            raise ValueError("No data loaded")
-        fig = go.Figure()
-        if color_col:
-            for category in self.data[color_col].unique():
-                mask = self.data[color_col] == category
-                fig.add_trace(go.Scatter(
-                    x=self.data[mask][x_col],
-                    y=self.data[mask][y_col],
-                    mode='lines+markers',
-                    name=str(category)
-                ))
-        else:
-            fig.add_trace(go.Scatter(
-                x=self.data[x_col],
-                y=self.data[y_col],
-                mode='lines+markers'
-            ))
-        fig.update_layout(
-            title=title,
-            xaxis_title=x_col,
-            yaxis_title=y_col,
-            width=self.width,
-            height=self.height,
-            template="plotly_white",
-            hovermode='x unified'
-        )
-        return fig.to_html(include_plotlyjs=True, full_html=False)
-class ChatAnalyzer:
-    """Handles chat-based analysis with visualization"""
-    def __init__(self):
-        self.analyzer = DataAnalyzer()
-        self.history: List[Tuple[str, str]] = []
-    def process_file(self, file: gr.File) -> List[Tuple[str, str]]:
-        """Process uploaded file and initialize analyzer"""
         try:
-            if file.name.endswith('.csv'):
-                self.analyzer.data = pd.read_csv(file.name)
-            elif file.name.endswith(('.xlsx', '.xls')):
-                self.analyzer.data = pd.read_excel(file.name)
-            else:
-                return [("System", "Error: Please upload a CSV or Excel file.")]
-            # Convert date columns to datetime
-            date_cols = self.analyzer.data.select_dtypes(include=['object']).columns
-            for col in date_cols:
-                try:
-                    self.analyzer.data[col] = pd.to_datetime(self.analyzer.data[col])
-                except:
-                    continue
-            info = f"""Data loaded successfully!
-            Shape: {self.analyzer.data.shape}
-            Columns: {', '.join(self.analyzer.data.columns)}
-            Numeric columns: {', '.join(self.analyzer.data.select_dtypes(include=[np.number]).columns)}
-            Date columns: {', '.join(self.analyzer.data.select_dtypes(include=['datetime64']).columns)}
-            Categorical columns: {', '.join(self.analyzer.data.select_dtypes(include=['object']).columns)}
-            """
-            self.history = [("System", info)]
-            return self.history
         except Exception as e:
-            self.history = [("System", f"Error loading file: {str(e)}")]
-            return self.history
-    def chat(self, message: str, api_key: str) -> Tuple[List[Tuple[str, str]], str]:
-        """Process chat message and generate visualizations"""
-        if self.analyzer.data is None:
-            return [(message, "Please upload a data file first.")], ""
-        if not api_key:
-            return [(message, "Please provide an OpenAI API key.")], ""
         try:
-            os.environ["OPENAI_API_KEY"] = api_key
-            # Get data context
-            context = self._get_data_context()
-            # Get AI response
-            completion_response = completion(
-                model="gpt-4o-mini",
-                messages=[
-                    {"role": "system", "content": self._get_system_prompt()},
-                    {"role": "user", "content": f"{context}\n\nUser question: {message}"}
-                ],
-                temperature=0.7
             )
-            analysis = completion_response.choices[0].message.content
-            # Create visualizations
-            plots_html = ""
-            try:
-                # Extract code blocks
-                import re
-                code_blocks = re.findall(r'```python\n(.*?)```', analysis, re.DOTALL)
-                for code in code_blocks:
-                    # Create namespace for execution
-                    namespace = {
-                        'analyzer': self.analyzer,
-                        'df': self.analyzer.data,
-                        'print': lambda x: x
-                    }
-                    # Execute the code
-                    try:
-                        result = eval(code, namespace)
-                        if isinstance(result, str) and ('<div' in result or '<script' in result):
-                            plots_html += f'<div class="plot-container">{result}</div>'
-                    except:
-                        exec(code, namespace)
-            except Exception as e:
-                analysis += f"\n\nError creating visualization: {str(e)}"
-            # Update chat history
-            self.history.append((message, analysis))
-            return self.history, plots_html
         except Exception as e:
-            self.history.append((message, f"Error: {str(e)}"))
-            return self.history, ""
-    def _get_data_context(self) -> str:
-        """Get current data context for AI"""
-        df = self.analyzer.data
-        numeric_cols = df.select_dtypes(include=[np.number]).columns
-        date_cols = df.select_dtypes(include=['datetime64']).columns
-        categorical_cols = df.select_dtypes(include=['object']).columns
-        # Get basic statistics
-        stats = df[numeric_cols].describe().to_string() if len(numeric_cols) > 0 else "No numeric columns"
-        return f"""
-        Data Information:
-        - Shape: {df.shape}
-        - Numeric columns: {', '.join(numeric_cols)}
-        - Date columns: {', '.join(date_cols)}
-        - Categorical columns: {', '.join(categorical_cols)}
-        Basic Statistics:
-        {stats}
-        Available visualization functions:
-        - analyzer.create_histogram(column, bins, title)
-        - analyzer.create_scatter(x_col, y_col, color_col, title)
-        - analyzer.create_box(x_col, y_col, title)
-        - analyzer.create_line(x_col, y_col, color_col, title)
-        """
-    def _get_system_prompt(self) -> str:
-        """Get system prompt for AI"""
-        return """You are a data analysis assistant specialized in creating interactive visualizations.
-Available visualization functions:
-1. create_histogram(column, bins, title) - For distribution analysis
-2. create_scatter(x_col, y_col, color_col, title) - For relationship analysis
-3. create_box(x_col, y_col, title) - For categorical comparisons
-4. create_line(x_col, y_col, color_col, title) - For trend analysis
-Example usage:
 ```python
-# Create histogram
-result = analyzer.create_histogram(
-    column='Salary',
-    bins=20,
-    title='Salary Distribution'
-)
-print(result)
-# Create scatter plot with time series
-result = analyzer.create_scatter(
-    x_col='Date',
-    y_col='Salary',
-    color_col='Title',
-    title='Salary Trends by Title'
-)
-print(result)
-# Create box plot
-result = analyzer.create_box(
-    x_col='Title',
-    y_col='Salary',
-    title='Salary Distribution by Title'
-)
-print(result)
 ```
-Always wrap code in Python code blocks and use print() to display the visualizations.
-Provide analysis and insights about what the visualizations show."""
-def create_interface():
-    """Create Gradio interface"""
-    analyzer = ChatAnalyzer()
-    # Custom CSS
     css = """
-    .container { max-width: 1200px; margin: auto; }
     .plot-container {
         margin: 20px 0;
         padding: 15px;
@@ -328,81 +300,70 @@ def create_interface():
         background: white;
         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     }
-    .chat-message {
-        margin-bottom: 15px;
-        padding: 10px;
-        border-radius: 8px;
-        background: #f8f9fa;
-    }
     """
-    with gr.Blocks(css=css) as demo:
         gr.Markdown("""
-        # Interactive Data Analysis Chat
-        Upload your data and chat with AI to analyze it! Features:
-        - Interactive visualizations
-        - Natural language analysis
-        - Statistical insights
-        - Trend detection
         """)
         with gr.Row():
-            with gr.Column(scale=1):
                 file = gr.File(
-                    label="Upload Data (CSV or Excel)",
                     file_types=[".csv", ".xlsx", ".xls"]
                 )
-                api_key = gr.Textbox(
-                    label="OpenAI API Key",
-                    type="password",
-                    placeholder="Enter your API key"
                 )
-            with gr.Column(scale=2):
-                chatbot = gr.Chatbot(
-                    height=400,
-                    elem_classes="chat-message"
                 )
-                message = gr.Textbox(
-                    label="Ask about your data",
-                    placeholder="e.g., Show me trends in the data",
-                    lines=2
                 )
-                send = gr.Button("Send")
-        # Plot output area
-        plot_output = gr.HTML(
-            label="Visualizations",
-            elem_classes="plot-container"
-        )
-        # Event handlers
-        file.change(
-            analyzer.process_file,
-            inputs=[file],
-            outputs=[chatbot]
-        )
-        send.click(
-            analyzer.chat,
-            inputs=[message, api_key],
-            outputs=[chatbot, plot_output]
         )
-        # Example queries
         gr.Examples(
             examples=[
-                ["Show me a histogram of salary distribution"],
-                ["Create a scatter plot of salary trends over time"],
-                ["Show me box plots of salaries by title"],
-                ["Analyze the trends and patterns in the data"],
             ],
-            inputs=message
         )
-    return demo
 if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()

 import base64
 import io
+import os
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional, Union
 import gradio as gr
+import matplotlib.pyplot as plt
 import numpy as np
+import pandas as pd
+import seaborn as sns
+import plotly.express as px
 import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 from litellm import completion
+class CodeEnvironment:
+    """Enhanced environment for executing code with both static and interactive visualization capabilities"""
     def __init__(self):
+        self.globals = {
+            'pd': pd,
+            'np': np,
+            'plt': plt,
+            'sns': sns,
+            'px': px,
+            'go': go,
+            'make_subplots': make_subplots
+        }
+        self.locals = {}
+    def execute(self, code: str, df: pd.DataFrame = None) -> Dict[str, Any]:
+        """Execute code and capture both static and interactive outputs"""
+        if df is not None:
+            self.globals['df'] = df
+        # Capture output
+        output_buffer = io.StringIO()
+        result = {
+            'output': '',
+            'figures': [],      # For base64 static images
+            'interactive': [],  # For Plotly HTML
+            'error': None
+        }
         try:
+            # Execute code
+            exec(code, self.globals, self.locals)
+            # Capture matplotlib figures (static)
+            for i in plt.get_fignums():
+                fig = plt.figure(i)
+                buf = io.BytesIO()
+                fig.savefig(buf, format='png')
+                buf.seek(0)
+                img_str = base64.b64encode(buf.read()).decode()
+                result['figures'].append(f"data:image/png;base64,{img_str}")
+                plt.close(fig)
+            # Capture Plotly figures (interactive)
+            for var in list(self.locals.values()):
+                if isinstance(var, (go.Figure, px.Figure)):
+                    html = var.to_html(
+                        include_plotlyjs=True,
+                        full_html=False,
+                        config={
+                            'displayModeBar': True,
+                            'responsive': True
+                        }
+                    )
+                    result['interactive'].append(html)
+            # Get printed output
+            result['output'] = output_buffer.getvalue()
         except Exception as e:
+            result['error'] = str(e)
+        finally:
+            output_buffer.close()
+        return result
+@dataclass
+class Tool:
+    """Tool for data analysis"""
+    name: str
+    description: str
+    func: Callable
+class AnalysisAgent:
+    """Enhanced agent with interactive visualization capabilities"""
+    def __init__(
+        self,
+        model_id: str = "gpt-4o-mini",
+        temperature: float = 0.7,
+    ):
+        self.model_id = model_id
+        self.temperature = temperature
+        self.tools: List[Tool] = []
+        self.code_env = CodeEnvironment()
+    def add_tool(self, name: str, description: str, func: Callable) -> None:
+        """Add a tool to the agent"""
+        self.tools.append(Tool(name=name, description=description, func=func))
+    def run(self, prompt: str, df: pd.DataFrame = None) -> str:
+        """Run analysis with enhanced visualization support"""
+        messages = [
+            {"role": "system", "content": self._get_system_prompt()},
+            {"role": "user", "content": prompt}
+        ]
         try:
+            # Get response from model
+            response = completion(
+                model=self.model_id,
+                messages=messages,
+                temperature=self.temperature,
             )
+            analysis = response.choices[0].message.content
+            # Extract code blocks
+            code_blocks = self._extract_code(analysis)
+            # Execute code and capture results
+            results = []
+            for code in code_blocks:
+                result = self.code_env.execute(code, df)
+                if result['error']:
+                    results.append(f"Error executing code: {result['error']}")
+                else:
+                    # Add output text
+                    if result['output']:
+                        results.append(result['output'])
+                    # Add interactive plots
+                    for plot in result['interactive']:
+                        results.append(f"<div class='plot-container'>{plot}</div>")
+                    # Add static figures as fallback
+                    for fig in result['figures']:
+                        results.append(f"![Figure]({fig})")
+            # Combine analysis and results
+            return analysis + "\n\n" + "\n".join(results)
         except Exception as e:
+            return f"Error: {str(e)}"
+    def _get_system_prompt(self) -> str:
+        """Get enhanced system prompt with interactive visualization capabilities"""
+        tools_desc = "\n".join([
+            f"- {tool.name}: {tool.description}"
+            for tool in self.tools
+        ])
+        return f"""You are a data analysis assistant with interactive visualization capabilities.
+Available tools:
+{tools_desc}
+Capabilities:
+- Data analysis (pandas, numpy)
+- Interactive visualization (plotly)
+- Static visualization (matplotlib, seaborn)
+- Statistical analysis (scipy)
+- Machine learning (sklearn)
+When writing code:
+- Prefer Plotly for interactive visualizations
+- Use matplotlib/seaborn for static plots when appropriate
+- Create clear visualizations with proper labels
+- Include explanatory text
+- Handle errors gracefully
+Example Plotly usage:
 ```python
+# Create interactive scatter plot
+fig = px.scatter(df, x='column1', y='column2',
+                color='category',
+                title='Interactive Analysis')
+fig.update_layout(height=600)
+fig.show()
+# Create interactive time series
+fig = px.line(df, x='date', y='value',
+              color='category',
+              title='Time Series Analysis')
+fig.update_layout(height=600)
+fig.show()
+```
+Example Matplotlib usage:
+```python
+# Create static plot
+plt.figure(figsize=(10, 6))
+sns.boxplot(data=df, x='category', y='value')
+plt.title('Distribution Analysis')
+plt.show()
 ```
+"""
+    @staticmethod
+    def _extract_code(text: str) -> List[str]:
+        """Extract Python code blocks from markdown"""
+        import re
+        pattern = r'```python\n(.*?)```'
+        return re.findall(pattern, text, re.DOTALL)
+def process_file(file: gr.File) -> Optional[pd.DataFrame]:
+    """Process uploaded file into DataFrame"""
+    if not file:
+        return None
+    try:
+        if file.name.endswith('.csv'):
+            return pd.read_csv(file.name)
+        elif file.name.endswith(('.xlsx', '.xls')):
+            return pd.read_excel(file.name)
+    except Exception as e:
+        print(f"Error reading file: {str(e)}")
+    return None
+def analyze_data(
+    file: gr.File,
+    query: str,
+    api_key: str,
+    temperature: float = 0.7,
+) -> str:
+    """Process user request and generate enhanced analysis"""
+    if not api_key:
+        return "Error: Please provide an API key."
+    if not file:
+        return "Error: Please upload a file."
+    try:
+        # Set up environment
+        os.environ["OPENAI_API_KEY"] = api_key
+        # Create agent
+        agent = AnalysisAgent(
+            model_id="gpt-4o-mini",
+            temperature=temperature
+        )
+        # Process file
+        df = process_file(file)
+        if df is None:
+            return "Error: Could not process file."
+        # Build context
+        file_info = f"""
+        File: {file.name}
+        Shape: {df.shape}
+        Columns: {', '.join(df.columns)}
+        Column Types:
+        {chr(10).join([f'- {col}: {dtype}' for col, dtype in df.dtypes.items()])}
+        """
+        # Run analysis
+        prompt = f"""
+        {file_info}
+        The data is loaded in a pandas DataFrame called 'df'.
+        User request: {query}
+        Please analyze the data and provide:
+        1. Key insights and findings
+        2. Interactive visualizations where appropriate
+        3. Statistical summaries when relevant
+        4. Clear explanations of patterns and trends
+        """
+        return agent.run(prompt, df=df)
+    except Exception as e:
+        return f"Error occurred: {str(e)}"
+def create_interface():
+    """Create enhanced Gradio interface"""
     css = """
     .plot-container {
         margin: 20px 0;
         padding: 15px;
         background: white;
         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     }
     """
+    with gr.Blocks(title="AI Data Analysis Assistant", css=css) as interface:
         gr.Markdown("""
+        # AI Data Analysis Assistant
+        Upload your data file and get AI-powered analysis with interactive visualizations.
+        **Features:**
+        - Interactive data visualization
+        - Statistical analysis
+        - Machine learning capabilities
+        - Natural language interaction
+        **Note**: Requires your own OpenAI API key.
         """)
         with gr.Row():
+            with gr.Column():
                 file = gr.File(
+                    label="Upload Data File",
                     file_types=[".csv", ".xlsx", ".xls"]
                 )
+                query = gr.Textbox(
+                    label="What would you like to analyze?",
+                    placeholder="e.g., Create interactive visualizations showing relationships between variables",
+                    lines=3
                 )
+                api_key = gr.Textbox(
+                    label="API Key (Required)",
+                    placeholder="Your API key",
+                    type="password"
                 )
+                temperature = gr.Slider(
+                    label="Temperature",
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.7,
+                    step=0.1
                 )
+                analyze_btn = gr.Button("Analyze")
+            with gr.Column():
+                output = gr.HTML(label="Output")  # Changed to HTML for interactive plots
+        analyze_btn.click(
+            analyze_data,
+            inputs=[file, query, api_key, temperature],
+            outputs=output
         )
         gr.Examples(
             examples=[
+                [None, "Create interactive visualizations showing relationships between variables"],
+                [None, "Show the distribution of values with interactive plots"],
+                [None, "Create an interactive correlation analysis"],
+                [None, "Show trends over time with interactive charts"],
+                [None, "Generate a comprehensive analysis with multiple visualizations"],
             ],
+            inputs=[file, query]
         )
+    return interface
 if __name__ == "__main__":
+    interface = create_interface()
+    interface.launch()