Spaces:

jzou19950715
/

Huggingface_AI_Agent_Function_Testing_2

Configuration error

App Files Files Community

jzou19950715 committed on Jan 24, 2025

Commit

707e5d0

verified ·

1 Parent(s): b90c312

Update app.py

Browse files

Files changed (1) hide show

app.py +519 -47

app.py CHANGED Viewed

@@ -1,29 +1,448 @@
-def create_interface():
-    """Create Gradio interface with proper HTML rendering"""
-    agent = AnalysisAgent()
-    def format_html_output(content: str) -> str:
-        """Format the output to properly render HTML in Gradio"""
-        # Split content into text and HTML parts
-        parts = content.split('<!DOCTYPE html>')
-        if len(parts) == 1:
-            # No HTML content
-            return f'<div style="padding: 20px;">{content}</div>'
         formatted_parts = []
         for i, part in enumerate(parts):
-            if i == 0:
-                # Text content
-                if part.strip():
-                    formatted_parts.append(f'<div style="padding: 20px;">{part}</div>')
-            else:
-                # HTML visualization
-                formatted_parts.append(f'<!DOCTYPE html>{part}')
         return '\n'.join(formatted_parts)
     def process_file(file: gr.File) -> str:
         """Process uploaded file and initialize session"""
         try:
@@ -32,39 +451,64 @@ def create_interface():
             elif file.name.endswith(('.xlsx', '.xls')):
                 agent.session.data = pd.read_excel(file.name)
             else:
-                return "Error: Unsupported file type"
-            return format_html_output(f"Successfully loaded data: {agent.session.get_context()}")
         except Exception as e:
-            return format_html_output(f"Error loading file: {str(e)}")
-    def analyze(file: gr.File, query: str, api_key: str) -> str:
-        """Process analysis query"""
         if not api_key:
-            return format_html_output("Error: Please provide an API key.")
         if not file:
-            return format_html_output("Error: Please upload a file.")
         try:
             os.environ["OPENAI_API_KEY"] = api_key
             result = agent.process_query(query)
-            return format_html_output(result)
         except Exception as e:
-            return format_html_output(f"Error: {str(e)}")
     with gr.Blocks(css="""
-        .plot-container { margin: 20px 0; }
-        .bokeh-plot { margin: 20px auto; }
     """) as interface:
         gr.Markdown("""
         # Interactive Data Analysis Assistant
         Upload your data file and chat with the AI to analyze it. Features:
-        - Interactive visualizations
-        - Natural language analysis
-        - Follow-up questions
-        - Statistical insights
         **Note**: Requires OpenAI API key
         """)
@@ -73,42 +517,70 @@ def create_interface():
             with gr.Column(scale=1):
                 file = gr.File(
                     label="Upload Data File",
-                    file_types=[".csv", ".xlsx", ".xls"]
                 )
                 api_key = gr.Textbox(
-                    label="API Key",
-                    type="password"
                 )
                 chat_input = gr.Textbox(
                     label="Ask about your data",
                     placeholder="e.g., Show me the relationship between variables",
                     lines=3
                 )
-                analyze_btn = gr.Button("Analyze")
             with gr.Column(scale=2):
-                chat_output = gr.HTML(
                     label="Analysis & Visualizations",
-                    elem_classes="plot-container"
                 )
         # Set up event handlers
-        file.change(process_file, inputs=[file], outputs=[chat_output])
         analyze_btn.click(
             analyze,
-            inputs=[file, chat_input, api_key],
-            outputs=[chat_output]
         )
         # Example queries
         gr.Examples(
             examples=[
-                [None, "Show me the distribution of numerical variables"],
-                [None, "Create an interactive visualization of the relationships between variables"],
-                [None, "Analyze trends in the data over time"],
-                [None, "Compare different categories using interactive charts"],
             ],
             inputs=[file, chat_input]
         )
-    return interface

+import base64
+import io
+import os
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional, Union
+import json
+import gradio as gr
+import numpy as np
+import pandas as pd
+from bokeh.plotting import figure
+from bokeh.layouts import column, row, layout
+from bokeh.models import (
+    ColumnDataSource,
+    HoverTool,
+    BoxSelectTool,
+    WheelZoomTool,
+    ResetTool,
+    Legend,
+    LegendItem
+)
+from bokeh.embed import file_html
+from bokeh.resources import CDN
+from litellm import completion
+class VisualizationEngine:
+    """Build interactive Bokeh plots and return each one as an HTML string.
+
+    Every ``create_*`` method renders its figure with ``file_html`` using the
+    resources stored in ``self.cdn`` and returns the resulting markup.
+    """
+
+    # Series colours shared by the line and scatter plots; cycled when a plot
+    # needs more colours than there are entries.
+    _PALETTE = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
+                '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
+
+    def __init__(self):
+        self.width = 800
+        self.height = 500
+        # "hover" must stay in this list: the create_* methods look up the
+        # HoverTool it adds in order to configure the tooltips.
+        self.tools = "pan,box_zoom,wheel_zoom,reset,save,hover"
+        self.cdn = CDN
+
+    @staticmethod
+    def _tooltip(col: str) -> tuple:
+        """Return a ``(label, field reference)`` tooltip pair for *col*."""
+        # The braces keep the reference intact for names containing spaces.
+        return (col, f"@{{{col}}}")
+
+    def _new_figure(self, title: str, **kwargs):
+        """Create a figure with the engine's size, tools and font styling."""
+        p = figure(width=self.width, height=self.height,
+                   title=title, tools=self.tools, **kwargs)
+        p.title.text_font_size = '16pt'
+        p.axis.axis_label_text_font_size = '12pt'
+        return p
+
+    @staticmethod
+    def _set_hover(p, tooltips: list) -> None:
+        """Apply *tooltips* to the hover tool(s) of figure *p*."""
+        hover = p.select(dict(type=HoverTool))
+        hover.tooltips = tooltips
+        hover.mode = 'mouse'
+
+    def create_scatter(self, df: pd.DataFrame, x_col: str, y_col: str,
+                      color_col: Optional[str] = None, title: str = "") -> str:
+        """Create an interactive scatter plot of *y_col* against *x_col*.
+
+        Args:
+            df: Data to plot.
+            x_col: Column for the x axis.
+            y_col: Column for the y axis.
+            color_col: Optional column whose distinct values choose the point
+                colour. Ignored when it is not a column of *df*.
+            title: Plot title.
+
+        Returns:
+            The rendered plot as an HTML string.
+        """
+        p = self._new_figure(title)
+        use_color = bool(color_col) and color_col in df.columns
+        if use_color:
+            # Local import: bokeh.transform is not among the file-level imports.
+            from bokeh.transform import factor_cmap
+            # Categorical colour mapping needs string factors. The labels go in
+            # a separate column so the original values are left untouched.
+            color_key = '_color_key'
+            data = df.copy()
+            data[color_key] = data[color_col].astype(str)
+            factors = sorted(data[color_key].unique())
+            palette = [self._PALETTE[i % len(self._PALETTE)]
+                       for i in range(len(factors))]
+            color = factor_cmap(color_key, palette=palette, factors=factors)
+        else:
+            data = df
+            color = 'navy'
+        p.scatter(
+            x_col, y_col,
+            source=ColumnDataSource(data),
+            color=color,
+            size=8,
+            alpha=0.6
+        )
+        p.xaxis.axis_label = x_col
+        p.yaxis.axis_label = y_col
+        shown = [x_col, y_col] + ([color_col] if use_color else [])
+        self._set_hover(p, [self._tooltip(col) for col in shown])
+        return file_html(p, self.cdn)
+
+    def create_line(self, df: pd.DataFrame, x_col: str, y_cols: List[str],
+                   title: str = "") -> str:
+        """Create an interactive line plot with one line per entry of *y_cols*.
+
+        Args:
+            df: Data to plot.
+            x_col: Column for the x axis.
+            y_cols: Columns drawn as separate lines, each with a legend entry.
+            title: Plot title.
+
+        Returns:
+            The rendered plot as an HTML string.
+        """
+        source = ColumnDataSource(df)
+        p = self._new_figure(title)
+        for i, y_col in enumerate(y_cols):
+            p.line(
+                x_col, y_col,
+                line_width=2,
+                source=source,
+                legend_label=y_col,
+                color=self._PALETTE[i % len(self._PALETTE)]
+            )
+        p.xaxis.axis_label = x_col
+        p.yaxis.axis_label = "Values"
+        # Clicking a legend entry toggles the corresponding line.
+        p.legend.click_policy = "hide"
+        p.legend.location = "top_right"
+        self._set_hover(p, [self._tooltip(col) for col in [x_col] + y_cols])
+        return file_html(p, self.cdn)
+
+    def create_bar(self, df: pd.DataFrame, x_col: str, y_col: str,
+                  title: str = "", color: str = "#1f77b4") -> str:
+        """Create an interactive vertical bar plot.
+
+        Args:
+            df: Data to plot.
+            x_col: Column holding the category of each bar.
+            y_col: Column holding the bar heights.
+            title: Plot title.
+            color: Fill colour of the bars.
+
+        Returns:
+            The rendered plot as an HTML string.
+        """
+        # A categorical range takes string factors, so the category column is
+        # converted in a copy before it is used for both range and data.
+        data = df.copy()
+        data[x_col] = data[x_col].astype(str)
+        p = self._new_figure(title, x_range=data[x_col].unique().tolist())
+        p.vbar(
+            x=x_col,
+            top=y_col,
+            width=0.9,
+            source=ColumnDataSource(data),
+            color=color,
+            alpha=0.8
+        )
+        p.xaxis.axis_label = x_col
+        p.yaxis.axis_label = y_col
+        p.xgrid.grid_line_color = None
+        p.xaxis.major_label_orientation = 0.7
+        self._set_hover(p, [self._tooltip(x_col), self._tooltip(y_col)])
+        return file_html(p, self.cdn)
+
+    def create_histogram(self, df: pd.DataFrame, column: str, bins: int = 30,
+                        title: str = "") -> str:
+        """Create an interactive histogram of *column*.
+
+        Args:
+            df: Data to plot.
+            column: Column to bin; missing values are dropped first.
+            bins: Number of bins passed to ``np.histogram``.
+            title: Plot title.
+
+        Returns:
+            The rendered plot as an HTML string.
+        """
+        hist, edges = np.histogram(df[column].dropna(), bins=bins)
+        # One row per bin: its count plus left and right edges.
+        hist_df = pd.DataFrame({
+            'count': hist,
+            'left': edges[:-1],
+            'right': edges[1:]
+        })
+        p = self._new_figure(title)
+        p.quad(
+            top='count',
+            bottom=0,
+            left='left',
+            right='right',
+            source=ColumnDataSource(hist_df),
+            fill_color="#1f77b4",
+            line_color="white",
+            alpha=0.8
+        )
+        p.xaxis.axis_label = column
+        p.yaxis.axis_label = 'Count'
+        self._set_hover(p, [
+            ('Range', '@left{0.00} to @right{0.00}'),
+            ('Count', '@count')
+        ])
+        return file_html(p, self.cdn)
+class DataAnalyzer:
+    """Stateless collection of DataFrame inspection helpers."""
+
+    @staticmethod
+    def get_summary_stats(df: pd.DataFrame) -> pd.DataFrame:
+        """Return ``df.describe()`` for the given frame."""
+        return df.describe()
+
+    @staticmethod
+    def get_missing_values(df: pd.DataFrame) -> pd.DataFrame:
+        """Report the count and percentage of nulls per column.
+
+        Only columns with at least one null appear in the result.
+        """
+        null_counts = df.isnull().sum()
+        percentages = (df.isnull().sum() / len(df) * 100).round(2)
+        report = pd.DataFrame({
+            'column': df.columns,
+            'missing_count': null_counts,
+            'missing_percentage': percentages
+        })
+        has_missing = report['missing_count'] > 0
+        return report[has_missing]
+
+    @staticmethod
+    def get_correlation_matrix(df: pd.DataFrame) -> pd.DataFrame:
+        """Return the pairwise correlation of the numeric columns."""
+        numeric_names = df.select_dtypes(include=[np.number]).columns
+        numeric_part = df[numeric_names]
+        return numeric_part.corr()
+class AnalysisSession:
+    """Hold the loaded DataFrame, the chat history and the helper objects."""
+
+    def __init__(self):
+        # The DataFrame under analysis; stays None until a file is loaded.
+        self.data: Optional[pd.DataFrame] = None
+        # Messages as {"role": ..., "content": ...} dicts, oldest first.
+        self.chat_history: List[Dict[str, str]] = []
+        self.viz_engine = VisualizationEngine()
+        self.analyzer = DataAnalyzer()
+
+    def add_message(self, role: str, content: str):
+        """Append a message with the given role and content to the history."""
+        self.chat_history.append({"role": role, "content": content})
+
+    def get_context(self) -> str:
+        """Describe the loaded data as text.
+
+        Returns:
+            ``"No data loaded yet."`` when no DataFrame is set. Otherwise a
+            summary with the shape, the numeric and categorical column names
+            and the per-column missing-value counts.
+        """
+        if self.data is None:
+            return "No data loaded yet."
+        numeric_cols = self.data.select_dtypes(include=[np.number]).columns
+        categorical_cols = self.data.select_dtypes(include=['object', 'category']).columns
+        missing_info = self.analyzer.get_missing_values(self.data)
+        missing_summary = "\n".join([
+            f"- {row['column']}: {row['missing_count']} ({row['missing_percentage']}%)"
+            for _, row in missing_info.iterrows()
+        ]) if not missing_info.empty else "No missing values found."
+        # Column labels are not always strings (e.g. integer headers), and
+        # str.join rejects non-string items, so convert them first.
+        numeric_names = ', '.join(map(str, numeric_cols))
+        categorical_names = ', '.join(map(str, categorical_cols))
+        context = f"""
+        Current DataFrame Info:
+        - Shape: {self.data.shape}
+        - Numeric columns: {numeric_names}
+        - Categorical columns: {categorical_names}
+        Missing Values:
+        {missing_summary}
+        """
+        return context
+class AnalysisAgent:
+    """Answer data questions through an LLM and render the plots it requests.
+
+    The model's reply may contain fenced ``python`` code blocks. Each block is
+    executed against the session's DataFrame, and any HTML it produces is
+    placed in the response where that code block stood.
+    """
+
+    def __init__(
+        self,
+        model_id: str = "gpt-4",
+        temperature: float = 0.7,
+    ):
+        self.model_id = model_id
+        self.temperature = temperature
+        self.session = AnalysisSession()
+
+    def process_query(self, query: str) -> str:
+        """Send *query* to the model and return the formatted HTML response.
+
+        The session context and the last five history messages accompany the
+        query. Any exception is reported as an ``"Error: ..."`` string rather
+        than raised.
+        """
+        context = self.session.get_context()
+        messages = [
+            {"role": "system", "content": self._get_system_prompt()},
+            *self.session.chat_history[-5:],  # Include last 5 messages for context
+            {"role": "user", "content": f"{context}\n\nUser query: {query}"}
+        ]
+        try:
+            response = completion(
+                model=self.model_id,
+                messages=messages,
+                temperature=self.temperature,
+            )
+            analysis = response.choices[0].message.content
+            # Keep exactly one entry per code block, None when a block yields
+            # no HTML, because _format_response pairs entries with code
+            # blocks by position.
+            visualizations: List[Optional[str]] = []
+            for code in self._extract_code(analysis):
+                try:
+                    visualizations.append(self._execute_visualization(code))
+                except Exception as e:
+                    visualizations.append(f"Error creating visualization: {str(e)}")
+            # Add messages to chat history
+            self.session.add_message("user", query)
+            self.session.add_message("assistant", analysis)
+            return self._format_response(analysis, visualizations)
+        except Exception as e:
+            return f"Error: {str(e)}"
+
+    def _execute_visualization(self, code: str) -> Optional[str]:
+        """Run *code* and return the first HTML-looking string it leaves behind.
+
+        Returns:
+            The first string in the execution namespace containing
+            ``<script`` or ``<div``, ``None`` when there is none, or an
+            ``"Error executing visualization: ..."`` message when the code
+            raises.
+        """
+        try:
+            namespace = {
+                'df': self.session.data,
+                'np': np,
+                'pd': pd,
+                'viz': self.session.viz_engine,
+                'analyzer': self.session.analyzer
+            }
+            # SECURITY: this executes model-generated code with full builtins
+            # in the server process. The namespace only pre-populates names;
+            # it is NOT a sandbox. Run in an isolated environment if the
+            # model output cannot be trusted.
+            exec(code, namespace)
+            for var in namespace.values():
+                if isinstance(var, str) and ('<script' in var or '<div' in var):
+                    return var
+            return None
+        except Exception as e:
+            return f"Error executing visualization: {str(e)}"
+
+    def _format_response(self, analysis: str, visualizations: List[Optional[str]]) -> str:
+        """Interleave the text of *analysis* with the rendered visualizations.
+
+        Args:
+            analysis: The model reply, possibly containing python code blocks.
+            visualizations: One entry per code block, in order; ``None`` or
+                an empty string means that block produced nothing to show.
+
+        Returns:
+            HTML with each text segment in an ``analysis-text`` div and each
+            available visualization in a ``visualization`` div.
+        """
+        # re.split with a capturing group alternates text (even indices) and
+        # code-block bodies (odd indices).
+        parts = self._extract_code(analysis, keep_markdown=True)
+        formatted_parts = []
+        for i, part in enumerate(parts):
+            if i % 2 == 0:  # Text content
+                formatted_parts.append(f'<div class="analysis-text">{part}</div>')
+                continue
+            block_index = i // 2
+            viz = visualizations[block_index] if block_index < len(visualizations) else None
+            if viz:
+                formatted_parts.append(f'<div class="visualization">{viz}</div>')
+        return '\n'.join(formatted_parts)
+
+    def _get_system_prompt(self) -> str:
+        """Return the system prompt describing the available tools."""
+        return """You are a data analysis assistant with interactive visualization capabilities.
+Available visualizations:
+1. Scatter plots (viz.create_scatter)
+   - x_col: x-axis column name
+   - y_col: y-axis column name
+   - color_col: optional column for color coding
+   - title: plot title
+2. Line plots (viz.create_line)
+   - x_col: x-axis column name
+   - y_cols: list of column names for multiple lines
+   - title: plot title
+3. Bar plots (viz.create_bar)
+   - x_col: category column name
+   - y_col: value column name
+   - title: plot title
+   - color: optional bar color
+4. Histograms (viz.create_histogram)
+   - column: column to analyze
+   - bins: number of bins
+   - title: plot title
+Analysis tools:
+- analyzer.get_summary_stats(df): Get summary statistics
+- analyzer.get_correlation_matrix(df): Get correlation matrix
+- analyzer.get_missing_values(df): Get missing values information
+When analyzing data:
+1. First understand and explain the data
+2. Create relevant visualizations using the viz engine
+3. Provide insights based on the visualizations
+4. Ask follow-up questions when appropriate
+5. Use markdown for formatting
+Example visualization code:
+```python
+# Create scatter plot
+html = viz.create_scatter(df, 'column1', 'column2', title='Analysis')
+print(html)
+# Create line plot with multiple series
+html = viz.create_line(df, 'date_column', ['value1', 'value2'], title='Trends')
+print(html)
+# Create histogram
+html = viz.create_histogram(df, 'numeric_column', bins=30, title='Distribution')
+print(html)
+```
+"""
+
+    @staticmethod
+    def _extract_code(text: str, keep_markdown: bool = False) -> List[str]:
+        """Extract fenced python code blocks from markdown *text*.
+
+        Args:
+            text: Markdown to scan.
+            keep_markdown: When true, return the text split around the code
+                blocks (text and code bodies alternating, starting with
+                text) instead of only the code bodies.
+        """
+        import re
+        pattern = r'```python\n(.*?)```'
+        if keep_markdown:
+            return re.split(pattern, text, flags=re.DOTALL)
+        return re.findall(pattern, text, re.DOTALL)
+def create_interface():
+    """Create Gradio interface with proper HTML rendering"""
+    agent = AnalysisAgent()
+    def format_html_output(content: str) -> str:
+        """Format the output to properly render HTML in Gradio"""
+        # Add custom CSS for better visualization display
+        css = """
+        <style>
+            .analysis-text {
+                padding: 20px;
+                margin: 10px 0;
+                background: #f8f9fa;
+                border-radius: 8px;
+                font-size: 16px;
+            }
+            .visualization {
+                margin: 20px 0;
+                padding: 10px;
+                border: 1px solid #dee2e6;
+                border-radius: 8px;
+                background: white;
+            }
+            .bokeh-plot {
+                margin: 0 auto;
+            }
+            pre {
+                background: #f1f3f5;
+                padding: 15px;
+                border-radius: 5px;
+                overflow-x: auto;
+            }
+            code {
+                font-family: 'Courier New', Courier, monospace;
+            }
+        </style>
+        """
+        return f"{css}\n{content}"
     def process_file(file: gr.File) -> str:
         """Process uploaded file and initialize session"""
         try:
             elif file.name.endswith(('.xlsx', '.xls')):
                 agent.session.data = pd.read_excel(file.name)
             else:
+                return format_html_output(
+                    '<div class="analysis-text">Error: Unsupported file type. Please upload a CSV or Excel file.</div>'
+                )
+            # Show initial data summary
+            summary = agent.session.get_context()
+            return format_html_output(
+                f'<div class="analysis-text">Successfully loaded data!\n\n{summary}</div>'
+            )
         except Exception as e:
+            return format_html_output(
+                f'<div class="analysis-text">Error loading file: {str(e)}</div>'
+            )
+    def analyze(file: gr.File, query: str, api_key: str, chat_history: str) -> tuple:
+        """Process analysis query and update chat history"""
         if not api_key:
+            return (
+                format_html_output('<div class="analysis-text">Error: Please provide an API key.</div>'),
+                chat_history
+            )
         if not file:
+            return (
+                format_html_output('<div class="analysis-text">Error: Please upload a file.</div>'),
+                chat_history
+            )
         try:
             os.environ["OPENAI_API_KEY"] = api_key
             result = agent.process_query(query)
+            # Update chat history
+            new_history = chat_history or ""
+            new_history += f"\nYou: {query}\nAssistant: {result}\n"
+            return format_html_output(result), new_history
         except Exception as e:
+            return (
+                format_html_output(f'<div class="analysis-text">Error: {str(e)}</div>'),
+                chat_history
+            )
+    # Create the Gradio interface
     with gr.Blocks(css="""
+        .container { max-width: 1200px; margin: auto; }
+        .analysis-header { margin-bottom: 20px; }
+        .file-upload { margin-bottom: 15px; }
     """) as interface:
         gr.Markdown("""
         # Interactive Data Analysis Assistant
         Upload your data file and chat with the AI to analyze it. Features:
+        - Interactive visualizations with zoom, pan, and hover capabilities
+        - Natural language analysis and insights
+        - Statistical analysis and summaries
+        - Trend detection and pattern analysis
         **Note**: Requires OpenAI API key
         """)
             with gr.Column(scale=1):
                 file = gr.File(
                     label="Upload Data File",
+                    file_types=[".csv", ".xlsx", ".xls"],
+                    elem_classes="file-upload"
                 )
                 api_key = gr.Textbox(
+                    label="OpenAI API Key",
+                    type="password",
+                    placeholder="Enter your API key here"
                 )
                 chat_input = gr.Textbox(
                     label="Ask about your data",
                     placeholder="e.g., Show me the relationship between variables",
                     lines=3
                 )
+                chat_history = gr.State("")
+                analyze_btn = gr.Button("Analyze", variant="primary")
             with gr.Column(scale=2):
+                output = gr.HTML(
                     label="Analysis & Visualizations",
+                    elem_classes="analysis-output"
                 )
         # Set up event handlers
+        file.change(
+            process_file,
+            inputs=[file],
+            outputs=[output]
+        )
         analyze_btn.click(
             analyze,
+            inputs=[file, chat_input, api_key, chat_history],
+            outputs=[output, chat_history]
         )
         # Example queries
         gr.Examples(
             examples=[
+                [None, "Show me the distribution of all numerical variables using histograms"],
+                [None, "Create an interactive scatter plot matrix of the main variables"],
+                [None, "Analyze trends over time and show them with an interactive line plot"],
+                [None, "Compare categories using bar plots and provide statistical insights"],
+                [None, "Identify and visualize correlations between numerical variables"],
+                [None, "Create a dashboard showing key metrics and their distributions"],
             ],
             inputs=[file, chat_input]
         )
+        # Add footer with information
+        gr.Markdown("""
+        ### Tips for better analysis:
+        1. Upload clean data in CSV or Excel format
+        2. Be specific in your questions
+        3. Use follow-up questions to dive deeper
+        4. Interact with the visualizations using mouse hover, zoom, and pan
+        5. Look for patterns and trends in the interactive plots
+        """)
+    return interface
+# Script entry point: build the Gradio interface and launch it.
+if __name__ == "__main__":
+    interface = create_interface()
+    interface.launch()