Spaces:

jzou19950715
/

Huggingface_AI_Agent_Function_Testing_2

Configuration error

App Files Files Community

jzou19950715 committed on Jan 24, 2025

Commit

3950cb3

verified ·

1 Parent(s): 0a37365

Update app.py

Browse files

Files changed (1) hide show

app.py +259 -156

app.py CHANGED Viewed

@@ -1,167 +1,276 @@
 import os
 import json
-from typing import Optional, Dict
 import gradio as gr
 import pandas as pd
 from litellm import completion
-from components.analysis import DataAnalyzer
-from components.statistical import StatisticalAnalyzer
-from components.visualization import D3Visualizer
-def parse_gpt_response(response: str) -> Dict:
-    """Safely parse GPT response into analysis request"""
-    try:
-        # Try to fix common JSON issues
-        cleaned_response = response.replace("```json\n", "").replace("```", "")
-        cleaned_response = cleaned_response.strip()
-        if not cleaned_response.startswith("{"):
-            # Extract JSON if it's embedded in text
-            start = cleaned_response.find("{")
-            end = cleaned_response.rfind("}") + 1
-            if start >= 0 and end > 0:
-                cleaned_response = cleaned_response[start:end]
-        # Parse JSON
-        return json.loads(cleaned_response)
-    except json.JSONDecodeError:
-        # Fallback to default analysis
-        return {
-            "analysis_type": "distribution",
-            "params": {"column": "all"},
-            "explanation": "Performing basic distribution analysis as fallback."
-        }
-def analyze_data(
-    file: gr.File,
-    query: str,
-    api_key: str,
-    temperature: float = 0.7,
-) -> str:
-    """Process user request and generate analysis"""
-    if not api_key:
-        return "Error: Please provide an API key."
-    if not file:
-        return "Error: Please upload a file."
-    try:
-        # Set up environment
-        os.environ["OPENAI_API_KEY"] = api_key
-        # Load data
-        if file.name.endswith('.csv'):
-            df = pd.read_csv(file.name)
-        elif file.name.endswith(('.xlsx', '.xls')):
-            df = pd.read_excel(file.name)
         else:
-            return "Error: Unsupported file type."
-        # Initialize analyzers
-        analyzer = DataAnalyzer()
-        # Build context
-        file_info = f"""
-        File: {file.name}
-        Shape: {df.shape}
-        Columns: {', '.join(df.columns)}
-        Column Types:
-        {chr(10).join([f'- {col}: {dtype}' for col, dtype in df.dtypes.items()])}
-        """
-        # Get analysis request from GPT-4
-        messages = [
-            {
-                "role": "system",
-                "content": """You are a data analysis assistant.
-                Interpret the user's query and provide analysis details in JSON format.
-                Return ONLY a JSON object with these fields:
-                {
-                    "analysis_type": "distribution" or "forecast" or "correlation",
-                    "params": {"column": "column_name", ...},
-                    "explanation": "why this analysis is appropriate"
-                }
-                For timeseries data, prefer 'forecast' type.
-                For multiple columns, prefer 'correlation' type.
-                For single column analysis, prefer 'distribution' type.
-                """
-            },
-            {
-                "role": "user",
-                "content": f"{file_info}\n\nUser request: {query}"
-            }
-        ]
-        response = completion(
-            model="gpt-4o-mini",
-            messages=messages,
-            temperature=temperature
-        )
-        # Parse response and perform analysis
-        analysis_request = parse_gpt_response(response.choices[0].message.content)
-        # Set default column if not specified
-        if "params" not in analysis_request:
-            analysis_request["params"] = {}
-        if "column" not in analysis_request["params"]:
-            analysis_request["params"]["column"] = df.select_dtypes(include=['number']).columns[0]
-        result = analyzer.analyze_data(
-            df,
-            analysis_request["analysis_type"],
-            analysis_request["params"]
-        )
-        # Combine results into HTML
-        html_output = f"""
-        <div class="analysis-container">
-            <div class="explanation">
-                <h2>Analysis Explanation</h2>
-                <p>{analysis_request['explanation']}</p>
-            </div>
-            <div class="results">
-                <h2>Statistical Results</h2>
-                <pre>{str(result.get('statistics', ''))}</pre>
-            </div>
-            <div class="visualization">
-                <h2>Interactive Visualization</h2>
-                {result['visualization']}
-            </div>
-        </div>
         """
-        return html_output
-    except Exception as e:
-        import traceback
-        error_details = traceback.format_exc()
-        return f"Error occurred: {str(e)}\n\nDetails:\n{error_details}"
 def create_interface():
-    """Create Gradio interface"""
-    with gr.Blocks(title="Interactive Data Analysis") as interface:
         gr.Markdown("""
         # Interactive Data Analysis Assistant
-        Upload your data and get interactive visualizations with statistical analysis.
-        **Features:**
-        - Interactive D3 visualizations
-        - Statistical analysis
-        - Probability distributions
-        - Time series forecasting
-        - Correlation analysis
-        **Note**: Requires your own OpenAI API key.
         """)
         with gr.Row():
@@ -170,43 +279,37 @@ def create_interface():
                     label="Upload Data File",
                     file_types=[".csv", ".xlsx", ".xls"]
                 )
-                query = gr.Textbox(
-                    label="What would you like to analyze?",
-                    placeholder="e.g., Show distribution of values with statistics",
-                    lines=3
-                )
                 api_key = gr.Textbox(
-                    label="API Key (Required)",
-                    placeholder="Your OpenAI API key",
                     type="password"
                 )
-                temperature = gr.Slider(
-                    label="Temperature",
-                    minimum=0.0,
-                    maximum=1.0,
-                    value=0.7,
-                    step=0.1
                 )
                 analyze_btn = gr.Button("Analyze")
             with gr.Column():
-                output = gr.HTML(label="Output")
         analyze_btn.click(
-            analyze_data,
-            inputs=[file, query, api_key, temperature],
-            outputs=output
         )
         gr.Examples(
             examples=[
-                [None, "Show me the distribution of values and calculate statistics"],
-                [None, "Create a 10-period probability cone forecast"],
-                [None, "Analyze correlations between variables"],
-                [None, "Test if the data follows a normal distribution"],
-                [None, "Show the data distribution with confidence intervals"],
             ],
-            inputs=[file, query]
         )
     return interface

+import base64
+import io
 import os
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional, Union
 import json
 import gradio as gr
+import numpy as np
 import pandas as pd
+from bokeh.plotting import figure
+from bokeh.layouts import column, row, layout
+from bokeh.models import ColumnDataSource, HoverTool, BoxSelectTool, WheelZoomTool, ResetTool
+from bokeh.embed import components
+from bokeh.resources import CDN
 from litellm import completion
+class VisualizationEngine:
+    """Engine for creating interactive Bokeh visualizations"""
+    def __init__(self):
+        self.width = 600
+        self.height = 400
+        self.tools = "pan,box_zoom,wheel_zoom,reset,save,hover"
+    def create_scatter(self, df: pd.DataFrame, x_col: str, y_col: str,
+                      color_col: Optional[str] = None, title: str = "") -> str:
+        """Create an interactive scatter plot"""
+        source = ColumnDataSource(df)
+        p = figure(width=self.width, height=self.height, title=title, tools=self.tools)
+        if color_col and color_col in df.columns:
+            colors = df[color_col].astype('category').cat.codes
+            scatter = p.scatter(x_col, y_col, source=source, color={'field': color_col, 'transform': 'category10'})
         else:
+            scatter = p.scatter(x_col, y_col, source=source)
+        p.xaxis.axis_label = x_col
+        p.yaxis.axis_label = y_col
+        hover = p.select(dict(type=HoverTool))
+        hover.tooltips = [(col, f"@{col}") for col in [x_col, y_col] + ([color_col] if color_col else [])]
+        script, div = components(p)
+        return f"{CDN.render()}\n{div}\n{script}"
+    def create_line(self, df: pd.DataFrame, x_col: str, y_cols: List[str], title: str = "") -> str:
+        """Create an interactive line plot"""
+        source = ColumnDataSource(df)
+        p = figure(width=self.width, height=self.height, title=title, tools=self.tools)
+        for y_col in y_cols:
+            p.line(x_col, y_col, line_width=2, source=source, legend_label=y_col)
+        p.xaxis.axis_label = x_col
+        p.yaxis.axis_label = "Values"
+        p.legend.click_policy = "hide"
+        hover = p.select(dict(type=HoverTool))
+        hover.tooltips = [(col, f"@{col}") for col in [x_col] + y_cols]
+        script, div = components(p)
+        return f"{CDN.render()}\n{div}\n{script}"
+    def create_bar(self, df: pd.DataFrame, x_col: str, y_col: str, title: str = "") -> str:
+        """Create an interactive bar plot"""
+        source = ColumnDataSource(df)
+        p = figure(width=self.width, height=self.height, title=title,
+                  tools=self.tools, x_range=df[x_col].unique().tolist())
+        p.vbar(x=x_col, top=y_col, width=0.9, source=source)
+        p.xaxis.axis_label = x_col
+        p.yaxis.axis_label = y_col
+        p.xgrid.grid_line_color = None
+        hover = p.select(dict(type=HoverTool))
+        hover.tooltips = [(x_col, f"@{x_col}"), (y_col, f"@{y_col}")]
+        script, div = components(p)
+        return f"{CDN.render()}\n{div}\n{script}"
+class AnalysisSession:
+    """Maintains state and history for the analysis session"""
+    def __init__(self):
+        self.data: Optional[pd.DataFrame] = None
+        self.chat_history: List[Dict[str, str]] = []
+        self.viz_engine = VisualizationEngine()
+    def add_message(self, role: str, content: str):
+        """Add a message to the chat history"""
+        self.chat_history.append({"role": role, "content": content})
+    def get_context(self) -> str:
+        """Get the current analysis context"""
+        if self.data is None:
+            return "No data loaded yet."
+        context = f"""
+        Current DataFrame Info:
+        - Shape: {self.data.shape}
+        - Columns: {', '.join(self.data.columns)}
+        - Numeric columns: {', '.join(self.data.select_dtypes(include=[np.number]).columns)}
+        - Categorical columns: {', '.join(self.data.select_dtypes(include=['object', 'category']).columns)}
         """
+        return context
+class AnalysisAgent:
+    """Enhanced agent with interactive visualization and chat capabilities"""
+    def __init__(
+        self,
+        model_id: str = "gpt-4o-mini",
+        temperature: float = 0.7,
+    ):
+        self.model_id = model_id
+        self.temperature = temperature
+        self.session = AnalysisSession()
+    def process_query(self, query: str) -> str:
+        """Process a user query and generate response with visualizations"""
+        context = self.session.get_context()
+        messages = [
+            {"role": "system", "content": self._get_system_prompt()},
+            *self.session.chat_history[-5:],  # Include last 5 messages for context
+            {"role": "user", "content": f"{context}\n\nUser query: {query}"}
+        ]
+        try:
+            response = completion(
+                model=self.model_id,
+                messages=messages,
+                temperature=self.temperature,
+            )
+            analysis = response.choices[0].message.content
+            # Extract and execute any code blocks
+            visualizations = []
+            code_blocks = self._extract_code(analysis)
+            for code in code_blocks:
+                try:
+                    # Execute code and capture visualization commands
+                    result = self._execute_visualization(code)
+                    if result:
+                        visualizations.append(result)
+                except Exception as e:
+                    visualizations.append(f"Error creating visualization: {str(e)}")
+            # Add messages to chat history
+            self.session.add_message("user", query)
+            self.session.add_message("assistant", analysis)
+            # Combine analysis and visualizations
+            return analysis + "\n\n" + "\n".join(visualizations)
+        except Exception as e:
+            return f"Error: {str(e)}"
+    def _execute_visualization(self, code: str) -> Optional[str]:
+        """Execute visualization code and return HTML output"""
+        try:
+            # Create a safe namespace with necessary libraries
+            namespace = {
+                'df': self.session.data,
+                'np': np,
+                'pd': pd,
+                'viz': self.session.viz_engine
+            }
+            # Execute the code
+            exec(code, namespace)
+            # Look for visualization result
+            for var in namespace.values():
+                if isinstance(var, str) and ('<script' in var or '<div' in var):
+                    return var
+            return None
+        except Exception as e:
+            return f"Error executing visualization: {str(e)}"
+    def _get_system_prompt(self) -> str:
+        """Get system prompt with visualization capabilities"""
+        return """You are a data analysis assistant with interactive visualization capabilities.
+Available visualizations:
+1. Scatter plots (viz.create_scatter)
+2. Line plots (viz.create_line)
+3. Bar plots (viz.create_bar)
+The following variables are available:
+- df: pandas DataFrame with the current data
+- viz: visualization engine with plotting methods
+- np: numpy library
+- pd: pandas library
+When analyzing data:
+1. First understand and explain the data
+2. Create relevant visualizations using the viz engine
+3. Provide insights based on the visualizations
+4. Ask follow-up questions when appropriate
+5. Use markdown for formatting
+Example visualization code:
+```python
+# Create scatter plot
+html = viz.create_scatter(df, 'column1', 'column2', title='Analysis')
+print(html)
+# Create line plot
+html = viz.create_line(df, 'date_column', ['value1', 'value2'], title='Trends')
+print(html)
+```
+"""
+    @staticmethod
+    def _extract_code(text: str) -> List[str]:
+        """Extract Python code blocks from markdown"""
+        import re
+        pattern = r'```python\n(.*?)```'
+        return re.findall(pattern, text, re.DOTALL)
 def create_interface():
+    """Create Gradio interface with chat capabilities"""
+    agent = AnalysisAgent()
+    def process_file(file: gr.File) -> str:
+        """Process uploaded file and initialize session"""
+        try:
+            if file.name.endswith('.csv'):
+                agent.session.data = pd.read_csv(file.name)
+            elif file.name.endswith(('.xlsx', '.xls')):
+                agent.session.data = pd.read_excel(file.name)
+            else:
+                return "Error: Unsupported file type"
+            return f"Successfully loaded data: {agent.session.get_context()}"
+        except Exception as e:
+            return f"Error loading file: {str(e)}"
+    def analyze(file: gr.File, query: str, api_key: str) -> str:
+        """Process analysis query"""
+        if not api_key:
+            return "Error: Please provide an API key."
+        if not file:
+            return "Error: Please upload a file."
+        try:
+            os.environ["OPENAI_API_KEY"] = api_key
+            return agent.process_query(query)
+        except Exception as e:
+            return f"Error: {str(e)}"
+    with gr.Blocks(title="Interactive Data Analysis Assistant") as interface:
         gr.Markdown("""
         # Interactive Data Analysis Assistant
+        Upload your data file and chat with the AI to analyze it. Features:
+        - Interactive visualizations
+        - Natural language analysis
+        - Follow-up questions
+        - Statistical insights
+        **Note**: Requires OpenAI API key
         """)
         with gr.Row():
                     label="Upload Data File",
                     file_types=[".csv", ".xlsx", ".xls"]
                 )
                 api_key = gr.Textbox(
+                    label="API Key",
                     type="password"
                 )
+                chat_input = gr.Textbox(
+                    label="Ask about your data",
+                    placeholder="e.g., Show me the relationship between variables",
+                    lines=3
                 )
                 analyze_btn = gr.Button("Analyze")
             with gr.Column():
+                chat_output = gr.HTML(label="Analysis & Visualizations")
+        # Set up event handlers
+        file.change(process_file, inputs=[file], outputs=[chat_output])
         analyze_btn.click(
+            analyze,
+            inputs=[file, chat_input, api_key],
+            outputs=[chat_output]
         )
+        # Example queries
         gr.Examples(
             examples=[
+                [None, "Show me the distribution of numerical variables"],
+                [None, "Create a correlation analysis with interactive visualizations"],
+                [None, "What are the main trends in the data?"],
+                [None, "Can you identify any interesting patterns?"],
             ],
+            inputs=[file, chat_input]
         )
     return interface