jzou19950715 committed on
Commit
47e9852
·
verified ·
1 Parent(s): 28c222c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +245 -329
app.py CHANGED
@@ -1,345 +1,261 @@
1
  """
2
- Advanced Data Analysis Assistant with Interactive Visualizations
3
- Integrates smolagents, GPT-4, and interactive Plotly visualizations.
4
  """
5
 
6
import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, Tuple

import gradio as gr
import pandas as pd
from smolagents import CodeAgent, LiteLLMModel

# Import our custom tools
from tools import (
    create_time_series_plot,
    create_correlation_heatmap,
    create_statistical_summary,
    detect_outliers,
    validate_dataframe,
    get_numeric_columns,
    get_temporal_columns,
    AnalysisError
)
27
 
28
  # Constants
29
- SUPPORTED_FILE_TYPES = [".csv", ".xlsx", ".xls"]
30
- DEFAULT_MODEL = "gpt-4o-mini"
31
- HISTORY_FILE = "analysis_history.json"
32
-
33
@dataclass
class VisualizationConfig:
    """Default rendering options for generated Plotly visualizations.

    Plain value object; callers may override any field at construction time.
    """
    width: int = 800                 # figure width in pixels
    height: int = 500                # figure height in pixels
    template: str = "plotly_white"   # Plotly layout template name
    show_grid: bool = True           # whether axis grid lines are drawn
    interactive: bool = True         # hint: interactive vs. static rendering
41
-
42
class DataPreprocessor:
    """Handles data preprocessing and validation."""

    @staticmethod
    def preprocess_dataframe(df: pd.DataFrame) -> Tuple[pd.DataFrame, Dict[str, Any]]:
        """Validate *df*, collect metadata, and fill missing values.

        Args:
            df: Input dataframe to analyze.

        Returns:
            Tuple of (preprocessed dataframe, metadata dict). The metadata
            records the original shape, per-column missing-value counts,
            dtypes, and the numeric/categorical/temporal column groupings.

        Raises:
            ValueError: If validate_dataframe reports the frame as invalid.
        """
        # First validate the dataframe
        is_valid, error_msg = validate_dataframe(df)
        if not is_valid:
            raise ValueError(error_msg)

        metadata = {
            "original_shape": df.shape,
            "missing_values": df.isnull().sum().to_dict(),
            "dtypes": df.dtypes.astype(str).to_dict(),
            "numeric_columns": get_numeric_columns(df),
            "categorical_columns": df.select_dtypes(include=['object']).columns.tolist(),
            "temporal_columns": get_temporal_columns(df)
        }

        # Handle missing values: forward-fill, then back-fill leading NaNs.
        # FIX: fillna(method=...) is deprecated and removed in pandas 2.x;
        # use the dedicated ffill()/bfill() methods (same semantics).
        df = df.ffill().bfill()

        return df, metadata
66
-
67
class DataAnalysisAssistant:
    """Enhanced data analysis assistant with visualization capabilities.

    Wraps a smolagents CodeAgent backed by a LiteLLM model and the custom
    plotting/analysis tools imported from `tools`. Results of each run are
    recorded in an AnalysisHistory instance.
    """

    def __init__(self, api_key: str):
        # LiteLLM model wrapper; DEFAULT_MODEL is the module-level constant.
        self.model = LiteLLMModel(
            model_id=DEFAULT_MODEL,
            api_key=api_key
        )
        self.history = AnalysisHistory()

        # Initialize agent with tools and our custom analysis tools
        self.agent = CodeAgent(
            model=self.model,
            tools=[
                create_time_series_plot,
                create_correlation_heatmap,
                create_statistical_summary,
                detect_outliers
            ],
            # Imports the agent-generated code is permitted to use.
            additional_authorized_imports=[
                'pandas', 'numpy', 'plotly.express', 'plotly.graph_objects',
                'seaborn', 'scipy', 'statsmodels'
            ],
        )

    def analyze(self, df: pd.DataFrame, query: str) -> str:
        """Run the agent on *df* for *query*.

        Returns the HTML-wrapped agent response, or an inline
        "Analysis failed: ..." string on any exception.
        """
        try:
            # Preprocess data (validation + missing-value fill)
            df, metadata = DataPreprocessor.preprocess_dataframe(df)

            # Create context for the agent
            context = self._create_analysis_context(df, metadata, query)

            # Get analysis plan and execute; the frame is passed to the
            # agent's sandbox as the variable 'df'.
            response = self.agent.run(context, additional_args={"df": df})

            # Save to history (persisted to disk by AnalysisHistory)
            self.history.add_entry(query, str(response))

            return self._format_results(response)

        except Exception as e:
            # Any failure (validation, agent run, history I/O) is reported
            # inline rather than raised, so the UI always gets a string.
            return f"Analysis failed: {str(e)}"

    def _create_analysis_context(self, df: pd.DataFrame, metadata: Dict, query: str) -> str:
        """Build the prompt for the agent from frame metadata and the query."""
        tools_description = """
        Available analysis tools:
        - create_time_series_plot: Create interactive time series visualizations
        - create_correlation_heatmap: Generate correlation analysis with heatmap
        - create_statistical_summary: Compute statistical summaries with visualizations
        - detect_outliers: Identify and visualize outliers
        """

        return f"""
        Analyze the following data with interactive visualizations.

        DataFrame Information:
        - Shape: {metadata['original_shape']}
        - Numeric columns: {', '.join(metadata['numeric_columns'])}
        - Categorical columns: {', '.join(metadata['categorical_columns'])}
        - Temporal columns: {', '.join(metadata['temporal_columns'])}

        {tools_description}

        User Query: {query}

        Guidelines:
        1. Use the provided analysis tools for visualizations
        2. Include clear titles and labels
        3. Handle errors gracefully
        4. Chain multiple analyses when needed
        5. Provide insights along with visualizations

        The DataFrame is available as 'df'.
        """

    def _format_results(self, response: str) -> str:
        """Wrap the agent response in an HTML div for display in the UI."""
        return f'<div class="analysis-text">{response}</div>'
148
-
149
class AnalysisHistory:
    """
    Manages analysis history and persistence.

    Attributes:
        history_file (Path): Path to the JSON file storing analysis history
        history (List[Dict]): List of historical analysis entries

    Each history entry is a dictionary containing:
    - timestamp: ISO format timestamp
    - query: The user's analysis query
    - result: The analysis result/response
    """

    def __init__(self, history_file: str = "analysis_history.json"):
        """
        Initialize the analysis history manager.

        Args:
            history_file (str): Path to the history JSON file. The default
                equals the module-level HISTORY_FILE value; the literal is
                used so the class works standalone.
        """
        self.history_file = Path(history_file)
        self.history = self._load_history()

    def _load_history(self) -> List[Dict]:
        """
        Load analysis history from file.

        Returns:
            List[Dict]: Historical analysis entries; [] if the file is
            missing, unreadable, or contains invalid JSON.
        """
        if self.history_file.exists():
            try:
                with self.history_file.open('r') as f:
                    return json.load(f)
            except json.JSONDecodeError:
                # FIX: `logger` was never defined in this module (NameError
                # on every error path); use the stdlib logging module.
                logging.getLogger(__name__).error(f"Invalid JSON in history file: {self.history_file}")
                return []
            except Exception as e:
                logging.getLogger(__name__).error(f"Error loading history file: {str(e)}")
                return []
        return []

    def add_entry(self, query: str, result: str) -> None:
        """
        Add a new analysis entry to history and persist it to disk.

        Args:
            query (str): The analysis query
            result (str): The analysis result/response
        """
        entry = {
            'timestamp': datetime.now().isoformat(),
            'query': query,
            'result': result
        }
        self.history.append(entry)
        self._save_history()

    def get_recent_analyses(self, limit: int = 5) -> List[Dict]:
        """
        Get the most recent analysis entries.

        Args:
            limit (int): Maximum number of entries to return. Defaults to 5.

        Returns:
            List[Dict]: Entries sorted by timestamp, newest first.
        """
        return sorted(
            self.history,
            key=lambda x: x['timestamp'],
            reverse=True
        )[:limit]

    def _save_history(self) -> None:
        """Persist history to the JSON file; failures are logged, not raised."""
        try:
            with self.history_file.open('w') as f:
                json.dump(self.history, f, indent=2)
        except Exception as e:
            logging.getLogger(__name__).error(f"Failed to save history: {str(e)}")

    def clear_history(self) -> None:
        """Clear all analysis history, in memory and on disk."""
        self.history = []
        self._save_history()

    def get_history_by_date(self, start_date: datetime, end_date: datetime) -> List[Dict]:
        """
        Get analysis history within a date range (inclusive on both ends).

        Args:
            start_date (datetime): Start of date range
            end_date (datetime): End of date range

        Returns:
            List[Dict]: Entries whose timestamp falls inside the range;
            entries with unparseable timestamps are logged and skipped.
        """
        filtered_history = []
        for entry in self.history:
            try:
                entry_date = datetime.fromisoformat(entry['timestamp'])
                if start_date <= entry_date <= end_date:
                    filtered_history.append(entry)
            except Exception as e:
                logging.getLogger(__name__).error(f"Error parsing entry date: {str(e)}")
                continue
        return filtered_history

    def search_history(self, search_term: str) -> List[Dict]:
        """
        Case-insensitive substring search over queries and results.

        Args:
            search_term (str): Term to search for in queries and results

        Returns:
            List[Dict]: Matching analysis entries
        """
        search_term = search_term.lower()
        return [
            entry for entry in self.history
            if search_term in entry['query'].lower()
            or search_term in entry['result'].lower()
        ]

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get statistics about the analysis history.

        Returns:
            Dict[str, Any]: total_entries, date_range (None when empty),
            and average_entries_per_day.
        """
        if not self.history:
            return {
                "total_entries": 0,
                "date_range": None,
                "average_entries_per_day": 0
            }

        dates = [datetime.fromisoformat(entry['timestamp']) for entry in self.history]
        first_date = min(dates)
        last_date = max(dates)
        # A same-day history would give a 0-day span; treat it as one day
        # to avoid division by zero below.
        days_span = (last_date - first_date).days or 1

        return {
            "total_entries": len(self.history),
            "date_range": {
                "first": first_date.isoformat(),
                "last": last_date.isoformat()
            },
            "average_entries_per_day": len(self.history) / days_span
        }

    def export_history(self, format: str = 'json') -> str:
        """
        Export analysis history in the specified format.

        Args:
            format (str): Export format ('json' or 'csv'). Defaults to 'json'.

        Returns:
            str: Path to the exported file (timestamped, in the CWD).

        Raises:
            ValueError: If format is not supported
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        if format == 'json':
            export_path = f'analysis_history_{timestamp}.json'
            with open(export_path, 'w') as f:
                json.dump(self.history, f, indent=2)
            return export_path
        elif format == 'csv':
            export_path = f'analysis_history_{timestamp}.csv'
            df = pd.DataFrame(self.history)
            df.to_csv(export_path, index=False)
            return export_path
        else:
            raise ValueError(f"Unsupported export format: {format}")
330
-
331
- def process_file(file: gr.File) -> Optional[pd.DataFrame]:
332
- """Process uploaded file into DataFrame."""
333
- [Previous process_file implementation remains the same]
334
-
335
- def analyze_data(file: gr.File, query: str, api_key: str) -> str:
336
- """Main analysis function for Gradio interface."""
337
- [Previous analyze_data implementation remains the same]
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  def create_interface():
340
- """Create enhanced Gradio interface."""
341
- [Previous create_interface implementation remains the same]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
 
 
343
# Script entry point: build the Gradio interface and start serving it.
if __name__ == "__main__":
    interface = create_interface()
    interface.launch()
 
 
 
 
 
1
  """
2
+ Gradio Interactive Chat App for Educational Information Collection.
 
3
  """
4
 
5
+ # Imports
 
 
 
 
 
6
  import gradio as gr
7
+ import openai
8
+ import json
9
+ from datetime import datetime
10
+ from typing import List, Optional, Dict
11
+ from pydantic import BaseModel, Field
 
 
 
 
 
 
 
 
 
12
 
13
# Constants
# System prompt steering the model to collect education-history fields in a
# fixed order; sent as the "system" message on every chat completion call.
SYSTEM_PROMPT = """You are an educational information collection assistant. Your task is to systematically collect the following information in a conversational manner:

Required Information (collect in this order):
1. Institution Details:
   - Name
   - Type (e.g., university, college, etc.)
   - Location
2. Degree Information:
   - Type (e.g., Bachelor's, Master's, etc.)
   - Field of Study
   - Status (e.g., completed, ongoing)
3. Attendance Dates (start and end)
4. Academic Performance:
   - GPA (if provided)
   - Honors or awards
5. Activities:
   - Extracurricular activities, roles, and durations

Always maintain a friendly, professional tone while systematically collecting this information."""
33
+
34
# Data Models
# Pydantic models mirroring the JSON schema requested in generate_json().
# NOTE(review): nothing in this file references these models — generate_json
# parses the LLM output with json.loads without validating against them;
# confirm whether validation was intended.

class Institution(BaseModel):
    name: str       # institution name
    type: str       # e.g. university, college
    location: str


class Degree(BaseModel):
    type: str       # e.g. Bachelor's, Master's
    field: str      # field of study
    status: str     # e.g. completed, ongoing


class Dates(BaseModel):
    start: str      # attendance start (free-form string)
    end: str        # attendance end (free-form string)


class Activity(BaseModel):
    name: str
    description: str
    duration: str


class Academic(BaseModel):
    gpa: Optional[float] = None                      # omitted when not provided
    honors: List[str] = Field(default_factory=list)
    achievements: List[str] = Field(default_factory=list)


class Education(BaseModel):
    institution: Institution
    degree: Degree
    dates: Dates
    academic: Academic
    activities: List[Activity] = Field(default_factory=list)


class EducationRecord(BaseModel):
    education: List[Education] = Field(default_factory=list)
    metadata: Dict[str, str]    # e.g. timestamp, source
75
+
76
+
77
# Assistant Logic
class EducationAssistant:
    """
    Handles conversation state, chat interactions, and JSON generation.

    NOTE(review): uses the legacy module-level `openai.ChatCompletion` API
    (openai < 1.0). The 1.x SDK removed it — confirm the pinned openai
    version before upgrading dependencies.
    """

    def __init__(self):
        # Full role/content message history, excluding the system prompt.
        self.conversation_history = []
        # Sentinel marking that initialize_chat() has configured the API key;
        # None until the first chat() call.
        self.client = None
        self.system_prompt = SYSTEM_PROMPT

    def initialize_chat(self, api_key: str) -> str:
        """Configure the OpenAI API key and return the opening greeting."""
        try:
            openai.api_key = api_key
            # BUG FIX: self.client was never assigned, so chat() saw
            # `not self.client` on every call and only ever replayed the
            # greeting without reaching the model. Mark initialization done.
            self.client = openai
            return "Hello! Let's record your educational history. What is the name of your most recent educational institution?"
        except Exception as e:
            return f"Error initializing chat: {str(e)}"

    def chat(self, message: str, api_key: str) -> Dict[str, str]:
        """Process one user message and return the assistant reply as a
        {"role", "content"} dict (errors are returned inline, not raised)."""
        # First interaction: initialize and greet instead of querying the model.
        if not self.client:
            first_message = self.initialize_chat(api_key)
            self.conversation_history.append({"role": "assistant", "content": first_message})
            return {"role": "assistant", "content": first_message}

        try:
            # Append user message to history
            self.conversation_history.append({"role": "user", "content": message})

            # Generate response with the system prompt prepended each call.
            response = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=[{"role": "system", "content": self.system_prompt}] + self.conversation_history,
                temperature=0.7,
                max_tokens=300
            )

            # Parse and store assistant response
            assistant_message = {"role": "assistant", "content": response.choices[0].message.content}
            self.conversation_history.append(assistant_message)

            return assistant_message

        except Exception as e:
            return {"role": "assistant", "content": f"Error: {str(e)}"}

    def generate_json(self) -> Optional[str]:
        """Generate a structured JSON file from the conversation history.

        Returns:
            The written filename, or None when generation/parsing fails.
        """
        try:
            json_prompt = """Based on our conversation, generate a structured JSON containing the educational information shared. Format it as follows:
            {
                "education": [
                    {
                        "institution": {
                            "name": string,
                            "type": string,
                            "location": string
                        },
                        "degree": {
                            "type": string,
                            "field": string,
                            "status": string
                        },
                        "dates": {
                            "start": string,
                            "end": string
                        },
                        "academic": {
                            "gpa": float (if provided),
                            "honors": [string],
                            "achievements": [string]
                        },
                        "activities": [
                            {
                                "name": string,
                                "description": string,
                                "duration": string
                            }
                        ]
                    }
                ],
                "metadata": {
                    "timestamp": string,
                    "source": "Education Information Assistant"
                }
            }
            Respond ONLY with the JSON."""

            # Generate JSON based on the conversation history
            response = openai.ChatCompletion.create(
                model="gpt-4o-mini",
                messages=[{"role": "system", "content": self.system_prompt}] + self.conversation_history +
                         [{"role": "user", "content": json_prompt}],
                temperature=0.1,
                max_tokens=1500
            )

            # ROBUSTNESS: models often wrap JSON in markdown code fences even
            # when told not to; strip them before parsing.
            content = response.choices[0].message.content.strip()
            if content.startswith("```"):
                content = content.strip("`").lstrip()
                if content[:4].lower() == "json":
                    content = content[4:]

            # Parse response and write JSON file
            json_data = json.loads(content)
            filename = f"education_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            with open(filename, "w") as f:
                json.dump(json_data, f, indent=2)

            return filename

        except Exception as e:
            print(f"Error generating JSON: {str(e)}")
            return None
186
+
187
+
188
# Gradio Interface
def create_interface():
    """Build and return the Gradio Blocks UI for the education assistant."""
    # A single assistant instance is shared by every session of this
    # interface (conversation state is global, not per-user).
    assistant = EducationAssistant()

    with gr.Blocks() as demo:
        gr.Markdown("# 📘 Educational Information Collection Assistant")

        with gr.Row():
            api_key = gr.Textbox(
                label="OpenAI API Key",
                type="password",
                placeholder="Enter your OpenAI API Key",
                info="Required for using OpenAI's GPT model."
            )

        # NOTE(review): the handlers below append role/content dicts; on
        # Gradio >= 4 the Chatbot must be created with type="messages" to
        # accept that format — confirm the installed Gradio version.
        chatbot = gr.Chatbot(label="Assistant", height=400)

        with gr.Row():
            user_input = gr.Textbox(
                label="Your Message",
                placeholder="Type your message here...",
                lines=2
            )
            send_button = gr.Button("Send", variant="primary")

        generate_button = gr.Button("Generate JSON")
        download_file = gr.File(label="Generated JSON")

        # Event Handlers
        def handle_send(message, history, api_key):
            # Guard: an API key is required before any chat round-trip.
            if not api_key.strip():
                return history + [{"role": "assistant", "content": "Please provide your OpenAI API key to continue."}]

            # Ignore empty submissions.
            if not message.strip():
                return history

            response = assistant.chat(message, api_key)
            # NOTE(review): only the chatbot is listed in outputs, so the
            # input textbox is never cleared after sending.
            return history + [{"role": "user", "content": message}, response]

        def handle_generate():
            filename = assistant.generate_json()
            if filename:
                return filename
            # NOTE(review): this error string feeds a gr.File output, which
            # expects a file path — it will not render as a user message.
            return "Error generating JSON. Please ensure all required information is collected."

        # Button Actions
        send_button.click(
            handle_send,
            inputs=[user_input, chatbot, api_key],
            outputs=[chatbot]
        )

        user_input.submit(
            handle_send,
            inputs=[user_input, chatbot, api_key],
            outputs=[chatbot]
        )

        generate_button.click(
            handle_generate,
            outputs=[download_file]
        )

    return demo
252
+
253
 
254
# Main Execution
if __name__ == "__main__":
    demo = create_interface()
    # NOTE(review): share=True opens a public Gradio tunnel, and binding on
    # 0.0.0.0 exposes the app beyond localhost — appropriate for a hosted
    # demo (e.g. a Space), but review before running elsewhere.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True
    )