LuisZermeno committed · Commit f64ef80 · verified · 1 Parent(s): a6d07ff

Update tools.py

Files changed (1):
  1. tools.py +124 -29
tools.py CHANGED
@@ -3,7 +3,7 @@ import re
 import json
 import base64
 import requests
-import wikipedia
+import wikipediaapi
 import numpy as np
 import pandas as pd
 from typing import Dict, Any, List, Optional, Union
@@ -22,6 +22,9 @@ import logging
 
 logger = logging.getLogger(__name__)
 
+# Initialize Wikipedia API
+wiki_wiki = wikipediaapi.Wikipedia('GAIA-Agent/1.0', 'en')
+
 # Tool implementations
 
 def web_search_tool(query: str, num_results: int = 5) -> str:
@@ -50,26 +53,41 @@ def web_search_tool(query: str, num_results: int = 5) -> str:
 def wikipedia_tool(query: str) -> str:
     """Search and get content from Wikipedia"""
     try:
-        # Try direct page first
-        try:
-            page = wikipedia.page(query)
-            return f"Title: {page.title}\n\nSummary: {page.summary[:1000]}...\n\nURL: {page.url}"
-        except wikipedia.exceptions.DisambiguationError as e:
-            # If ambiguous, try first option
-            if e.options:
-                page = wikipedia.page(e.options[0])
-                return f"Title: {page.title}\n\nSummary: {page.summary[:1000]}...\n\nURL: {page.url}"
-        except wikipedia.exceptions.PageError:
-            # If page not found, search
-            search_results = wikipedia.search(query, results=5)
-            if search_results:
-                page = wikipedia.page(search_results[0])
-                return f"Title: {page.title}\n\nSummary: {page.summary[:1000]}...\n\nURL: {page.url}"
+        # Try to get page directly
+        page = wiki_wiki.page(query)
+
+        if page.exists():
+            # Get summary (first 1000 characters)
+            summary = page.summary[:1000] if len(page.summary) > 1000 else page.summary
+            return f"Title: {page.title}\n\nSummary: {summary}...\n\nURL: {page.fullurl}"
+        else:
+            # Search for pages
+            from duckduckgo_search import DDGS
+            ddgs = DDGS()
+            search_query = f"site:wikipedia.org {query}"
+            results = list(ddgs.text(search_query, max_results=3))
+
+            if results:
+                # Try to extract Wikipedia page title from first result
+                first_result = results[0]
+                if 'wikipedia.org/wiki/' in first_result['link']:
+                    page_title = first_result['link'].split('/wiki/')[-1].replace('_', ' ')
+                    page = wiki_wiki.page(page_title)
+                    if page.exists():
+                        summary = page.summary[:1000] if len(page.summary) > 1000 else page.summary
+                        return f"Title: {page.title}\n\nSummary: {summary}...\n\nURL: {page.fullurl}"
+
+                # Return search results if can't get page
+                formatted_results = []
+                for result in results:
+                    formatted_results.append(f"- {result['title']}: {result['body'][:200]}...")
+                return "Wikipedia search results:\n" + "\n".join(formatted_results)
 
         return "No Wikipedia results found."
     except Exception as e:
         logger.error(f"Wikipedia error: {str(e)}")
-        return f"Wikipedia search failed: {str(e)}"
+        # Fallback to web search
+        return web_search_tool(f"site:wikipedia.org {query}", num_results=3)
 
 def calculator_tool(expression: str) -> str:
     """Evaluate mathematical expressions safely"""
@@ -87,9 +105,9 @@ def calculator_tool(expression: str) -> str:
         node = ast.parse(expression, mode='eval')
 
         # Safety check
-        for node in ast.walk(node):
-            if isinstance(node, ast.Name) and node.id not in allowed_names:
-                raise ValueError(f"Unsafe operation: {node.id}")
+        for n in ast.walk(node):
+            if isinstance(n, ast.Name) and n.id not in allowed_names:
+                raise ValueError(f"Unsafe operation: {n.id}")
 
         result = eval(compile(ast.parse(expression, mode='eval'), '<string>', 'eval'),
                       {"__builtins__": {}}, allowed_names)
@@ -135,12 +153,21 @@ def python_repl_tool(code: str) -> str:
 def image_analysis_tool(image_path: str, query: str = "") -> str:
     """Analyze images using OCR and basic computer vision"""
     try:
+        # Handle base64 encoded images
         if image_path.startswith('data:'):
-            # Handle base64 encoded images
             header, encoded = image_path.split(',', 1)
             data = base64.b64decode(encoded)
             image = Image.open(io.BytesIO(data))
         else:
+            # Check if file exists in uploaded files
+            uploaded_files = json.loads(os.environ.get("UPLOADED_FILES", "[]"))
+            if uploaded_files and not os.path.exists(image_path):
+                # Try to find the file in uploaded files
+                for file_path in uploaded_files:
+                    if os.path.basename(file_path) == os.path.basename(image_path):
+                        image_path = file_path
+                        break
+
             image = Image.open(image_path)
 
         # Perform OCR
@@ -173,6 +200,18 @@ def image_analysis_tool(image_path: str, query: str = "") -> str:
 def file_reader_tool(file_path: str, query: str = "") -> str:
     """Read and analyze various file types"""
     try:
+        # Check uploaded files
+        uploaded_files = json.loads(os.environ.get("UPLOADED_FILES", "[]"))
+        if uploaded_files and not os.path.exists(file_path):
+            # Try to find the file in uploaded files
+            for uploaded_path in uploaded_files:
+                if os.path.basename(uploaded_path) == os.path.basename(file_path):
+                    file_path = uploaded_path
+                    break
+
+        if not os.path.exists(file_path):
+            return f"File not found: {file_path}"
+
         file_ext = os.path.splitext(file_path)[1].lower()
 
         if file_ext in ['.txt', '.md', '.py', '.json', '.xml', '.html']:
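The UPLOADED_FILES lookup added here (and repeated in the image, audio and data tools) assumes the host app exports the uploaded paths as a JSON list in that environment variable. A minimal sketch of the resolution, with a hypothetical path:

import json
import os

os.environ["UPLOADED_FILES"] = json.dumps(["/tmp/uploads/report.csv"])  # hypothetical setup

requested = "report.csv"  # bare name passed by the caller
uploaded_files = json.loads(os.environ.get("UPLOADED_FILES", "[]"))
if uploaded_files and not os.path.exists(requested):
    for uploaded_path in uploaded_files:
        if os.path.basename(uploaded_path) == os.path.basename(requested):
            requested = uploaded_path  # resolved to /tmp/uploads/report.csv
            break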
@@ -181,11 +220,37 @@ def file_reader_tool(file_path: str, query: str = "") -> str:
             return f"File content:\n{content[:2000]}{'...' if len(content) > 2000 else ''}"
 
         elif file_ext in ['.csv']:
-            df = pd.read_csv(file_path)
+            # Try multiple encodings and delimiters
+            encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
+            delimiters = [',', ';', '\t', '|']
+
+            df = None
+            for encoding in encodings:
+                for delimiter in delimiters:
+                    try:
+                        df = pd.read_csv(file_path, encoding=encoding, delimiter=delimiter)
+                        if len(df.columns) > 1:  # Successful parse
+                            break
+                    except:
+                        continue
+                if df is not None and len(df.columns) > 1:
+                    break
+
+            if df is None:
+                return "Failed to parse CSV file with multiple encoding/delimiter attempts"
+
             info = f"CSV file with {len(df)} rows and {len(df.columns)} columns.\n"
             info += f"Columns: {', '.join(df.columns)}\n\n"
             info += f"First 5 rows:\n{df.head().to_string()}\n\n"
             info += f"Data types:\n{df.dtypes.to_string()}"
+
+            # Check for date columns and analyze if query mentions time
+            if query and any(word in query.lower() for word in ['month', 'year', 'date', 'january', 'february', 'march', 'april', 'may', 'june', 'july', 'august', 'september', 'october', 'november', 'december']):
+                from search_strategies import DataAnalysisStrategy
+                temporal_result = DataAnalysisStrategy.analyze_for_temporal_data(df, query)
+                if temporal_result is not None:
+                    info += f"\n\nTemporal analysis result:\n{temporal_result.head(10).to_string()}"
+
             return info
 
         elif file_ext in ['.xlsx', '.xls']:
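The CSV fallback loop above can be read as a small helper; a sketch under the same assumptions (the first encoding/delimiter pair that yields more than one column wins):

from typing import Optional
import pandas as pd

def read_csv_flexibly(path: str) -> Optional[pd.DataFrame]:
    for encoding in ('utf-8', 'latin1', 'iso-8859-1', 'cp1252'):
        for delimiter in (',', ';', '\t', '|'):
            try:
                df = pd.read_csv(path, encoding=encoding, delimiter=delimiter)
            except Exception:
                continue
            if len(df.columns) > 1:  # treat a multi-column parse as success
                return df
    return None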
@@ -213,6 +278,14 @@ def audio_analysis_tool(audio_path: str) -> str:
     try:
         recognizer = sr.Recognizer()
 
+        # Check uploaded files
+        uploaded_files = json.loads(os.environ.get("UPLOADED_FILES", "[]"))
+        if uploaded_files and not os.path.exists(audio_path):
+            for uploaded_path in uploaded_files:
+                if os.path.basename(uploaded_path) == os.path.basename(audio_path):
+                    audio_path = uploaded_path
+                    break
+
         # Convert to WAV if needed
         if not audio_path.endswith('.wav'):
             audio = AudioSegment.from_file(audio_path)
@@ -235,7 +308,7 @@ def audio_analysis_tool(audio_path: str) -> str:
             result = f"Speech recognition error: {str(e)}"
 
         # Clean up temp file
-        if wav_path != audio_path:
+        if wav_path != audio_path and os.path.exists(wav_path):
             os.unlink(wav_path)
 
         return result
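A sketch of the conversion-plus-cleanup flow that the added existence check protects, assuming pydub (with ffmpeg) is available; 'clip.mp3' is only illustrative:

import os
import tempfile
from pydub import AudioSegment

def ensure_wav(audio_path: str) -> str:
    # Convert to WAV only when needed, mirroring the tool
    if audio_path.endswith('.wav'):
        return audio_path
    wav_path = tempfile.mktemp(suffix='.wav')
    AudioSegment.from_file(audio_path).export(wav_path, format='wav')
    return wav_path

wav_path = ensure_wav('clip.mp3')
# ... speech recognition runs here ...
if wav_path != 'clip.mp3' and os.path.exists(wav_path):
    os.unlink(wav_path)  # the os.path.exists guard avoids a FileNotFoundError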
@@ -247,6 +320,14 @@ def audio_analysis_tool(audio_path: str) -> str:
 def data_analysis_tool(file_path: str, operation: str, **kwargs) -> str:
     """Perform data analysis operations on CSV/Excel files"""
     try:
+        # Check uploaded files
+        uploaded_files = json.loads(os.environ.get("UPLOADED_FILES", "[]"))
+        if uploaded_files and not os.path.exists(file_path):
+            for uploaded_path in uploaded_files:
+                if os.path.basename(uploaded_path) == os.path.basename(file_path):
+                    file_path = uploaded_path
+                    break
+
         # Load data
         if file_path.endswith('.csv'):
             df = pd.read_csv(file_path)
@@ -256,22 +337,29 @@ def data_analysis_tool(file_path: str, operation: str, **kwargs) -> str:
         # Perform requested operation
         if operation == "sum":
             column = kwargs.get('column')
-            if column:
+            if column and column in df.columns:
                 result = df[column].sum()
                 return f"Sum of {column}: {result}"
+            return f"Column '{column}' not found"
 
         elif operation == "mean":
             column = kwargs.get('column')
-            if column:
+            if column and column in df.columns:
                 result = df[column].mean()
                 return f"Mean of {column}: {result}"
+            return f"Column '{column}' not found"
 
         elif operation == "count":
             column = kwargs.get('column')
             value = kwargs.get('value')
-            if column and value:
-                result = len(df[df[column] == value])
-                return f"Count of {column}={value}: {result}"
+            if column and column in df.columns:
+                if value:
+                    result = len(df[df[column] == value])
+                    return f"Count of {column}={value}: {result}"
+                else:
+                    result = df[column].value_counts()
+                    return f"Value counts for {column}:\n{result.to_string()}"
+            return f"Column '{column}' not found"
 
         elif operation == "groupby":
             group_column = kwargs.get('group_column')
@@ -280,16 +368,23 @@ def data_analysis_tool(file_path: str, operation: str, **kwargs) -> str:
             if group_column and agg_column:
                 result = df.groupby(group_column)[agg_column].agg(agg_func)
                 return f"Grouped results:\n{result.to_string()}"
+            return "Missing group_column or agg_column"
 
         elif operation == "filter":
             condition = kwargs.get('condition')
             if condition:
                 filtered_df = df.query(condition)
                 return f"Filtered data ({len(filtered_df)} rows):\n{filtered_df.head().to_string()}"
+            return "Missing filter condition"
 
         elif operation == "describe":
             return f"Data description:\n{df.describe().to_string()}"
 
+        elif operation == "info":
+            buffer = io.StringIO()
+            df.info(buf=buffer)
+            return buffer.getvalue()
+
         return "Operation not recognized or missing parameters."
 
     except Exception as e:
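Illustrative calls against the updated operations, assuming a hypothetical sales.csv with 'region' and 'amount' columns and that agg_func is read from kwargs:

print(data_analysis_tool("sales.csv", "sum", column="amount"))
print(data_analysis_tool("sales.csv", "count", column="region"))  # value counts when no value is given
print(data_analysis_tool("sales.csv", "groupby",
                         group_column="region", agg_column="amount", agg_func="mean"))
print(data_analysis_tool("sales.csv", "info"))                    # new operation in this commit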
@@ -374,7 +469,7 @@ tool_schemas = {
             "type": "object",
             "properties": {
                 "file_path": {"type": "string", "description": "Path to data file"},
-                "operation": {"type": "string", "description": "Operation: sum, mean, count, groupby, filter, describe"},
+                "operation": {"type": "string", "description": "Operation: sum, mean, count, groupby, filter, describe, info"},
                 "kwargs": {"type": "object", "description": "Additional parameters for the operation"}
             },
             "required": ["file_path", "operation"]
 