Spaces:
Sleeping
Sleeping
| import requests | |
| from langchain.tools import tool | |
| from duckduckgo_search import DDGS | |
| from bs4 import BeautifulSoup | |
| import tempfile | |
| from typing import Optional | |
| import os | |
| from urllib.parse import urlparse | |
def search(query: str) -> str:
    """Search the internet using DuckDuckGo.

    Args:
        query (str): Search query.

    Returns:
        str: Up to five results separated by blank lines, each rendered as
        title, URL and snippet on consecutive lines, or
        "No results found." when the search yields nothing.
    """
    with DDGS() as ddgs:
        # Materialise the results while the DDGS session is still open.
        hits = list(ddgs.text(query, max_results=5))

    if not hits:
        return "No results found."

    # The signature and docstring promise a string (like every other tool in
    # this module); previously the raw list of result dicts was returned.
    # ``.get`` keeps this tolerant of a result that lacks one of the fields.
    entries = []
    for hit in hits:
        fields = (hit.get("title", ""), hit.get("href", ""), hit.get("body", ""))
        entries.append("\n".join(str(field) for field in fields if field))
    return "\n\n".join(entries)
def process_content(url: str) -> str:
    """Fetch a webpage and return its text content.

    Args:
        url (str): URL to get content from.

    Returns:
        str: The text extracted from the page's HTML, or an error message
        beginning with "Error fetching content:" when the request fails
        (network error, timeout, or non-2xx HTTP status).
    """
    try:
        # requests has no default timeout; without one a stalled server
        # would block the caller indefinitely.
        response = requests.get(url, timeout=30)
        # Do not hand an HTTP error page back as if it were the content.
        response.raise_for_status()
    except requests.RequestException as e:
        # Report failures as a string, like the other tools in this module.
        return f"Error fetching content: {str(e)}"

    soup = BeautifulSoup(response.content, "html.parser")
    return soup.get_text()
def save_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a file in the system temporary directory.

    Args:
        content: The text to write to the file (written as UTF-8).
        filename: Optional filename. Only its final path component is used,
            so the file always lands directly inside the temporary
            directory. When omitted, empty, or not a usable name, a
            uniquely named temporary file is created instead.

    Returns:
        A status message that contains the path of the saved file.
    """
    # Strip any directory part so an absolute path or "../" in ``filename``
    # cannot redirect the write outside the temporary directory.
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name in ("", ".", ".."):
        # Write through the temp-file handle itself so it is closed on exit;
        # delete=False keeps the file on disk for the caller to read later.
        with tempfile.NamedTemporaryFile(
            "w", encoding="utf-8", delete=False
        ) as handle:
            handle.write(content)
            filepath = handle.name
    else:
        filepath = os.path.join(tempfile.gettempdir(), safe_name)
        # Explicit encoding: the platform default may not be able to encode
        # arbitrary text.
        with open(filepath, "w", encoding="utf-8") as handle:
            handle.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it in the system temporary directory.

    Args:
        url: The URL to download from.
        filename: Optional filename. Only its final path component is used.
            When omitted, the name is taken from the last component of the
            URL path; if that is empty or unusable, a random
            "downloaded_<hex>" name is generated.

    Returns:
        A status message containing the path of the downloaded file, or a
        message beginning with "Error downloading file:" on any failure.
    """
    import uuid

    try:
        # basename() drops directory parts so neither a caller-supplied name
        # nor the URL path can place the file outside the temp directory.
        safe_name = os.path.basename(filename) if filename else ""
        if not safe_name:
            safe_name = os.path.basename(urlparse(url).path)
        if safe_name in ("", ".", ".."):
            # No usable name could be extracted; generate a random one.
            safe_name = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # The context manager releases the streamed connection even when
        # writing fails; the timeout keeps a stalled server from blocking
        # the caller indefinitely.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can now process this file."
    except Exception as e:
        # Every failure is reported to the caller as a string rather than
        # raised, matching the other tools in this module.
        return f"Error downloading file: {str(e)}"
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file.

    Returns:
        The extracted text prefixed with "Extracted text from image:", or an
        error message when pytesseract/Pillow cannot be imported or the
        image cannot be processed.
    """
    try:
        # Imported lazily so the rest of the module works without the
        # optional OCR dependencies.
        import pytesseract
        from PIL import Image

        # Open via a context manager so the underlying file is closed once
        # OCR has finished, including when OCR raises.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Load a CSV file with pandas and return a summary of its contents.

    Args:
        file_path: Path to the CSV file.
        query: Question about the data. Currently not used: the same
            summary is produced regardless of the question.

    Returns:
        A report with the row/column counts, the column names and the
        output of ``DataFrame.describe()``, or an error message.
    """
    try:
        # Imported lazily so the module loads even without pandas.
        import pandas as pd

        df = pd.read_csv(file_path)

        # Labels are stringified so a non-string label can never break the
        # join (same handling as the Excel tool).
        column_names = ", ".join(map(str, df.columns))

        # to_string() renders the full table; str()/repr() obey pandas'
        # display options and would elide columns with "..." or wrap the
        # table on wide files.
        stats = df.describe().to_string()

        return (
            f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
            f"Columns: {column_names}\n\n"
            "Summary statistics:\n"
            f"{stats}"
        )
    except ImportError:
        return "Error: pandas is not installed. Please install it with 'pip install pandas'."
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Load an Excel file with pandas and return a summary of its contents.

    Args:
        file_path: Path to the Excel file.
        query: Question about the data. Currently not used: the same
            summary is produced regardless of the question.

    Returns:
        A report with the row/column counts, the column names and the
        output of ``DataFrame.describe()``, or an error message.
    """
    try:
        # Imported lazily so the module loads even without pandas.
        import pandas as pd

        df = pd.read_excel(file_path)

        # Header cells in a spreadsheet may be numbers or dates, so the
        # labels must be stringified; joining them directly raises TypeError.
        column_names = ", ".join(map(str, df.columns))

        # to_string() renders the full table; str()/repr() obey pandas'
        # display options and would elide columns with "..." or wrap the
        # table on wide sheets.
        stats = df.describe().to_string()

        return (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
            f"Columns: {column_names}\n\n"
            "Summary statistics:\n"
            f"{stats}"
        )
    except ImportError:
        return "Error: pandas and openpyxl are not installed. Please install them with 'pip install pandas openpyxl'."
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"
def get_tools():
    """Return the list of tool callables that are currently enabled.

    Only ``search`` is included at the moment.
    """
    enabled_tools = [search]
    # Defined in this module but not enabled: process_content, save_file,
    # download_file_from_url, extract_text_from_image, analyze_csv_file,
    # analyze_excel_file.
    return enabled_tools