akrstova's picture
Cleanup
12b6ce2
import io
import os
import tempfile
from typing import Optional
from urllib.parse import urlparse
import uuid
import pandas as pd
import contextlib
from langchain_core.tools import tool
import requests
from PIL import Image
import pytesseract
from transformers import pipeline
@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Analyze an Excel file using pandas and return a textual overview of it.

    The overview contains the row and column counts, the column labels and
    the output of ``DataFrame.describe()``. The file is read with the
    ``pd.read_excel`` defaults.

    Args:
        file_path (str): the path to the Excel file.
        query (str): Question about the data. It is currently not used by the
            analysis; the same overview is returned for every query.

    Returns:
        str: The overview text, or a message starting with
        "Error analyzing Excel file:" if loading or summarizing fails.
    """
    try:
        # Read the Excel file
        df = pd.read_excel(file_path)
        # Spreadsheet header cells are frequently numbers or dates (e.g. year
        # columns). str.join only accepts strings, so convert each label first
        # instead of failing the whole analysis with a TypeError.
        column_names = ", ".join(str(column) for column in df.columns)
        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {column_names}\n\n"
        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        # Tools report failures as text so the calling agent can react to them.
        return f"Error analyzing Excel file: {str(e)}"
# Build the speech-recognition pipeline a single time at import so that every
# transcription call reuses the already-loaded model instead of reloading it.
# NOTE(review): device=-1 should select the CPU per the transformers pipeline
# convention - confirm against the installed version.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=-1,
)
@tool
def transcribe_audio(file_path: str, query: str = "") -> str:
    """
    Transcribes speech from an audio file (e.g., .mp3 or .wav).

    The file is passed to the module-level ``asr_pipeline`` and the "text"
    entry of its result is returned with surrounding whitespace removed.

    Args:
        file_path (str): Path to the audio file.
        query (str): (Optional) Ignored; present to support LangChain tool schema.

    Returns:
        str: Transcribed text from the audio, "No speech detected." when the
        transcript is empty or whitespace only, or a message starting with
        "Error transcribing audio:" if anything raises.
    """
    try:
        print(f"Transcribing: {file_path}")
        recognized = asr_pipeline(file_path)
        cleaned = recognized["text"].strip()
        # An empty string is falsy, so fall back to the placeholder message.
        return cleaned or "No speech detected."
    except Exception as exc:
        return f"Error transcribing audio: {exc!s}"
@tool
def execute_python_code(code: str) -> str:
    """
    Executes a Python code string and returns the output or error.

    The code runs in one fresh namespace that serves as both its globals and
    its locals, so functions and classes defined by the snippet can refer to
    the snippet's other top-level names (including themselves, for recursion).

    NOTE(security): the code is run with ``exec`` inside this process, with
    full builtins and no sandboxing. Only pass code from a trusted source.

    Args:
        code (str): The Python code to execute.

    Returns:
        str: The captured stdout (stripped) if anything was printed; otherwise
        the string form of a variable named ``result`` if the code defined
        one; otherwise a generic success message. If the code raises an
        ``Exception``, a message starting with "Error executing code:".
    """
    namespace = {}
    stdout = io.StringIO()
    try:
        with contextlib.redirect_stdout(stdout):
            # Use a single dict. With separate globals and locals, exec runs
            # the code as if it were a class body: top-level names land in the
            # locals dict, while functions defined by the code look names up
            # in the (empty) globals dict and fail with NameError.
            exec(code, namespace)
        output = stdout.getvalue().strip()
        if output:
            return output
        # If code defines a variable named 'result', return its value
        if "result" in namespace:
            return str(namespace["result"])
        return "Code executed successfully, but produced no output."
    except Exception as e:
        return f"Error executing code: {e}"
@tool
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a file in the system temp directory and return the path.

    The content is written as UTF-8 text.

    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. Only the final path
            component is used, so the file always lands directly inside the
            temp directory. If not provided (or empty), a randomly named file
            is created.

    Returns:
        str: A message containing the path of the saved file, or a message
        starting with "Error saving file:" if the file could not be written.
    """
    temp_dir = tempfile.gettempdir()
    # Drop any directory part so names such as "../x" or "/etc/x" cannot
    # place the file outside the temp directory.
    safe_name = os.path.basename(filename) if filename else ""
    try:
        if safe_name:
            filepath = os.path.join(temp_dir, safe_name)
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(content)
        else:
            # Write through the temporary file's own handle so it is closed
            # deterministically rather than left open and reopened by path.
            with tempfile.NamedTemporaryFile(
                "w", delete=False, dir=temp_dir, encoding="utf-8"
            ) as f:
                f.write(content)
                filepath = f.name
    except OSError as e:
        # Report I/O failures as text, matching the other tools in this file.
        return f"Error saving file: {str(e)}"
    return f"File saved to {filepath}. You can read this file to process its contents."
@tool
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it to a temporary location.

    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. Only the final path
            component is used. If not provided, the name is taken from the
            URL path; if that is empty too, a random name is generated.

    Returns:
        str: A message containing the path of the downloaded file, or a
        message starting with "Error downloading file:" if anything fails
        (bad HTTP status, network error, timeout, write error).
    """
    # Seconds to wait for the connection and for each read from the server;
    # without a timeout, requests can block indefinitely on a stalled server.
    timeout_seconds = 30
    try:
        # Keep only the last path component so a supplied name such as
        # "../x" or "/etc/x" cannot place the file outside the temp directory.
        name = os.path.basename(filename) if filename else ""
        if not name:
            # Parse URL to get filename if not provided
            name = os.path.basename(urlparse(url).path)
        if name in ("", ".", ".."):
            name = f"downloaded_{uuid.uuid4().hex[:8]}"
        filepath = os.path.join(tempfile.gettempdir(), name)
        # With stream=True the connection is held until the body is consumed
        # or the response is closed; the context manager guarantees the close.
        with requests.get(url, stream=True, timeout=timeout_seconds) as response:
            response.raise_for_status()
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        return f"Error downloading file: {str(e)}"
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using OCR library pytesseract.

    Args:
        image_path (str): the path to the image file.

    Returns:
        str: The recognized text prefixed with "Extracted text from image:",
        or a message starting with "Error extracting text from image:" if the
        image cannot be opened or OCR fails.
    """
    try:
        # Image.open keeps the underlying file open; the context manager
        # closes it once OCR has finished instead of leaking the handle.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)
        return f"Extracted text from image:\n\n{text}"
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"
@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Analyze a CSV file using pandas and return a textual overview of it.

    The overview contains the row and column counts, the column labels and
    the output of ``DataFrame.describe()``.

    Args:
        file_path (str): the path to the CSV file.
        query (str): Question about the data. The code shown here does not
            use it; the same overview is produced for every query.

    Returns:
        str: The overview text, or a message starting with
        "Error analyzing CSV file:" if loading or summarizing fails.
    """
    try:
        frame = pd.read_csv(file_path)
        # Collect the report pieces in order and join them once at the end.
        report_parts = [
            f"CSV file loaded with {len(frame)} rows and {len(frame.columns)} columns.\n",
            f"Columns: {', '.join(frame.columns)}\n\n",
            "Summary statistics:\n",
            str(frame.describe()),
        ]
        return "".join(report_parts)
    except Exception as err:
        return f"Error analyzing CSV file: {err!s}"