LangGraph_GAIA / tools /read_file.py
BiGuan's picture
Update tools/read_file.py
df4007a verified
Raw
History Blame Contribute Delete
1.82 kB
import os
import requests
import tempfile
import pandas as pd
from io import StringIO, BytesIO
def read_file(file_source: str) -> str:
    """
    Read a file from a URL or local path and return its content as text.

    Supports CSV, Excel (.xlsx), and plain text files. Tabular files are
    parsed with pandas and re-serialised as CSV text without the index
    column. The returned text is truncated to 8000 characters.

    Args:
        file_source: An ``http://`` / ``https://`` URL, or a local path.

    Returns:
        The (possibly truncated) content. On failure a message starting with
        ``"File not found:"`` or ``"Error reading file:"`` is returned
        instead of raising, so callers always receive a string.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import list.
    from urllib.parse import urlparse

    max_chars = 8000

    try:
        if file_source.startswith(("http://", "https://")):
            parsed = urlparse(file_source)
            host = (parsed.hostname or "").lower()
            # Check both the URL path (ignores "?query" suffixes) and the
            # full URL (keeps matching URLs the old check already accepted).
            candidates = (parsed.path.lower(), file_source.lower())

            # Send the token only to huggingface.co itself or a subdomain,
            # and only when a token is configured. A plain substring test
            # would leak it to any URL that merely contains the text, and an
            # unset token would be sent as the literal "Bearer None".
            headers = {}
            token = os.getenv("HF_TOKEN")
            on_hf = host == "huggingface.co" or host.endswith(".huggingface.co")
            if token and on_hf:
                headers["Authorization"] = f"Bearer {token}"

            response = requests.get(file_source, headers=headers, timeout=15)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "").lower()

            # "spreadsheetml" covers the official .xlsx MIME type, which
            # does not contain the word "excel".
            is_excel = (
                "excel" in content_type
                or "spreadsheetml" in content_type
                or any(c.endswith(".xlsx") for c in candidates)
            )
            is_csv = "csv" in content_type or any(
                c.endswith(".csv") for c in candidates
            )

            if is_excel:
                df = pd.read_excel(BytesIO(response.content))
                return df.to_csv(index=False)[:max_chars]
            if is_csv:
                df = pd.read_csv(StringIO(response.text))
                return df.to_csv(index=False)[:max_chars]
            return response.text[:max_chars]

        if not os.path.exists(file_source):
            return f"File not found: {file_source}"

        ext = os.path.splitext(file_source)[1].lower()
        if ext == ".xlsx":
            df = pd.read_excel(file_source)
            return df.to_csv(index=False)[:max_chars]
        if ext == ".csv":
            df = pd.read_csv(file_source)
            return df.to_csv(index=False)[:max_chars]

        # Anything else is treated as text; undecodable bytes are replaced
        # rather than raising, and only the first max_chars characters are read.
        with open(file_source, "r", encoding="utf-8", errors="replace") as f:
            return f.read(max_chars)
    except Exception as e:
        # Deliberate catch-all: report the failure as text instead of raising.
        return f"Error reading file: {str(e)}"