import os
import tempfile
from io import BytesIO, StringIO
from urllib.parse import urlparse

import pandas as pd
import requests

# Upper bound on the number of characters read_file returns.
_MAX_CHARS = 8000


def _auth_headers(url: str) -> dict:
    """Return request headers for *url*, adding the HF token only when safe.

    The token is attached only if ``HF_TOKEN`` is set (so we never send the
    literal string ``"Bearer None"``) and the URL's *hostname* is
    huggingface.co or one of its subdomains (a substring match on the whole
    URL would leak the token to unrelated hosts).
    """
    token = os.getenv("HF_TOKEN")
    host = urlparse(url).hostname or ""
    is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")
    if token and is_hf_host:
        return {"Authorization": f"Bearer {token}"}
    return {}


def _url_has_extension(url: str, ext: str) -> bool:
    """Return True if *url* ends in *ext*, ignoring case and query/fragment."""
    # Check the parsed path first (handles "?download=true" style suffixes),
    # and fall back to the raw string so previously matching URLs still match.
    return urlparse(url).path.lower().endswith(ext) or url.lower().endswith(ext)


def _frame_to_text(df: pd.DataFrame) -> str:
    """Serialize *df* as CSV text (no index), truncated to ``_MAX_CHARS``."""
    return df.to_csv(index=False)[:_MAX_CHARS]


def read_file(file_source: str) -> str:
    """Read a file from a URL or local path and return its content as text.

    Supports CSV, Excel (.xlsx), and plain text files. Tabular files are
    loaded with pandas and re-serialized as CSV without the index column.

    Args:
        file_source: An ``http://``/``https://`` URL or a local filesystem
            path.

    Returns:
        At most 8000 characters of the file's content. On failure this
        function does not raise; it returns a message instead:
        ``"File not found: <path>"`` when a local path does not exist, or
        ``"Error reading file: <reason>"`` for any other error (network
        failure, HTTP error status, parse error, ...).
    """
    try:
        if file_source.startswith(("http://", "https://")):
            response = requests.get(
                file_source, headers=_auth_headers(file_source), timeout=15
            )
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "").lower()

            # "spreadsheetml" covers the official .xlsx MIME type, which does
            # not contain the word "excel".
            is_excel = (
                "excel" in content_type
                or "spreadsheetml" in content_type
                or _url_has_extension(file_source, ".xlsx")
            )
            if is_excel:
                return _frame_to_text(pd.read_excel(BytesIO(response.content)))

            if "csv" in content_type or _url_has_extension(file_source, ".csv"):
                return _frame_to_text(pd.read_csv(StringIO(response.text)))

            return response.text[:_MAX_CHARS]

        if not os.path.exists(file_source):
            return f"File not found: {file_source}"

        ext = os.path.splitext(file_source)[1].lower()
        if ext == ".xlsx":
            return _frame_to_text(pd.read_excel(file_source))
        if ext == ".csv":
            return _frame_to_text(pd.read_csv(file_source))

        # Undecodable bytes are replaced rather than raising, so arbitrary
        # files still yield some text.
        with open(file_source, "r", encoding="utf-8", errors="replace") as f:
            return f.read(_MAX_CHARS)
    except Exception as e:
        # Deliberate catch-all: callers expect a string result, never a raise.
        return f"Error reading file: {str(e)}"