Spaces:
Sleeping
Sleeping
| import os | |
| import requests | |
| import tempfile | |
| import pandas as pd | |
| from io import StringIO, BytesIO | |
def read_file(file_source: str, *, max_chars: int = 8000) -> str:
    """Read a file from a URL or local path and return its content as text.

    Supports CSV, Excel (.xlsx), and plain text files. Tabular files are
    parsed with pandas and re-serialised as CSV text (without the index).

    Args:
        file_source: An ``http://`` / ``https://`` URL, or a local file path.
        max_chars: Maximum number of characters of content to return.
            Expected to be non-negative; defaults to 8000.

    Returns:
        The (possibly truncated) file content. Failures are reported in the
        return value rather than raised: ``"File not found: <path>"`` for a
        missing local path, or ``"Error reading file: <message>"`` for any
        other exception.
    """
    # Local import so the block does not depend on the file's import header.
    from urllib.parse import urlparse

    try:
        if file_source.startswith(("http://", "https://")):
            parsed = urlparse(file_source)
            host = (parsed.hostname or "").lower()
            # Take the extension from the URL *path* so that query strings
            # ("?download=true"), fragments and upper-case suffixes do not
            # defeat format detection.
            url_ext = os.path.splitext(parsed.path)[1].lower()

            # Only send the token to huggingface.co itself (or a subdomain),
            # never to an arbitrary host whose URL merely contains that text,
            # and never send a meaningless "Bearer None" header.
            headers = {}
            token = os.getenv("HF_TOKEN")
            is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")
            if token and is_hf_host:
                headers["Authorization"] = f"Bearer {token}"

            response = requests.get(file_source, headers=headers, timeout=15)
            response.raise_for_status()
            content_type = response.headers.get("Content-Type", "").lower()

            # "spreadsheetml" covers the official .xlsx media type, which
            # does not contain the word "excel".
            looks_like_excel = (
                "excel" in content_type
                or "spreadsheetml" in content_type
                or url_ext == ".xlsx"
            )
            if looks_like_excel:
                frame = pd.read_excel(BytesIO(response.content))
            elif "csv" in content_type or url_ext == ".csv":
                frame = pd.read_csv(StringIO(response.text))
            else:
                return response.text[:max_chars]
            return frame.to_csv(index=False)[:max_chars]

        if not os.path.exists(file_source):
            return f"File not found: {file_source}"

        ext = os.path.splitext(file_source)[1].lower()
        if ext == ".xlsx":
            frame = pd.read_excel(file_source)
        elif ext == ".csv":
            frame = pd.read_csv(file_source)
        else:
            # Undecodable bytes are replaced rather than raising, so any
            # file can be previewed as text.
            with open(file_source, "r", encoding="utf-8", errors="replace") as f:
                return f.read(max_chars)
        return frame.to_csv(index=False)[:max_chars]
    except Exception as e:
        # Deliberate best-effort boundary: callers receive a message string
        # instead of an exception.
        return f"Error reading file: {str(e)}"