Spaces:
Sleeping
Sleeping
| """ | |
| File Reader — supports CSV, Excel, Parquet, and JSON. | |
| """ | |
| import io | |
| import pandas as pd | |
# Accepted file extensions (lowercase, leading dot) mapped to the
# human-readable format label reported for each one.
SUPPORTED_EXTENSIONS = {
    ".csv": "CSV",
    ".xlsx": "Excel",
    ".xls": "Excel (legacy)",
    ".parquet": "Parquet",
    ".json": "JSON",
}
def _read_json(buf: io.BytesIO) -> pd.DataFrame:
    """Parse JSON from *buf*: records orient, then pandas' default, then JSON Lines."""
    buf.seek(0)
    try:
        return pd.read_json(buf, orient="records")
    except Exception:
        # Best effort: any failure here just means we retry with the default orient.
        pass

    buf.seek(0)
    try:
        return pd.read_json(buf)
    except ValueError as default_error:
        # Last resort: newline-delimited JSON (one document per line).
        # This path is only reached for input that used to raise, so
        # previously working files are unaffected.
        buf.seek(0)
        try:
            df = pd.read_json(buf, lines=True)
        except ValueError:
            raise default_error from None
        if df.empty:
            # Blank input must stay an error rather than become an empty frame.
            raise default_error
        return df


def read_dataset(file_bytes: bytes, filename: str) -> pd.DataFrame:
    """
    Read an uploaded dataset from raw bytes into a DataFrame.

    The format is selected from the filename's extension (the text after the
    last dot, compared case-insensitively).
    Supported: CSV, Excel (.xlsx/.xls), Parquet, JSON (including JSON Lines).

    Args:
        file_bytes: Raw content of the file.
        filename: Name of the file; only its extension is inspected.

    Returns:
        The parsed content as a pandas DataFrame.

    Raises:
        ValueError: If the extension is not a key of SUPPORTED_EXTENSIONS, or
            if JSON content cannot be parsed by any of the attempted modes.
            Errors raised by the other pandas readers propagate unchanged.
    """
    ext = "." + filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    if ext not in SUPPORTED_EXTENSIONS:
        raise ValueError(
            f"Format file '{ext}' tidak didukung. "
            f"Format yang didukung: {', '.join(SUPPORTED_EXTENSIONS)}"
        )

    buf = io.BytesIO(file_bytes)

    if ext == ".csv":
        # Try several common separators; accept the first one that yields
        # more than one column.
        for sep in (",", ";", "\t", "|"):
            buf.seek(0)
            try:
                df = pd.read_csv(buf, sep=sep)
            except Exception:
                # Deliberate best effort: a failed parse with this separator
                # simply means the next candidate is tried.
                continue
            if df.shape[1] > 1:
                return df
        # No separator produced multiple columns: fall back to pandas'
        # default parse (this also surfaces the real parser error, if any).
        buf.seek(0)
        return pd.read_csv(buf)
    elif ext in (".xlsx", ".xls"):
        return pd.read_excel(buf, engine="openpyxl" if ext == ".xlsx" else "xlrd")
    elif ext == ".parquet":
        return pd.read_parquet(buf)
    elif ext == ".json":
        return _read_json(buf)

    # Defensive guard: only reachable if SUPPORTED_EXTENSIONS gains an entry
    # that has no matching branch above.
    raise ValueError(f"Format tidak dikenali: {ext}")
def get_file_info(file_bytes: bytes, filename: str) -> dict:
    """Return a short summary of an uploaded file: name, format label, size in KB."""
    if "." in filename:
        # Extension is the text after the last dot, lowercased and dot-prefixed.
        suffix = filename.rpartition(".")[2]
        extension = "." + suffix.lower()
    else:
        extension = "unknown"

    kilobytes = len(file_bytes) / 1024

    info = {}
    info["filename"] = filename
    # Extensions missing from the table are reported as "Unknown".
    info["format"] = SUPPORTED_EXTENSIONS.get(extension, "Unknown")
    info["size_kb"] = round(kilobytes, 2)
    return info