GDank-StockSense / app /reader.py
ArizalMuluk's picture
Upload 14 files
f78d8cf verified
"""
File reader — supports CSV, Excel, Parquet, and JSON.
"""
import io
import pandas as pd
# Maps each accepted file extension (lower-case, with leading dot) to the
# human-readable format label. Insertion order is significant: it is the
# order in which the extensions are listed in the "unsupported format" error.
SUPPORTED_EXTENSIONS = {
    ".csv": "CSV",
    ".xlsx": "Excel",
    ".xls": "Excel (legacy)",
    ".parquet": "Parquet",
    ".json": "JSON",
}
def _read_csv(buf: io.BytesIO) -> pd.DataFrame:
    """Parse CSV bytes, probing the common separators in a fixed order.

    The first separator that produces more than one column wins. If none
    does, the comma-separated parse is returned (a genuine one-column file).
    """
    comma_frame = None
    for sep in (",", ";", "\t", "|"):
        buf.seek(0)
        try:
            frame = pd.read_csv(buf, sep=sep)
        except Exception:
            # Deliberately broad: a failed probe only means "not this
            # separator"; the real error is surfaced below if nothing works.
            continue
        if frame.shape[1] > 1:
            return frame
        if sep == ",":
            # Keep the default-separator result so a one-column file does
            # not have to be parsed again after the probing loop.
            comma_frame = frame
    if comma_frame is not None:
        return comma_frame
    # The comma probe failed: parse once more outside the try block so the
    # caller receives the underlying pandas exception.
    buf.seek(0)
    return pd.read_csv(buf)


def _read_json(buf: io.BytesIO) -> pd.DataFrame:
    """Parse JSON bytes, trying ``orient="records"`` before pandas' default."""
    buf.seek(0)
    try:
        return pd.read_json(buf, orient="records")
    except Exception:
        # Best-effort fallback: let pandas use its default orient instead.
        buf.seek(0)
        return pd.read_json(buf)


def read_dataset(file_bytes: bytes, filename: str) -> pd.DataFrame:
    """Load an uploaded dataset from raw bytes into a DataFrame.

    The format is chosen from the extension of ``filename`` (the text after
    its last dot, compared case-insensitively). Supported formats are CSV,
    Excel (.xlsx via the openpyxl engine, .xls via the xlrd engine),
    Parquet, and JSON.

    Args:
        file_bytes: Raw content of the file.
        filename: Name of the file; only its extension is inspected.

    Returns:
        The parsed DataFrame.

    Raises:
        ValueError: If the extension is missing or not listed in
            ``SUPPORTED_EXTENSIONS``.
        Exception: Any error raised by the underlying pandas reader when
            the content cannot be parsed.
    """
    _, dot, suffix = filename.rpartition(".")
    ext = "." + suffix.lower() if dot else ""

    if ext not in SUPPORTED_EXTENSIONS:
        raise ValueError(
            f"Format file '{ext}' tidak didukung. "
            f"Format yang didukung: {', '.join(SUPPORTED_EXTENSIONS)}"
        )

    buf = io.BytesIO(file_bytes)

    if ext == ".csv":
        return _read_csv(buf)
    if ext in (".xlsx", ".xls"):
        return pd.read_excel(buf, engine="openpyxl" if ext == ".xlsx" else "xlrd")
    if ext == ".parquet":
        return pd.read_parquet(buf)
    if ext == ".json":
        return _read_json(buf)

    # Only reachable if SUPPORTED_EXTENSIONS lists an extension that has no
    # reader branch above.
    raise ValueError(f"Format tidak dikenali: {ext}")
def get_file_info(file_bytes: bytes, filename: str) -> dict:
    """Return a short summary of an uploaded file.

    The result holds the original ``filename``, the human-readable
    ``format`` label looked up from the extension ("Unknown" when the
    extension is missing or not supported), and ``size_kb``, the payload
    size in kibibytes rounded to two decimals.
    """
    _, dot, suffix = filename.rpartition(".")
    if dot:
        ext = "." + suffix.lower()
    else:
        ext = "unknown"

    label = SUPPORTED_EXTENSIONS.get(ext, "Unknown")
    kilobytes = len(file_bytes) / 1024

    info = {
        "filename": filename,
        "format": label,
        "size_kb": round(kilobytes, 2),
    }
    return info