File size: 1,521 Bytes
from fastapi import APIRouter, UploadFile, File, HTTPException
import io
import pandas as pd
from backend.state import session_store
import uuid
import math
# Router instance for this module; endpoints are registered on it through
# its decorator methods (e.g. ``@router.post``).
router = APIRouter()
# Largest accepted upload, in bytes.
# NOTE(review): the error message used to advertise 10MB while the check
# enforced 20MB. The enforced limit is kept and the message is now derived
# from this constant -- confirm which value was actually intended.
_MAX_UPLOAD_BYTES = 20 * 1024 * 1024

# Cell contents that pandas should read as missing values.
_NA_TOKENS = ['?', 'NA', 'N/A', 'na', 'n/a', '']

# Number of leading rows returned to the client as a preview.
_PREVIEW_ROWS = 5


def _json_safe(data):
    """Return *data* with every non-finite float replaced by ``None``.

    Dicts and lists are rebuilt recursively; any other value is returned
    unchanged. NaN and +/-infinity are both replaced because neither has a
    representation in strict JSON.
    """
    if isinstance(data, dict):
        return {key: _json_safe(value) for key, value in data.items()}
    if isinstance(data, list):
        return [_json_safe(value) for value in data]
    if isinstance(data, float) and not math.isfinite(data):
        return None
    return data


@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Accept a CSV upload, parse it, and store it under a new session id.

    Args:
        file: The uploaded file. Its name must end in ``.csv`` (compared
            case-insensitively) and its content must not exceed
            ``_MAX_UPLOAD_BYTES``.

    Returns:
        A dict with the original filename, the row and column counts, the
        column names, the generated session id, and a preview made of the
        first ``_PREVIEW_ROWS`` rows with non-finite floats set to ``None``.

    Raises:
        HTTPException: With status 400 when the filename is missing or is
            not a ``.csv`` name, when the content is too large, or when
            pandas cannot parse the content.
    """
    # The filename can be absent; treat that like any other non-CSV name
    # instead of failing on ``None.endswith``.
    filename = file.filename or ""
    if not filename.lower().endswith(".csv"):
        raise HTTPException(status_code=400, detail="Only CSV files are accepted")

    content = await file.read()
    if len(content) > _MAX_UPLOAD_BYTES:
        limit_mb = _MAX_UPLOAD_BYTES // (1024 * 1024)
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Maximum size is {limit_mb}MB",
        )

    try:
        df = pd.read_csv(io.BytesIO(content), na_values=_NA_TOKENS)
    except Exception as exc:
        # Any parsing failure is reported to the client as a bad request;
        # the original error stays attached as the cause for debugging.
        raise HTTPException(
            status_code=400,
            detail="Could not parse CSV. Check the file format",
        ) from exc

    rows, cols = df.shape
    preview = _json_safe(df.head(_PREVIEW_ROWS).to_dict(orient="records"))

    # Create the session only once the upload is known to be valid.
    session_id = str(uuid.uuid4())
    session_store[session_id] = {
        "df": df,
        "summary": None,
        "explanation": None,
    }

    return {
        "filename": file.filename,
        "rows": rows,
        "columns": cols,
        "column_names": df.columns.tolist(),
        "session_id": session_id,
        "preview": preview,
    }
|