File size: 1,521 Bytes
1067825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from fastapi import APIRouter, UploadFile, File, HTTPException
import io
import pandas as pd
from backend.state import session_store
import uuid
import math

# Router object on which this module's endpoints are registered
# (the POST /upload handler below attaches itself via @router.post).
router = APIRouter()

# Upload size cap. The error message is built from the same constant so the
# enforced limit and the reported limit cannot drift apart.
_MAX_UPLOAD_MB = 20
_MAX_UPLOAD_BYTES = _MAX_UPLOAD_MB * 1024 * 1024


def _clean_nan(data):
    """Recursively replace non-finite floats (NaN, +inf, -inf) with ``None``.

    Dicts and lists are rebuilt with their values cleaned; every other value
    is returned unchanged. Strict JSON has no representation for NaN or
    infinity, so these are mapped to ``None`` (JSON ``null``) before the
    preview is returned to the client.
    """
    if isinstance(data, dict):
        return {key: _clean_nan(value) for key, value in data.items()}
    if isinstance(data, list):
        return [_clean_nan(value) for value in data]
    if isinstance(data, float) and not math.isfinite(data):
        return None
    return data


@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Accept a CSV upload, parse it, and register it under a new session.

    Args:
        file: The uploaded file; its name must end in ``.csv``
            (case-insensitive).

    Returns:
        A dict with the original ``filename``, the parsed frame's ``rows``
        and ``columns`` counts, its ``column_names``, the generated
        ``session_id``, and a ``preview`` of the first five rows as a list
        of records with non-finite floats replaced by ``None``.

    Raises:
        HTTPException: 400 if the file name is missing or does not end in
            ``.csv``, if the payload exceeds the size cap, or if pandas
            cannot parse the content.
    """
    # filename may be None; treat a missing name as "not a CSV" rather than
    # letting .endswith() raise AttributeError.
    filename = file.filename or ""
    if not filename.lower().endswith(".csv"):
        raise HTTPException(status_code=400, detail="Only CSV files are accepted")

    # Read one byte past the cap: enough to detect an oversized upload
    # without pulling an arbitrarily large payload into memory.
    content = await file.read(_MAX_UPLOAD_BYTES + 1)
    if len(content) > _MAX_UPLOAD_BYTES:
        raise HTTPException(
            status_code=400,
            detail=f"File too large. Maximum size is {_MAX_UPLOAD_MB}MB",
        )

    try:
        df = pd.read_csv(
            io.BytesIO(content),
            na_values=['?', 'NA', 'N/A', 'na', 'n/a', ''],
        )
    except Exception as exc:
        # Any parser failure is reported to the client as a bad request; the
        # original exception is chained for server-side debugging.
        raise HTTPException(
            status_code=400,
            detail="Could not parse CSV. Check the file format",
        ) from exc

    rows, cols = df.shape
    preview = _clean_nan(df.head(5).to_dict(orient="records"))

    # Allocate the session only after validation and parsing succeeded.
    session_id = str(uuid.uuid4())
    session_store[session_id] = {
        "df": df,
        "summary": None,
        "explanation": None,
    }

    return {
        "filename": file.filename,
        "rows": rows,
        "columns": cols,
        "column_names": df.columns.tolist(),
        "session_id": session_id,
        "preview": preview,
    }