abhiraj12 committed on
Commit
e39ffd5
·
1 Parent(s): e0b9d3d

Align 500MB upload limits and stream large files

Browse files
Dockerfile CHANGED
@@ -33,6 +33,7 @@ RUN mkdir -p backend/runs backend/tmp
33
  RUN chmod +x start.sh
34
 
35
  ENV PYTHONPATH=$HOME/app/backend:$PYTHONPATH
 
36
 
37
  # Tell your start.sh script to boot Streamlit on 7860 (the only port HF exposes)
38
  ENV PORT=7860
 
33
  RUN chmod +x start.sh
34
 
35
  ENV PYTHONPATH=$HOME/app/backend:$PYTHONPATH
36
+ ENV MAX_UPLOAD_MB=500
37
 
38
  # Tell your start.sh script to boot Streamlit on 7860 (the only port HF exposes)
39
  ENV PORT=7860
backend/api/routes/datasets.py CHANGED
@@ -36,6 +36,28 @@ def _save_uploaded_file(dataset_id: str, filename: str, payload: bytes) -> str:
36
  return file_path
37
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def _persist_dataframe_as_csv(dataset_id: str, df):
40
  csv_path = os.path.join("tmp", f"{dataset_id}.csv")
41
  df.to_csv(csv_path, index=False)
@@ -236,12 +258,7 @@ async def upload_dataset(
236
  filename = file.filename or "upload.csv"
237
 
238
  try:
239
- raw_bytes = await file.read()
240
- except Exception as e:
241
- return {"error": f"Failed to save upload: {e}"}
242
-
243
- try:
244
- uploaded_path = _save_uploaded_file(dataset_id, filename, raw_bytes)
245
  except Exception as e:
246
  return {"error": f"Failed to save upload: {e}"}
247
 
 
36
  return file_path
37
 
38
 
39
+ async def _stream_upload_to_file(dataset_id: str, filename: str, upload: UploadFile) -> str:
40
+ ext = os.path.splitext(filename or "")[1].lower() or ".csv"
41
+ file_path = os.path.join("tmp", f"{dataset_id}{ext}")
42
+ os.makedirs("tmp", exist_ok=True)
43
+
44
+ try:
45
+ with open(file_path, "wb") as buffer:
46
+ while True:
47
+ chunk = await upload.read(1024 * 1024)
48
+ if not chunk:
49
+ break
50
+ buffer.write(chunk)
51
+ except Exception:
52
+ if os.path.exists(file_path):
53
+ os.remove(file_path)
54
+ raise
55
+ finally:
56
+ await upload.close()
57
+
58
+ return file_path
59
+
60
+
61
  def _persist_dataframe_as_csv(dataset_id: str, df):
62
  csv_path = os.path.join("tmp", f"{dataset_id}.csv")
63
  df.to_csv(csv_path, index=False)
 
258
  filename = file.filename or "upload.csv"
259
 
260
  try:
261
+ uploaded_path = await _stream_upload_to_file(dataset_id, filename, file)
 
 
 
 
 
262
  except Exception as e:
263
  return {"error": f"Failed to save upload: {e}"}
264
 
backend/api/routes/predict.py CHANGED
@@ -10,6 +10,7 @@ import pandas as pd
10
  from infra.database import get_db, JobModel
11
  from infra.result_contract import normalize_results
12
  from infra.storage import get_schema_path
 
13
  from core.file_loader import load_dataframe
14
 
15
  router = APIRouter(prefix="/api", tags=["predict"])
@@ -184,11 +185,19 @@ async def contract_check(job_id: str, file: UploadFile = File(...)):
184
  except Exception:
185
  contract_schema = {}
186
 
 
187
  try:
188
- raw_bytes = await file.read()
189
- df = load_dataframe(contents=raw_bytes, filename=file.filename or "inference.csv")
 
 
 
 
190
  except Exception as e:
191
  raise HTTPException(status_code=422, detail=f"Could not read inference file: {e}")
 
 
 
192
 
193
  incoming_columns = list(df.columns)
194
  missing = sorted(set(expected_features) - set(incoming_columns))
 
10
  from infra.database import get_db, JobModel
11
  from infra.result_contract import normalize_results
12
  from infra.storage import get_schema_path
13
+ from api.routes.datasets import _stream_upload_to_file
14
  from core.file_loader import load_dataframe
15
 
16
  router = APIRouter(prefix="/api", tags=["predict"])
 
185
  except Exception:
186
  contract_schema = {}
187
 
188
+ temp_path = None
189
  try:
190
+ temp_path = await _stream_upload_to_file(
191
+ f"contract_{job_id}_{os.urandom(4).hex()}",
192
+ file.filename or "inference.csv",
193
+ file,
194
+ )
195
+ df = load_dataframe(filepath=temp_path)
196
  except Exception as e:
197
  raise HTTPException(status_code=422, detail=f"Could not read inference file: {e}")
198
+ finally:
199
+ if temp_path and os.path.exists(temp_path):
200
+ os.remove(temp_path)
201
 
202
  incoming_columns = list(df.columns)
203
  missing = sorted(set(expected_features) - set(incoming_columns))
start.sh CHANGED
@@ -12,6 +12,7 @@ export REDIS_URL="${REDIS_URL:-redis://127.0.0.1:6379/0}"
12
  export CELERY_BROKER_URL="${CELERY_BROKER_URL:-$REDIS_URL}"
13
  export CELERY_RESULT_BACKEND="${CELERY_RESULT_BACKEND:-$REDIS_URL}"
14
  export AUTOML_API_URL="${AUTOML_API_URL:-http://127.0.0.1:8000/api}"
 
15
 
16
  mkdir -p /tmp/nginx_client_body /tmp/nginx_proxy /tmp/nginx_fastcgi /tmp/nginx_uwsgi /tmp/nginx_scgi
17
 
@@ -46,6 +47,7 @@ python -m streamlit run app.py \
46
  --server.headless true \
47
  --server.enableCORS false \
48
  --server.enableXsrfProtection false \
 
49
  --browser.gatherUsageStats false &
50
  FRONTEND_PID=$!
51
 
 
12
  export CELERY_BROKER_URL="${CELERY_BROKER_URL:-$REDIS_URL}"
13
  export CELERY_RESULT_BACKEND="${CELERY_RESULT_BACKEND:-$REDIS_URL}"
14
  export AUTOML_API_URL="${AUTOML_API_URL:-http://127.0.0.1:8000/api}"
15
+ export MAX_UPLOAD_MB="${MAX_UPLOAD_MB:-500}"
16
 
17
  mkdir -p /tmp/nginx_client_body /tmp/nginx_proxy /tmp/nginx_fastcgi /tmp/nginx_uwsgi /tmp/nginx_scgi
18
 
 
47
  --server.headless true \
48
  --server.enableCORS false \
49
  --server.enableXsrfProtection false \
50
+ --server.maxUploadSize "${MAX_UPLOAD_MB}" \
51
  --browser.gatherUsageStats false &
52
  FRONTEND_PID=$!
53