Spaces:

abhiraj12
/

Auto_ML

Paused

abhiraj12 committed on Apr 17

Commit

e39ffd5

1 Parent(s): e0b9d3d

Align 500MB upload limits and stream large files

Files changed (4) hide show

Dockerfile CHANGED Viewed

@@ -33,6 +33,7 @@ RUN mkdir -p backend/runs backend/tmp
 RUN chmod +x start.sh
 ENV PYTHONPATH=$HOME/app/backend:$PYTHONPATH
 # Tell your start.sh script to boot Streamlit on 7860 (the only port HF exposes)
 ENV PORT=7860

 RUN chmod +x start.sh
 ENV PYTHONPATH=$HOME/app/backend:$PYTHONPATH
+ENV MAX_UPLOAD_MB=500
 # Tell your start.sh script to boot Streamlit on 7860 (the only port HF exposes)
 ENV PORT=7860

backend/api/routes/datasets.py CHANGED Viewed

@@ -36,6 +36,28 @@ def _save_uploaded_file(dataset_id: str, filename: str, payload: bytes) -> str:
     return file_path
 def _persist_dataframe_as_csv(dataset_id: str, df):
     csv_path = os.path.join("tmp", f"{dataset_id}.csv")
     df.to_csv(csv_path, index=False)
@@ -236,12 +258,7 @@ async def upload_dataset(
     filename = file.filename or "upload.csv"
     try:
-        raw_bytes = await file.read()
-    except Exception as e:
-        return {"error": f"Failed to save upload: {e}"}
-    try:
-        uploaded_path = _save_uploaded_file(dataset_id, filename, raw_bytes)
     except Exception as e:
         return {"error": f"Failed to save upload: {e}"}

     return file_path
+async def _stream_upload_to_file(dataset_id: str, filename: str, upload: UploadFile) -> str:
+    """Copy an uploaded file to ``tmp/<dataset_id><ext>`` in 1 MiB chunks.
+
+    The extension is taken from ``filename`` (lower-cased); when the name is
+    empty or has no extension, ``.csv`` is used. The ``tmp`` directory is
+    created if it does not exist.
+
+    Args:
+        dataset_id: Base name of the destination file.
+        filename: Client-supplied file name; only its extension is used.
+        upload: The upload object to read from; it is closed before returning,
+            whether or not the copy succeeded.
+
+    Returns:
+        The relative path of the file that was written.
+
+    Raises:
+        Exception: Any error raised while reading or writing is re-raised
+            after the partially written file has been removed.
+    """
+    # Fall back to ".csv" when the filename is None/empty or has no suffix.
+    ext = os.path.splitext(filename or "")[1].lower() or ".csv"
+    file_path = os.path.join("tmp", f"{dataset_id}{ext}")
+    os.makedirs("tmp", exist_ok=True)
+    try:
+        with open(file_path, "wb") as buffer:
+            while True:
+                # Read at most 1 MiB per iteration so the whole payload is
+                # never held in memory at once.
+                chunk = await upload.read(1024 * 1024)
+                if not chunk:
+                    # An empty read marks the end of the upload.
+                    break
+                # NOTE(review): this write is a plain synchronous call inside
+                # an async function.
+                buffer.write(chunk)
+    except Exception:
+        # Do not leave a truncated file behind; the caller gets the original
+        # exception.
+        if os.path.exists(file_path):
+            os.remove(file_path)
+        raise
+    finally:
+        # Runs on both the success and the failure path.
+        await upload.close()
+    return file_path
 def _persist_dataframe_as_csv(dataset_id: str, df):
     csv_path = os.path.join("tmp", f"{dataset_id}.csv")
     df.to_csv(csv_path, index=False)
     filename = file.filename or "upload.csv"
     try:
+        uploaded_path = await _stream_upload_to_file(dataset_id, filename, file)
     except Exception as e:
         return {"error": f"Failed to save upload: {e}"}

backend/api/routes/predict.py CHANGED Viewed

@@ -10,6 +10,7 @@ import pandas as pd
 from infra.database import get_db, JobModel
 from infra.result_contract import normalize_results
 from infra.storage import get_schema_path
 from core.file_loader import load_dataframe
 router = APIRouter(prefix="/api", tags=["predict"])
@@ -184,11 +185,19 @@ async def contract_check(job_id: str, file: UploadFile = File(...)):
         except Exception:
             contract_schema = {}
     try:
-        raw_bytes = await file.read()
-        df = load_dataframe(contents=raw_bytes, filename=file.filename or "inference.csv")
     except Exception as e:
         raise HTTPException(status_code=422, detail=f"Could not read inference file: {e}")
     incoming_columns = list(df.columns)
     missing = sorted(set(expected_features) - set(incoming_columns))

 from infra.database import get_db, JobModel
 from infra.result_contract import normalize_results
 from infra.storage import get_schema_path
+from api.routes.datasets import _stream_upload_to_file
 from core.file_loader import load_dataframe
 router = APIRouter(prefix="/api", tags=["predict"])
         except Exception:
             contract_schema = {}
+    temp_path = None
     try:
+        temp_path = await _stream_upload_to_file(
+            f"contract_{job_id}_{os.urandom(4).hex()}",
+            file.filename or "inference.csv",
+            file,
+        )
+        df = load_dataframe(filepath=temp_path)
     except Exception as e:
         raise HTTPException(status_code=422, detail=f"Could not read inference file: {e}")
+    finally:
+        if temp_path and os.path.exists(temp_path):
+            os.remove(temp_path)
     incoming_columns = list(df.columns)
     missing = sorted(set(expected_features) - set(incoming_columns))

start.sh CHANGED Viewed

@@ -12,6 +12,7 @@ export REDIS_URL="${REDIS_URL:-redis://127.0.0.1:6379/0}"
 export CELERY_BROKER_URL="${CELERY_BROKER_URL:-$REDIS_URL}"
 export CELERY_RESULT_BACKEND="${CELERY_RESULT_BACKEND:-$REDIS_URL}"
 export AUTOML_API_URL="${AUTOML_API_URL:-http://127.0.0.1:8000/api}"
 mkdir -p /tmp/nginx_client_body /tmp/nginx_proxy /tmp/nginx_fastcgi /tmp/nginx_uwsgi /tmp/nginx_scgi
@@ -46,6 +47,7 @@ python -m streamlit run app.py \
     --server.headless true \
     --server.enableCORS false \
     --server.enableXsrfProtection false \
     --browser.gatherUsageStats false &
 FRONTEND_PID=$!

 export CELERY_BROKER_URL="${CELERY_BROKER_URL:-$REDIS_URL}"
 export CELERY_RESULT_BACKEND="${CELERY_RESULT_BACKEND:-$REDIS_URL}"
 export AUTOML_API_URL="${AUTOML_API_URL:-http://127.0.0.1:8000/api}"
+export MAX_UPLOAD_MB="${MAX_UPLOAD_MB:-500}"
 mkdir -p /tmp/nginx_client_body /tmp/nginx_proxy /tmp/nginx_fastcgi /tmp/nginx_uwsgi /tmp/nginx_scgi
     --server.headless true \
     --server.enableCORS false \
     --server.enableXsrfProtection false \
+    --server.maxUploadSize "${MAX_UPLOAD_MB}" \
     --browser.gatherUsageStats false &
 FRONTEND_PID=$!