Spaces:

khushalcodiste
/

testruk

Sleeping

App Files Files Community

khushalcodiste committed on Apr 19

Commit

5fccbac

1 Parent(s): 39c7e26

feat: fixed response

Browse files

Files changed (4) hide show

DEPLOY.md +1 -1
README.md +1 -1
app.py +51 -28
requirements.txt +3 -0

DEPLOY.md CHANGED Viewed

@@ -83,7 +83,7 @@ docker run --rm -p 7860:7860 roulette-predictor
 |------|---------|
 | `app.py` | FastAPI service with `/predict`, `/predict/file`, `/models`, `/` |
 | `Dockerfile` | Python 3.11-slim, non-root user UID 1000, port 7860 (HF convention) |
-| `requirements.txt` | fastapi, uvicorn, pandas, numpy, scikit-learn, xgboost, joblib |
 | `README.md` | HF Space metadata frontmatter + user docs |
 | `ml/features.py` | v1 hand-crafted features (window=10, 25 dims) |
 | `ml/features_v2.py` | v2 features (window=20, 51 dims, run-length, autocorrelation, wheel-neighbor) |

 |------|---------|
 | `app.py` | FastAPI service with `/predict`, `/predict/file`, `/models`, `/` |
 | `Dockerfile` | Python 3.11-slim, non-root user UID 1000, port 7860 (HF convention) |
+| `requirements.txt` | fastapi, uvicorn, pandas, openpyxl, xlrd, lxml, numpy, scikit-learn, xgboost, joblib |
 | `README.md` | HF Space metadata frontmatter + user docs |
 | `ml/features.py` | v1 hand-crafted features (window=10, 25 dims) |
 | `ml/features_v2.py` | v2 features (window=20, 51 dims, run-length, autocorrelation, wheel-neighbor) |

README.md CHANGED Viewed

@@ -35,7 +35,7 @@ Ridge, SGD, SVC, LSTM, GRU, Transformer, and Markov orders 1/2/3.
 ## File upload notes
-The `/predict/file` endpoint accepts both CSV and Excel files. The file must contain a column named `Winner`, `winning number`, or `number` (case-insensitive). If not found, the last column is used.
 ## Example request

 ## File upload notes
+The `/predict/file` endpoint accepts CSV, `.xlsx`, `.xls`, and HTML files that contain a table. The file should contain a column named `Winner`, `winning number`, or `number` (case-insensitive); if no such column is found, the last column is used.
 ## Example request

app.py CHANGED Viewed

@@ -59,6 +59,52 @@ TARGET_LABELS: dict[str, tuple[str, ...]] = {
 TARGETS = ("number", "color", "parity", "dozen", "column")
 # ---------------------------------------------------------------------------
 # Model registry — filled at startup
 # ---------------------------------------------------------------------------
@@ -310,7 +356,6 @@ class StepPrediction(BaseModel):
 class PredictResponse(BaseModel):
     predictions: list[StepPrediction]
-    notes: list[str]
 def _prepare_windows(sequence: list[int]) -> tuple[np.ndarray, np.ndarray]:
@@ -368,11 +413,8 @@ def root() -> dict[str, Any]:
 def predict(req: PredictRequest) -> PredictResponse:
     if not any(REGISTRY[t] for t in TARGETS):
         raise HTTPException(status_code=503, detail="no models loaded")
-    notes: list[str] = []
-    if len(req.numbers) < WINDOW_V2:
-        notes.append(f"Input {len(req.numbers)} numbers; padded with zeros to {WINDOW_V2} for v2 window.")
     preds = _forecast(req.numbers, req.steps)
-    return PredictResponse(predictions=preds, notes=notes)
 @app.post("/predict/file", response_model=PredictResponse)
@@ -384,24 +426,9 @@ async def predict_file(file: UploadFile = File(...), steps: int = 10) -> Predict
     try:
         content = await file.read()
-        # Try reading as CSV first
-        try:
-            df = pd.read_csv(io.BytesIO(content))
-        except Exception:
-            # If CSV fails, try Excel with multiple engines
-            excel_read_error = None
-            for engine in ("openpyxl", "xlrd"):
-                try:
-                    df = pd.read_excel(io.BytesIO(content), engine=engine)
-                    break
-                except Exception as exc_excel:
-                    excel_read_error = exc_excel
-                    df = None
-            if df is None:
-                raise HTTPException(
-                    status_code=400,
-                    detail=f"could not read file as CSV or Excel: {excel_read_error}"
-                ) from excel_read_error
     except Exception as exc:
         raise HTTPException(status_code=400, detail=f"could not read file: {exc}") from exc
@@ -416,12 +443,8 @@ async def predict_file(file: UploadFile = File(...), steps: int = 10) -> Predict
     if any(n < 0 or n > 36 for n in numbers):
         raise HTTPException(status_code=400, detail="values must be in [0, 36]")
-    notes = [f"Loaded column {col!r} with {len(numbers)} rows."]
-    if len(numbers) < WINDOW_V2:
-        notes.append(f"Padded to v2 window={WINDOW_V2} with leading zeros.")
     preds = _forecast(numbers, steps)
-    return PredictResponse(predictions=preds, notes=notes)
 if __name__ == "__main__":

 TARGETS = ("number", "color", "parity", "dozen", "column")
+def _looks_like_html_document(content: bytes) -> bool:
+    sample = content[:512].lstrip().lower()
+    return sample.startswith((b"<!doctype html", b"<html", b"<?xml")) or b"<html" in sample
def _read_uploaded_dataframe(content: bytes) -> pd.DataFrame:
    """Parse uploaded bytes into a DataFrame (HTML table, CSV, or Excel).

    Parsing order:

    1. If the content looks like HTML, try ``pd.read_html`` first. The CSV
       reader is permissive enough to accept comma-free markup as a
       one-column frame, so trying CSV first would hide a real HTML table.
       The non-empty table with the most rows is returned.
    2. ``pd.read_csv``.
    3. ``pd.read_excel`` with the ``openpyxl`` engine, then ``xlrd``.

    Args:
        content: Raw bytes of the uploaded file.

    Returns:
        The first DataFrame that one of the parsers produces.

    Raises:
        HTTPException: status 400 when the content is empty or
            whitespace-only, when ``pd.read_html`` returns no tables, or
            when no parser can read the content.
    """
    if not content.strip():
        raise HTTPException(status_code=400, detail="uploaded file is empty")

    html_error = None
    if _looks_like_html_document(content):
        try:
            tables = pd.read_html(io.BytesIO(content))
        except Exception as exc_html:
            # Keep the error; the other parsers still get a chance below.
            html_error = exc_html
        else:
            non_empty_tables = [table for table in tables if not table.empty]
            if non_empty_tables:
                return max(non_empty_tables, key=lambda table: table.shape[0])
            if tables:
                return tables[0]
            raise HTTPException(status_code=400, detail="uploaded HTML file does not contain any tables")

    try:
        return pd.read_csv(io.BytesIO(content))
    except Exception:
        # Deliberate best-effort: a CSV failure just means "try Excel next".
        pass

    excel_read_error = None
    for engine in ("openpyxl", "xlrd"):
        try:
            return pd.read_excel(io.BytesIO(content), engine=engine)
        except Exception as exc_excel:
            excel_read_error = exc_excel

    if html_error is not None:
        raise HTTPException(
            status_code=400,
            detail=(
                "uploaded file appears to be HTML rather than CSV/Excel, "
                f"and no HTML table could be parsed: {html_error}"
            ),
        ) from html_error

    raise HTTPException(
        status_code=400,
        detail=f"could not read file as CSV, Excel, or HTML table: {excel_read_error}"
    ) from excel_read_error
 # ---------------------------------------------------------------------------
 # Model registry — filled at startup
 # ---------------------------------------------------------------------------
 class PredictResponse(BaseModel):
     predictions: list[StepPrediction]
 def _prepare_windows(sequence: list[int]) -> tuple[np.ndarray, np.ndarray]:
 def predict(req: PredictRequest) -> PredictResponse:
     if not any(REGISTRY[t] for t in TARGETS):
         raise HTTPException(status_code=503, detail="no models loaded")
     preds = _forecast(req.numbers, req.steps)
+    return PredictResponse(predictions=preds)
 @app.post("/predict/file", response_model=PredictResponse)
     try:
         content = await file.read()
+        df = _read_uploaded_dataframe(content)
+    except HTTPException:
+        raise
     except Exception as exc:
         raise HTTPException(status_code=400, detail=f"could not read file: {exc}") from exc
     if any(n < 0 or n > 36 for n in numbers):
         raise HTTPException(status_code=400, detail="values must be in [0, 36]")
     preds = _forecast(numbers, steps)
+    return PredictResponse(predictions=preds)
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -4,6 +4,9 @@ pydantic>=2.5
 python-multipart>=0.0.9
 numpy>=1.26,<2.3
 pandas>=2.1
 scikit-learn==1.6.1
 xgboost>=2.1,<3.0
 lightgbm>=4.3

 python-multipart>=0.0.9
 numpy>=1.26,<2.3
 pandas>=2.1
+openpyxl>=3.1
+xlrd>=2.0.1
+lxml>=5.2
 scikit-learn==1.6.1
 xgboost>=2.1,<3.0
 lightgbm>=4.3