khushalcodiste committed on
Commit
5fccbac
·
1 Parent(s): 39c7e26

feat: fixed response

Browse files
Files changed (4) hide show
  1. DEPLOY.md +1 -1
  2. README.md +1 -1
  3. app.py +51 -28
  4. requirements.txt +3 -0
DEPLOY.md CHANGED
@@ -83,7 +83,7 @@ docker run --rm -p 7860:7860 roulette-predictor
83
  |------|---------|
84
  | `app.py` | FastAPI service with `/predict`, `/predict/file`, `/models`, `/` |
85
  | `Dockerfile` | Python 3.11-slim, non-root user UID 1000, port 7860 (HF convention) |
86
- | `requirements.txt` | fastapi, uvicorn, pandas, numpy, scikit-learn, xgboost, joblib |
87
  | `README.md` | HF Space metadata frontmatter + user docs |
88
  | `ml/features.py` | v1 hand-crafted features (window=10, 25 dims) |
89
  | `ml/features_v2.py` | v2 features (window=20, 51 dims, run-length, autocorrelation, wheel-neighbor) |
 
83
  |------|---------|
84
  | `app.py` | FastAPI service with `/predict`, `/predict/file`, `/models`, `/` |
85
  | `Dockerfile` | Python 3.11-slim, non-root user UID 1000, port 7860 (HF convention) |
86
+ | `requirements.txt` | fastapi, uvicorn, pandas, openpyxl, xlrd, lxml, numpy, scikit-learn, xgboost, joblib |
87
  | `README.md` | HF Space metadata frontmatter + user docs |
88
  | `ml/features.py` | v1 hand-crafted features (window=10, 25 dims) |
89
  | `ml/features_v2.py` | v2 features (window=20, 51 dims, run-length, autocorrelation, wheel-neighbor) |
README.md CHANGED
@@ -35,7 +35,7 @@ Ridge, SGD, SVC, LSTM, GRU, Transformer, and Markov orders 1/2/3.
35
 
36
  ## File upload notes
37
 
38
- The `/predict/file` endpoint accepts both CSV and Excel files. The file must contain a column named `Winner`, `winning number`, or `number` (case-insensitive). If not found, the last column is used.
39
 
40
  ## Example request
41
 
 
35
 
36
  ## File upload notes
37
 
38
+ The `/predict/file` endpoint accepts CSV, `.xlsx`, `.xls`, and HTML files that contain a table. The numbers are read from a column named `Winner`, `winning number`, or `number` (case-insensitive); if no such column is found, the last column is used instead.
39
 
40
  ## Example request
41
 
app.py CHANGED
@@ -59,6 +59,52 @@ TARGET_LABELS: dict[str, tuple[str, ...]] = {
59
 
60
  TARGETS = ("number", "color", "parity", "dozen", "column")
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # ---------------------------------------------------------------------------
63
  # Model registry — filled at startup
64
  # ---------------------------------------------------------------------------
@@ -310,7 +356,6 @@ class StepPrediction(BaseModel):
310
 
311
  class PredictResponse(BaseModel):
312
  predictions: list[StepPrediction]
313
- notes: list[str]
314
 
315
 
316
  def _prepare_windows(sequence: list[int]) -> tuple[np.ndarray, np.ndarray]:
@@ -368,11 +413,8 @@ def root() -> dict[str, Any]:
368
  def predict(req: PredictRequest) -> PredictResponse:
369
  if not any(REGISTRY[t] for t in TARGETS):
370
  raise HTTPException(status_code=503, detail="no models loaded")
371
- notes: list[str] = []
372
- if len(req.numbers) < WINDOW_V2:
373
- notes.append(f"Input {len(req.numbers)} numbers; padded with zeros to {WINDOW_V2} for v2 window.")
374
  preds = _forecast(req.numbers, req.steps)
375
- return PredictResponse(predictions=preds, notes=notes)
376
 
377
 
378
  @app.post("/predict/file", response_model=PredictResponse)
@@ -384,24 +426,9 @@ async def predict_file(file: UploadFile = File(...), steps: int = 10) -> Predict
384
 
385
  try:
386
  content = await file.read()
387
- # Try reading as CSV first
388
- try:
389
- df = pd.read_csv(io.BytesIO(content))
390
- except Exception:
391
- # If CSV fails, try Excel with multiple engines
392
- excel_read_error = None
393
- for engine in ("openpyxl", "xlrd"):
394
- try:
395
- df = pd.read_excel(io.BytesIO(content), engine=engine)
396
- break
397
- except Exception as exc_excel:
398
- excel_read_error = exc_excel
399
- df = None
400
- if df is None:
401
- raise HTTPException(
402
- status_code=400,
403
- detail=f"could not read file as CSV or Excel: {excel_read_error}"
404
- ) from excel_read_error
405
  except Exception as exc:
406
  raise HTTPException(status_code=400, detail=f"could not read file: {exc}") from exc
407
 
@@ -416,12 +443,8 @@ async def predict_file(file: UploadFile = File(...), steps: int = 10) -> Predict
416
  if any(n < 0 or n > 36 for n in numbers):
417
  raise HTTPException(status_code=400, detail="values must be in [0, 36]")
418
 
419
- notes = [f"Loaded column {col!r} with {len(numbers)} rows."]
420
- if len(numbers) < WINDOW_V2:
421
- notes.append(f"Padded to v2 window={WINDOW_V2} with leading zeros.")
422
-
423
  preds = _forecast(numbers, steps)
424
- return PredictResponse(predictions=preds, notes=notes)
425
 
426
 
427
  if __name__ == "__main__":
 
59
 
60
  TARGETS = ("number", "color", "parity", "dozen", "column")
61
 
62
+
63
+ def _looks_like_html_document(content: bytes) -> bool:
64
+ sample = content[:512].lstrip().lower()
65
+ return sample.startswith((b"<!doctype html", b"<html", b"<?xml")) or b"<html" in sample
66
+
67
+
68
def _read_html_table(content: bytes) -> pd.DataFrame:
    """Parse *content* as HTML and return the table with the most rows."""
    try:
        tables = pd.read_html(io.BytesIO(content))
    except Exception as exc_html:
        raise HTTPException(
            status_code=400,
            detail=(
                "uploaded file appears to be HTML rather than CSV/Excel, "
                f"and no HTML table could be parsed: {exc_html}"
            ),
        ) from exc_html

    non_empty_tables = [table for table in tables if not table.empty]
    if non_empty_tables:
        # Pages often wrap the data in small layout tables; prefer the longest.
        return max(non_empty_tables, key=lambda table: table.shape[0])
    if tables:
        return tables[0]
    raise HTTPException(status_code=400, detail="uploaded HTML file does not contain any tables")


def _read_uploaded_dataframe(content: bytes) -> pd.DataFrame:
    """Load an uploaded CSV, Excel, or HTML-table file into a DataFrame.

    The format is sniffed from the bytes before any parser is tried.
    ``pd.read_csv`` is permissive enough to "successfully" parse HTML markup
    as a one-column CSV, so HTML must be detected first instead of being a
    fallback that only runs when the CSV parser happens to fail.

    Args:
        content: Raw bytes of the uploaded file.

    Returns:
        The parsed table. For HTML input, the non-empty table with the most
        rows (or the first table if every table is empty).

    Raises:
        HTTPException: status 400 if the file is empty, looks like HTML but
            holds no parsable table, or cannot be read as CSV or Excel.
    """
    if not content.strip():
        raise HTTPException(status_code=400, detail="uploaded file is empty")

    if _looks_like_html_document(content):
        return _read_html_table(content)

    # ZIP local-file header (.xlsx) and OLE2 compound-file header (.xls).
    excel_signatures = (b"PK\x03\x04", b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1")
    if not content.startswith(excel_signatures):
        try:
            return pd.read_csv(io.BytesIO(content))
        except Exception:
            # Deliberate best-effort: not valid CSV, so fall through to Excel.
            pass

    excel_read_error = None
    for engine in ("openpyxl", "xlrd"):
        try:
            return pd.read_excel(io.BytesIO(content), engine=engine)
        except Exception as exc_excel:
            excel_read_error = exc_excel

    raise HTTPException(
        status_code=400,
        detail=f"could not read file as CSV, Excel, or HTML table: {excel_read_error}"
    ) from excel_read_error
107
+
108
  # ---------------------------------------------------------------------------
109
  # Model registry — filled at startup
110
  # ---------------------------------------------------------------------------
 
356
 
357
  class PredictResponse(BaseModel):
358
  predictions: list[StepPrediction]
 
359
 
360
 
361
  def _prepare_windows(sequence: list[int]) -> tuple[np.ndarray, np.ndarray]:
 
413
  def predict(req: PredictRequest) -> PredictResponse:
414
  if not any(REGISTRY[t] for t in TARGETS):
415
  raise HTTPException(status_code=503, detail="no models loaded")
 
 
 
416
  preds = _forecast(req.numbers, req.steps)
417
+ return PredictResponse(predictions=preds)
418
 
419
 
420
  @app.post("/predict/file", response_model=PredictResponse)
 
426
 
427
  try:
428
  content = await file.read()
429
+ df = _read_uploaded_dataframe(content)
430
+ except HTTPException:
431
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
  except Exception as exc:
433
  raise HTTPException(status_code=400, detail=f"could not read file: {exc}") from exc
434
 
 
443
  if any(n < 0 or n > 36 for n in numbers):
444
  raise HTTPException(status_code=400, detail="values must be in [0, 36]")
445
 
 
 
 
 
446
  preds = _forecast(numbers, steps)
447
+ return PredictResponse(predictions=preds)
448
 
449
 
450
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -4,6 +4,9 @@ pydantic>=2.5
4
  python-multipart>=0.0.9
5
  numpy>=1.26,<2.3
6
  pandas>=2.1
 
 
 
7
  scikit-learn==1.6.1
8
  xgboost>=2.1,<3.0
9
  lightgbm>=4.3
 
4
  python-multipart>=0.0.9
5
  numpy>=1.26,<2.3
6
  pandas>=2.1
7
+ openpyxl>=3.1
8
+ xlrd>=2.0.1
9
+ lxml>=5.2
10
  scikit-learn==1.6.1
11
  xgboost>=2.1,<3.0
12
  lightgbm>=4.3