ethnmcl committed on
Commit
440feb0
·
verified ·
1 Parent(s): 57d1072

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -70
app.py CHANGED
@@ -1,71 +1,65 @@
1
- from fastapi import FastAPI
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from pydantic import BaseModel, Field, validator
4
- from typing import List, Dict, Any
5
- import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- from inference import load_model, predict_one, predict_batch, repo_snapshot
8
-
9
# Hub repo that hosts the trained model artifacts (overridable via env).
HF_REPO_ID = os.getenv("HF_REPO_ID", "ethnmcl/test-score-predictor-xgb")

app = FastAPI(title="Test Score Predictor API",
              version="1.0.0",
              description="FastAPI wrapper for ethnmcl/test-score-predictor-xgb")

# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive CORS setup — fine for a public demo Space, but confirm
# before any production use.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], allow_credentials=True,
    allow_methods=["*"], allow_headers=["*"],
)
20
-
21
# Load model at startup (downloads snapshot if not already present)
@app.on_event("startup")
def _startup():
    """Warm the model before the first request is served."""
    # NOTE(review): @app.on_event is deprecated in newer FastAPI releases
    # (lifespan handlers replace it) but still functional.
    repo_snapshot(HF_REPO_ID)  # ensures files exist locally
    load_model()  # loads artifacts into process
26
-
27
-
28
class Record(BaseModel):
    """One test-score prediction request; field bounds mirror the dataset."""

    Subject: str = Field(..., examples=["Mathematics"])
    Current_Grade: int = Field(..., ge=60, le=98)
    Max_Test_Percentage: int = Field(..., ge=65, le=100)
    Days_Preparing: int = Field(..., ge=1, le=14)
    Hours_Studied: int = Field(..., ge=2, le=50)
    # Client-supplied value is overwritten by the validator below.
    Study_Session_Average: float = Field(..., ge=0.1, le=10.0)
    Avg_Previous_Tests: int = Field(..., ge=55, le=95)
    Test_Difficulty: str = Field(..., examples=["Easy (20)", "Medium (30)", "Hard (50)"])

    # NOTE(review): pydantic v1-style validator; under pydantic v2 this
    # emits a deprecation warning (field_validator is the replacement).
    @validator("Study_Session_Average", always=True)
    def recompute_session_avg(cls, v, values):
        # Keep dataset contract: Hours / Days, rounded to 1 decimal
        # `values` only contains fields that already validated, hence the
        # membership check before dividing.
        if "Hours_Studied" in values and "Days_Preparing" in values:
            h = values["Hours_Studied"]; d = values["Days_Preparing"]
            return round(h / d, 1)
        return v
45
-
46
-
47
class PredictRequest(BaseModel):
    """Batch request wrapper: a list of Record rows to score together."""

    data: List[Record]
49
-
50
-
51
@app.get("/health")
def health() -> Dict[str, Any]:
    """Liveness probe: confirms the API is up and reports the backing repo."""
    return {"status": "ok", "repo": HF_REPO_ID}
54
-
55
-
56
@app.get("/model-info")
def model_info() -> Dict[str, Any]:
    """Describe which Hub repo and artifact files back this service."""
    # NOTE(review): the file list is hard-coded — keep in sync with the repo.
    return {"repo": HF_REPO_ID, "files": ["preprocessor.joblib", "weights.npy", "xgb_model.json", "schema.json"]}
59
-
60
-
61
@app.post("/predict")
def predict(req: Record) -> Dict[str, Any]:
    """Predict a test score for a single validated record."""
    # NOTE(review): .dict() is the pydantic v1 API (model_dump in v2) —
    # consistent with the v1-style validator used in Record.
    score = predict_one(req.dict())
    return {"predicted_score": float(score)}
65
-
66
-
67
@app.post("/predict-batch")
def predict_many(req: PredictRequest) -> Dict[str, Any]:
    """Predict scores for every record in the request, preserving order."""
    records = [r.dict() for r in req.data]
    scores = predict_batch(records)
    return {"predicted_scores": [float(s) for s in scores], "count": len(scores)}
 
1
import json
import os
import threading
from pathlib import Path
from typing import Optional

import joblib
import numpy as np
import pandas as pd
import xgboost as xgb
from huggingface_hub import snapshot_download
5
+
6
# --- Configuration (overridable via environment) ---
HF_CACHE_DIR = os.getenv("HF_CACHE_DIR", "/models/hf")
HF_REPO_ID = os.getenv("HF_REPO_ID", "ethnmcl/test-score-predictor-xgb")
HF_TOKEN = os.getenv("HF_TOKEN", None)  # set as Space secret for private repos

# --- Lazily loaded model state; writes are guarded by _loaded_lock ---
_loaded_lock = threading.Lock()
_loaded = False    # flips to True once all artifacts are in memory
_pre = None        # fitted preprocessor loaded from preprocessor.joblib
_weights = None    # array from weights.npy, applied to numeric features
_schema = None     # dict from schema.json with "numeric"/"categorical" keys
_model = None      # xgboost.XGBRegressor loaded from xgb_model.json
16
+
17
def repo_snapshot(repo_id: Optional[str] = None) -> str:
    """Download (or reuse) the model repo snapshot and return its local path.

    Args:
        repo_id: Hub repo to fetch; falls back to HF_REPO_ID when omitted.

    Returns:
        Path of the local directory holding the snapshot files.
    """
    repo_id = repo_id or HF_REPO_ID
    local_dir = snapshot_download(
        repo_id=repo_id,
        local_dir=HF_CACHE_DIR,
        # NOTE(review): deprecated and ignored by recent huggingface_hub
        # versions; kept for compatibility with older releases that still
        # honor it.
        local_dir_use_symlinks=False,
        token=HF_TOKEN,  # only needed for private repos
        repo_type="model",
    )
    return local_dir
27
+
28
def load_model():
    """Load all model artifacts into module globals, exactly once.

    Thread-safe via double-checked locking: the unlocked fast-path check
    avoids lock contention after the first load, and the re-check inside
    the lock prevents duplicate loads when two threads race.
    """
    global _loaded, _pre, _weights, _schema, _model
    if _loaded:
        return
    with _loaded_lock:
        if _loaded:
            return
        # Ensure the snapshot exists locally (downloads on first call).
        base = Path(repo_snapshot(HF_REPO_ID))
        _pre = joblib.load(base / "preprocessor.joblib")
        _weights = np.load(base / "weights.npy")
        with open(base / "schema.json") as f:
            _schema = json.load(f)
        _model = xgb.XGBRegressor()
        _model.load_model(str(base / "xgb_model.json"))
        # Flag is set last so readers never observe half-initialized state.
        _loaded = True
43
+
44
def _transform(records):
    """Turn raw record dicts into the model's weighted feature matrix.

    Args:
        records: list of dicts keyed by the schema's column names.

    Returns:
        Float feature matrix ready for the XGBoost model.
    """
    num = _schema["numeric"]; cat = _schema["categorical"]
    # Column order: numeric first, then categorical — presumably the order
    # the preprocessor was fitted on; missing keys become NaN silently.
    df = pd.DataFrame(records, columns=num + cat)
    Xt = _pre.transform(df)
    Xt = Xt.astype(float, copy=False)
    # Post-transform numeric weighting.
    # NOTE(review): assumes the preprocessor emits the numeric features in
    # the first len(num) output columns and that _weights has a compatible
    # (broadcastable) shape — confirm against the training pipeline. Also
    # assumes a dense output; a sparse transform would not support this.
    Xt[:, :len(num)] *= _weights
    return Xt
51
+
52
def predict_one(record: dict) -> float:
    """Score a single record, clamped to the dataset's [50, 100] range."""
    if not _loaded:
        load_model()
    features = _transform([record])
    raw = float(_model.predict(features)[0])
    # Clamp to the dataset range (equivalent to max(50.0, min(100.0, raw))).
    if raw < 50.0:
        return 50.0
    if raw > 100.0:
        return 100.0
    return raw
58
+
59
def predict_batch(records: list) -> np.ndarray:
    """Score many records at once; predictions are clipped to [50, 100].

    Args:
        records: list of feature dicts (see _transform for expected keys).

    Returns:
        1-D float array of clipped predictions, one per input record.
    """
    if not records:
        # Short-circuit: an empty frame can raise inside the fitted
        # preprocessor / model, and there is nothing to predict anyway.
        return np.empty(0, dtype=float)
    if not _loaded:
        load_model()
    Xt = _transform(records)
    preds = _model.predict(Xt)
    return np.clip(preds, 50.0, 100.0)
65