Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,71 +1,65 @@
|
|
| 1 |
-
|
| 2 |
-
from
|
| 3 |
-
|
| 4 |
-
from
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
from inference import load_model, predict_one, predict_batch, repo_snapshot
|
| 8 |
-
|
| 9 |
-
# Hugging Face model repository served by this API; can be overridden through
# the HF_REPO_ID environment variable.
HF_REPO_ID = os.getenv("HF_REPO_ID", "ethnmcl/test-score-predictor-xgb")

app = FastAPI(
    title="Test Score Predictor API",
    version="1.0.0",
    description="FastAPI wrapper for ethnmcl/test-score-predictor-xgb",
)

# Open CORS policy: any origin, method and header may call the API.
# Credentials stay disabled because pairing allow_credentials=True with a
# wildcard origin would let any site issue credentialed cross-origin requests.
# List explicit origins here if cookies or auth headers are ever required.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 20 |
-
|
| 21 |
-
# Load model at startup (downloads snapshot if not already present)
|
| 22 |
-
@app.on_event("startup")
def _startup():
    """Prepare the model when the application starts.

    Calls ``repo_snapshot`` for the configured repository so the artifact
    files are present locally, then ``load_model`` to bring the artifacts
    into this process.
    """
    # Step 1: make sure the repository files exist on local disk.
    repo_snapshot(HF_REPO_ID)
    # Step 2: load the artifacts into the running process.
    load_model()
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
class Record(BaseModel):
    """One observation submitted for scoring.

    Numeric ranges are declared through the ``Field`` constraints below.
    ``Study_Session_Average`` is a derived value: the validator replaces
    whatever the client sent with ``Hours_Studied / Days_Preparing`` whenever
    both of those fields are available.
    """

    Subject: str = Field(..., examples=["Mathematics"])
    Current_Grade: int = Field(..., ge=60, le=98)
    Max_Test_Percentage: int = Field(..., ge=65, le=100)
    Days_Preparing: int = Field(..., ge=1, le=14)
    Hours_Studied: int = Field(..., ge=2, le=50)
    Study_Session_Average: float = Field(..., ge=0.1, le=10.0)
    Avg_Previous_Tests: int = Field(..., ge=55, le=95)
    Test_Difficulty: str = Field(..., examples=["Easy (20)", "Medium (30)", "Hard (50)"])

    @validator("Study_Session_Average", always=True)
    def recompute_session_avg(cls, v, values):
        """Return hours per day rounded to one decimal, else the given value."""
        # Dataset contract: Study_Session_Average == Hours / Days, 1 decimal.
        try:
            hours = values["Hours_Studied"]
            days = values["Days_Preparing"]
        except KeyError:
            # One of the inputs is missing from ``values`` (presumably because
            # it failed its own validation); keep the submitted value.
            return v
        # NOTE(review): the recomputed value is not re-checked here against
        # the 0.1-10.0 bounds declared on the field - confirm that is intended.
        return round(hours / days, 1)
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
class PredictRequest(BaseModel):
    """Request body for batch prediction: a list of records under ``data``."""

    data: List[Record]
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
@app.get("/health")
def health() -> Dict[str, Any]:
    """Return a static ``ok`` status together with the configured repo id."""
    payload: Dict[str, Any] = {"status": "ok"}
    payload["repo"] = HF_REPO_ID
    return payload
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
@app.get("/model-info")
def model_info() -> Dict[str, Any]:
    """Return the configured repo id and the names of the model artifacts.

    The file list is a fixed set of names; it is not read from disk.
    """
    artifact_files = [
        "preprocessor.joblib",
        "weights.npy",
        "xgb_model.json",
        "schema.json",
    ]
    return {"repo": HF_REPO_ID, "files": artifact_files}
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
@app.post("/predict")
def predict(req: Record) -> Dict[str, Any]:
    """Score one validated record and return it under ``predicted_score``."""
    features = req.dict()
    prediction = predict_one(features)
    return {"predicted_score": float(prediction)}
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
@app.post("/predict-batch")
def predict_many(req: PredictRequest) -> Dict[str, Any]:
    """Score every record in ``req.data`` with a single batch call.

    Returns the scores as plain floats under ``predicted_scores`` plus their
    number under ``count``.
    """
    rows = []
    for item in req.data:
        rows.append(item.dict())
    raw_scores = predict_batch(rows)
    as_floats = [float(value) for value in raw_scores]
    return {"predicted_scores": as_floats, "count": len(raw_scores)}
|
|
|
|
| 1 |
+
import os, json, joblib, numpy as np, pandas as pd, threading
|
| 2 |
+
from huggingface_hub import snapshot_download
|
| 3 |
+
import xgboost as xgb
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
# --- Configuration (each value can be overridden by an environment variable) --
# Directory the repository snapshot is downloaded into.
HF_CACHE_DIR = os.getenv("HF_CACHE_DIR", "/models/hf")
# Hugging Face model repository that holds the artifacts.
HF_REPO_ID = os.getenv("HF_REPO_ID", "ethnmcl/test-score-predictor-xgb")
# Access token; set as a Space secret for private repos. None when unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# --- Lazily populated model state ---------------------------------------------
# Lock held by load_model() while the artifacts are being loaded.
_loaded_lock = threading.Lock()
# The four artifact slots start empty; _loaded becomes True once they are set.
_pre = _weights = _schema = _model = None
_loaded = False
|
| 16 |
+
|
| 17 |
+
def repo_snapshot(repo_id: str = None) -> str:
    """Fetch a snapshot of the model repository and return its local path.

    Args:
        repo_id: Repository to download. Falls back to ``HF_REPO_ID`` when
            omitted or otherwise falsy.

    Returns:
        The directory reported by ``snapshot_download``; files are placed in
        ``HF_CACHE_DIR`` as real files rather than symlinks.
    """
    target_repo = repo_id if repo_id else HF_REPO_ID
    download_options = {
        "repo_id": target_repo,
        "local_dir": HF_CACHE_DIR,
        "local_dir_use_symlinks": False,
        "token": HF_TOKEN,
        "repo_type": "model",
    }
    return snapshot_download(**download_options)
|
| 27 |
+
|
| 28 |
+
def load_model():
    """Load the preprocessor, weights, schema and XGBoost model into globals.

    Does nothing when the artifacts are already loaded. Otherwise takes
    ``_loaded_lock``, re-checks the flag, downloads the repository snapshot
    and fills ``_pre``, ``_weights``, ``_schema`` and ``_model``. ``_loaded``
    is set last, so it only turns True after every artifact is in place; if
    any step raises, the flag stays False and a later call retries.
    """
    global _loaded, _pre, _weights, _schema, _model
    if _loaded:
        # Fast path: skip the lock once everything is loaded.
        return
    with _loaded_lock:
        if _loaded:
            # Another caller finished loading while this one waited.
            return
        base = Path(repo_snapshot(HF_REPO_ID))
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only point HF_REPO_ID at a repository you trust.
        _pre = joblib.load(base / "preprocessor.joblib")
        _weights = np.load(base / "weights.npy")
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(base / "schema.json", encoding="utf-8") as f:
            _schema = json.load(f)
        _model = xgb.XGBRegressor()
        _model.load_model(str(base / "xgb_model.json"))
        _loaded = True
|
| 43 |
+
|
| 44 |
+
def _transform(records):
    """Convert raw record dicts into the weighted feature matrix.

    Args:
        records: Sequence of dicts keyed by the column names listed in the
            schema's ``numeric`` and ``categorical`` entries.

    Returns:
        A dense float array: the preprocessor output with its leading
        ``len(numeric)`` columns multiplied by ``_weights``.
    """
    num = _schema["numeric"]
    cat = _schema["categorical"]
    # Column order is fixed to numeric-then-categorical, as listed in the schema.
    df = pd.DataFrame(records, columns=num + cat)
    Xt = _pre.transform(df)
    # A preprocessor may return a scipy sparse matrix; the in-place column
    # scaling below needs a dense array, so densify first.
    if hasattr(Xt, "toarray"):
        Xt = Xt.toarray()
    Xt = Xt.astype(float, copy=False)
    # NOTE(review): assumes the first len(num) output columns are the numeric
    # features and that _weights broadcasts across them - confirm against the
    # fitted preprocessor.
    Xt[:, :len(num)] *= _weights  # post-transform numeric weighting
    return Xt
|
| 51 |
+
|
| 52 |
+
def predict_one(record: dict) -> float:
    """Predict the score for one record, limited to the 50-100 range.

    Loads the model first if that has not happened yet.
    """
    if not _loaded:
        load_model()
    features = _transform([record])
    raw = float(_model.predict(features)[0])
    # Optional clamp to match the dataset range: cap at 100, then floor at 50.
    capped = min(100.0, raw)
    return max(50.0, capped)
|
| 58 |
+
|
| 59 |
+
def predict_batch(records: list) -> np.ndarray:
    """Predict scores for several records, each clipped to the 50-100 range.

    Args:
        records: Record dicts to score. May be empty.

    Returns:
        A one-dimensional array with one clipped prediction per record. An
        empty input yields an empty array without loading the model.
    """
    # An empty batch has nothing to transform; answer directly instead of
    # handing a zero-row frame to the preprocessor.
    if len(records) == 0:
        return np.empty(0, dtype=float)
    if not _loaded:
        load_model()
    Xt = _transform(records)
    preds = _model.predict(Xt)
    return np.clip(preds, 50.0, 100.0)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|