# File size: 2,713 Bytes
# e2d5c54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import joblib
import shutil
from huggingface_hub import hf_hub_download, HfApi
from typing import List

def download_model_from_hf(model_repo: str, model_filename: str = None, token: str = None, local_dir: str = "/app/model"):
    """
    Download a model file from a Hugging Face model repo into ``local_dir``.

    Candidates are tried in order until one downloads successfully:
    ``model_filename`` first (when given), then the built-in fallback names
    (best_overall_* / best_* for XGBoost, RandomForest, Bagging, DecisionTree).

    Args:
        model_repo: Repo id passed to ``hf_hub_download`` as ``repo_id``.
        model_filename: Preferred filename inside the repo; optional.
        token: Access token forwarded to ``hf_hub_download``; optional.
        local_dir: Directory the downloaded file is copied into. Created
            if it does not exist.

    Returns:
        Path of the copied file inside ``local_dir``.

    Raises:
        FileNotFoundError: If no candidate could be downloaded. The last
            underlying error is attached as ``__cause__``.
    """
    os.makedirs(local_dir, exist_ok=True)

    # fallback candidates (order of preference)
    fallback_names = [
        "best_overall_XGBoost.joblib",
        "best_overall_RandomForest.joblib",
        "best_overall_Bagging.joblib",
        "best_overall_DecisionTree.joblib",
        "best_XGBoost.joblib",
        "best_RandomForest.joblib",
        "best_Bagging.joblib",
        "best_DecisionTree.joblib",
    ]
    requested = [model_filename] if model_filename else []
    # dict.fromkeys drops duplicates while keeping order, so an explicit
    # filename that equals a fallback name is not attempted twice.
    candidates = list(dict.fromkeys(requested + fallback_names))

    last_exception = None
    for fn in candidates:
        try:
            print(f"Trying to download '{fn}' from '{model_repo}' ...")
            # `token` replaces the deprecated `use_auth_token` keyword.
            remote = hf_hub_download(repo_id=model_repo, filename=fn, repo_type="model", token=token)
            # hf_hub_download returns a cache path; copy into local_dir with same filename
            dest = os.path.join(local_dir, os.path.basename(remote))
            if remote != dest:
                shutil.copy(remote, dest)
            print("Downloaded model to:", dest)
            return dest
        except Exception as e:
            # Deliberately broad: any failure for one candidate (missing file,
            # auth, network, copy error) just moves on to the next name.
            last_exception = e
            print(f"Could not download {fn}: {e}")

    # If we got here no candidate succeeded
    raise FileNotFoundError(
        f"Model not found in repo '{model_repo}'. Tried: {candidates}. Last error: {last_exception}"
    ) from last_exception

def load_model(local_model_path: str):
    """
    Deserialize and return the object stored at ``local_model_path`` using joblib.

    NOTE(review): joblib files are pickle-based, so loading one can execute
    arbitrary code; only pass paths to files from a trusted source.
    """
    model = joblib.load(local_model_path)
    return model

def inputs_to_dataframe(payload: dict, feature_order: List[str]):
    """
    Convert one record (dict) or a list of records into a DataFrame whose
    columns are exactly ``feature_order``, in that order.

    Args:
        payload: A single record as a dict, or a list of such dicts.
        feature_order: Column names the result must have, in order. Columns
            missing from the payload are added and filled with ``pd.NA``;
            payload keys not listed here are dropped.

    Returns:
        A new DataFrame. Each column is converted to a numeric dtype when
        every value in it parses as a number; otherwise it is left as-is.

    Raises:
        ValueError: If ``payload`` is neither a dict nor a list.
    """
    import pandas as pd

    if isinstance(payload, dict):
        rows = [payload]
    elif isinstance(payload, list):
        rows = payload
    else:
        raise ValueError("Payload must be dict or list of dicts")

    df = pd.DataFrame(rows)
    # ensure columns exist
    for c in feature_order:
        if c not in df.columns:
            df[c] = pd.NA
    df = df[feature_order].copy()

    # Best-effort numeric cast, column by column. `errors="ignore"` is
    # deprecated in pandas, and combined with a blanket `except` it would
    # silently skip every conversion once pandas rejects that value. Use the
    # default errors="raise" and keep the column unchanged when it does not parse.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col])
        except (ValueError, TypeError):
            continue
    return df