sathishaiuse committed on
Commit
3b5d784
·
verified ·
1 Parent(s): 51547bb

Update predict_utils.py

Browse files
Files changed (1) hide show
  1. predict_utils.py +148 -100
predict_utils.py CHANGED
@@ -1,8 +1,10 @@
1
  # predict_utils.py
2
- # Robust loader with extended monkey-patches for XGBoost and scikit-learn compatibility.
3
  import os
4
  import logging
5
  import joblib
 
 
6
  from huggingface_hub import hf_hub_download
7
 
8
  # Logging
@@ -21,76 +23,68 @@ LOCAL_CANDIDATES = [
21
  ]
22
 
23
  # -------------------------
24
- # Monkey-patch scikit-learn to add missing tags/APIs used during unpickling
25
  # -------------------------
26
- def ensure_sklearn_compat():
 
27
  try:
28
  import sklearn
29
  from sklearn.base import BaseEstimator
30
  except Exception as e:
31
- logger.debug(f"scikit-learn not importable for patching: {e}")
32
  return
33
 
34
- # If older/newer pickles expect 'sklearn_tags' attribute/method on BaseEstimator, provide it.
35
- try:
36
- if not hasattr(BaseEstimator, "sklearn_tags"):
37
- # Provide a method attribute that returns an empty dict by default.
38
- def _sklearn_tags(self):
39
- # Estimators can override by defining sklearn_tags attribute at instance/class level.
40
- return {}
41
  setattr(BaseEstimator, "sklearn_tags", _sklearn_tags)
42
- logger.info("Patched BaseEstimator.sklearn_tags() -> default {}")
43
- except Exception as e:
44
- logger.debug(f"Could not patch BaseEstimator.sklearn_tags: {e}")
45
-
46
- # Ensure _get_tags exists (some older/newer flows call this)
47
- try:
48
- if not hasattr(BaseEstimator, "_get_tags"):
49
- def _get_tags(self):
50
- # If estimator defines _more_tags, call it to merge tags; otherwise use sklearn_tags if present.
51
- tags = {}
52
- # _more_tags (newer style)
53
- more = getattr(self, "_more_tags", None)
54
- if callable(more):
55
- try:
56
- tags.update(more())
57
- except Exception:
58
- pass
59
- # fallback to sklearn_tags method if present
60
- st = getattr(self, "sklearn_tags", None)
61
- if callable(st):
62
- try:
63
- tags.update(st())
64
- except Exception:
65
- pass
66
- return tags
67
  setattr(BaseEstimator, "_get_tags", _get_tags)
68
  logger.info("Patched BaseEstimator._get_tags()")
69
- except Exception as e:
70
- logger.debug(f"Could not patch BaseEstimator._get_tags: {e}")
71
 
72
- # Provide a safe _more_tags no-op if missing on class-level to avoid AttributeError
73
- try:
74
- if not hasattr(BaseEstimator, "_more_tags"):
75
- def _more_tags(self):
76
- return {}
77
  setattr(BaseEstimator, "_more_tags", _more_tags)
78
- logger.info("Patched BaseEstimator._more_tags() -> default {}")
79
- except Exception as e:
80
- logger.debug(f"Could not patch BaseEstimator._more_tags: {e}")
81
 
82
- # -------------------------
83
- # Monkey-patch xgboost sklearn wrappers & base class to add missing attributes.
84
- # Handles 'use_label_encoder', 'gpu_id', 'predictor', etc.
85
- # -------------------------
86
- def ensure_xgb_sklearn_compat():
87
  try:
88
  import xgboost as xgb
89
  except Exception as e:
90
- logger.debug(f"xgboost not importable for patching: {e}")
91
  return
92
 
93
- # Base class: XGBModel (add common attrs)
94
  XGBModel = getattr(xgb, "XGBModel", None)
95
  if XGBModel is not None:
96
  for attr, val in {
@@ -108,7 +102,6 @@ def ensure_xgb_sklearn_compat():
108
  except Exception as e:
109
  logger.debug(f"Could not patch XGBModel.{attr}: {e}")
110
 
111
- # XGBClassifier and XGBRegressor class-level defaults
112
  for cls_name in ("XGBClassifier", "XGBRegressor"):
113
  cls = getattr(xgb, cls_name, None)
114
  if cls is not None:
@@ -127,14 +120,12 @@ def ensure_xgb_sklearn_compat():
127
  except Exception as e:
128
  logger.debug(f"Could not patch {cls_name}.{attr}: {e}")
129
 
130
- # -------------------------
131
- # Call compatibility patches early so joblib.load has them available
132
- # -------------------------
133
- ensure_sklearn_compat()
134
- ensure_xgb_sklearn_compat()
135
 
136
  # -------------------------
137
- # Helpers: file inspection and loader attempts
138
  # -------------------------
139
  def inspect_file(path):
140
  info = {"path": path, "exists": False}
@@ -148,17 +139,18 @@ def inspect_file(path):
148
  info["head_bytes"] = head
149
  try:
150
  info["head_text"] = head.decode("utf-8", errors="replace")
151
- except:
152
  info["head_text"] = None
153
  except Exception as e:
154
  info["inspect_error"] = str(e)
155
  return info
156
 
157
  def try_joblib_load(path):
 
158
  try:
159
- # ensure patches just before load (in case of lazy imports)
160
- ensure_sklearn_compat()
161
- ensure_xgb_sklearn_compat()
162
  logger.info(f"Trying joblib.load on {path}")
163
  m = joblib.load(path)
164
  logger.info("joblib.load succeeded")
@@ -167,7 +159,54 @@ def try_joblib_load(path):
167
  logger.exception(f"joblib.load failed: {e}")
168
  return ("joblib", e)
169
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  def try_xgboost_booster(path):
 
171
  try:
172
  import xgboost as xgb
173
  except Exception as e:
@@ -179,12 +218,10 @@ def try_xgboost_booster(path):
179
  booster = xgb.Booster()
180
  booster.load_model(path)
181
  logger.info("xgboost.Booster.load_model succeeded")
182
-
183
  class BoosterWrapper:
184
  def __init__(self, booster):
185
  self.booster = booster
186
  self._is_xgb_booster = True
187
-
188
  def predict(self, X):
189
  import numpy as _np, xgboost as _xgb
190
  arr = _np.array(X, dtype=float)
@@ -193,7 +230,6 @@ def try_xgboost_booster(path):
193
  if hasattr(pred, "ndim") and pred.ndim == 1:
194
  return (_np.where(pred >= 0.5, 1, 0)).tolist()
195
  return pred.tolist()
196
-
197
  def predict_proba(self, X):
198
  import numpy as _np, xgboost as _xgb
199
  arr = _np.array(X, dtype=float)
@@ -202,14 +238,13 @@ def try_xgboost_booster(path):
202
  if hasattr(pred, "ndim") and pred.ndim == 1:
203
  return (_np.vstack([1 - pred, pred]).T).tolist()
204
  return pred.tolist()
205
-
206
  return ("xgboost_booster", BoosterWrapper(booster))
207
  except Exception as e:
208
  logger.exception(f"xgboost.Booster.load_model failed: {e}")
209
  return ("xgboost_booster", e)
210
 
211
  # -------------------------
212
- # Core loader
213
  # -------------------------
214
  def load_model():
215
  logger.info("==== MODEL LOAD START ====")
@@ -217,7 +252,7 @@ def load_model():
217
  logger.info(f"Filename: {HF_MODEL_FILENAME}")
218
  logger.info(f"HF_TOKEN present? {bool(HF_TOKEN)}")
219
 
220
- # Try local candidate paths
221
  for path in LOCAL_CANDIDATES:
222
  try:
223
  info = inspect_file(path)
@@ -229,14 +264,27 @@ def load_model():
229
  if t == "joblib" and not isinstance(res, Exception):
230
  return res
231
 
232
- t, res = try_xgboost_booster(path)
233
- if t == "xgboost_booster" and not isinstance(res, Exception):
234
- return res
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  except Exception as e:
237
  logger.exception(f"Error while trying local candidate {path}: {e}")
238
 
239
- # Try huggingface hub download
240
  try:
241
  logger.info(f"Trying hf_hub_download from {HF_MODEL_REPO}/{HF_MODEL_FILENAME}")
242
  model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=HF_MODEL_FILENAME, token=HF_TOKEN)
@@ -248,27 +296,31 @@ def load_model():
248
  if t == "joblib" and not isinstance(res, Exception):
249
  return res
250
 
251
- t, res = try_xgboost_booster(model_path)
252
- if t == "xgboost_booster" and not isinstance(res, Exception):
253
- return res
254
-
255
- logger.error("Tried joblib and xgboost loader on downloaded file but both failed.")
 
 
 
 
 
 
 
 
 
 
 
256
  return None
257
  except Exception as e:
258
  logger.exception(f"hf_hub_download failed: {e}")
259
  return None
260
 
261
  # -------------------------
262
- # Robust predict
263
  # -------------------------
264
  def predict(model, features):
265
- """
266
- Accepts:
267
- - dict (col_name -> value) -> builds a single-row pandas.DataFrame preserving key order
268
- - list/tuple -> single row (numeric)
269
- - list-of-lists -> batch
270
- Returns: {"prediction": ..., "probability": ...} or {"error": "..."}
271
- """
272
  if model is None:
273
  return {"error": "Model not loaded"}
274
 
@@ -278,12 +330,11 @@ def predict(model, features):
278
 
279
  is_booster = hasattr(model, "_is_xgb_booster")
280
 
281
- # dict -> DataFrame
282
  if isinstance(features, dict):
283
- col_names = [str(k) for k in features.keys()]
284
- row_values = [features[k] for k in features.keys()]
285
- df = _pd.DataFrame([row_values], columns=col_names)
286
- logger.info(f"Prepared DataFrame for prediction with columns: {col_names}")
287
 
288
  if is_booster:
289
  arr = df.values.astype(float)
@@ -295,8 +346,7 @@ def predict(model, features):
295
  prob = float(p[0][1])
296
  except:
297
  prob = None
298
- pred_val = int(preds[0]) if isinstance(preds, (list, tuple)) else int(preds)
299
- return {"prediction": pred_val, "probability": prob}
300
 
301
  if hasattr(model, "predict"):
302
  pred = model.predict(df)[0]
@@ -313,10 +363,10 @@ def predict(model, features):
313
  pass
314
  return {"prediction": pred, "probability": prob}
315
 
316
- return {"error": "Loaded model object not recognized (no predict method)"}
317
 
318
- # list/tuple single row
319
- if isinstance(features, (list, tuple)):
320
  arr2d = _np.array([features], dtype=float)
321
  if is_booster:
322
  preds = model.predict(arr2d)
@@ -327,8 +377,7 @@ def predict(model, features):
327
  prob = float(p[0][1])
328
  except:
329
  prob = None
330
- pred_val = int(preds[0]) if isinstance(preds, (list, tuple)) else int(preds)
331
- return {"prediction": pred_val, "probability": prob}
332
 
333
  if hasattr(model, "predict"):
334
  try:
@@ -392,7 +441,6 @@ def predict(model, features):
392
  return {"prediction": pred.tolist(), "probability": prob}
393
 
394
  return {"error": "Unsupported features format. Provide dict (col->val) or list of values."}
395
-
396
  except Exception as e:
397
  logger.exception(f"Prediction error: {e}")
398
  return {"error": str(e)}
 
1
  # predict_utils.py
2
+ # Robust loader with upfront patches + manual-unpickle fallback for sklearn/xgboost compatibility.
3
  import os
4
  import logging
5
  import joblib
6
+ import io
7
+ import pickle
8
  from huggingface_hub import hf_hub_download
9
 
10
  # Logging
 
23
  ]
24
 
25
  # -------------------------
26
+ # Upfront compatibility patches (run at import time)
27
  # -------------------------
28
+ def patch_sklearn_base():
29
+ """Make sure BaseEstimator exposes sklearn_tags/_get_tags/_more_tags used during unpickling."""
30
  try:
31
  import sklearn
32
  from sklearn.base import BaseEstimator
33
  except Exception as e:
34
+ logger.debug(f"sklearn not available to patch: {e}")
35
  return
36
 
37
+ # Provide sklearn_tags method if missing
38
+ if not hasattr(BaseEstimator, "sklearn_tags"):
39
+ def _sklearn_tags(self):
40
+ return {}
41
+ try:
 
 
42
  setattr(BaseEstimator, "sklearn_tags", _sklearn_tags)
43
+ logger.info("Patched BaseEstimator.sklearn_tags()")
44
+ except Exception as e:
45
+ logger.debug(f"Could not set BaseEstimator.sklearn_tags: {e}")
46
+
47
+ # Provide _get_tags if missing
48
+ if not hasattr(BaseEstimator, "_get_tags"):
49
+ def _get_tags(self):
50
+ tags = {}
51
+ more = getattr(self, "_more_tags", None)
52
+ if callable(more):
53
+ try:
54
+ tags.update(more())
55
+ except Exception:
56
+ pass
57
+ st = getattr(self, "sklearn_tags", None)
58
+ if callable(st):
59
+ try:
60
+ tags.update(st())
61
+ except Exception:
62
+ pass
63
+ return tags
64
+ try:
 
 
 
65
  setattr(BaseEstimator, "_get_tags", _get_tags)
66
  logger.info("Patched BaseEstimator._get_tags()")
67
+ except Exception as e:
68
+ logger.debug(f"Could not set BaseEstimator._get_tags: {e}")
69
 
70
+ # Provide a default _more_tags if missing
71
+ if not hasattr(BaseEstimator, "_more_tags"):
72
+ def _more_tags(self):
73
+ return {}
74
+ try:
75
  setattr(BaseEstimator, "_more_tags", _more_tags)
76
+ logger.info("Patched BaseEstimator._more_tags()")
77
+ except Exception as e:
78
+ logger.debug(f"Could not set BaseEstimator._more_tags: {e}")
79
 
80
+ def patch_xgboost_wrappers():
81
+ """Add common attributes expected by older pickles to XGBoost classes/base."""
 
 
 
82
  try:
83
  import xgboost as xgb
84
  except Exception as e:
85
+ logger.debug(f"xgboost not available to patch: {e}")
86
  return
87
 
 
88
  XGBModel = getattr(xgb, "XGBModel", None)
89
  if XGBModel is not None:
90
  for attr, val in {
 
102
  except Exception as e:
103
  logger.debug(f"Could not patch XGBModel.{attr}: {e}")
104
 
 
105
  for cls_name in ("XGBClassifier", "XGBRegressor"):
106
  cls = getattr(xgb, cls_name, None)
107
  if cls is not None:
 
120
  except Exception as e:
121
  logger.debug(f"Could not patch {cls_name}.{attr}: {e}")
122
 
123
+ # Apply upfront patches
124
+ patch_sklearn_base()
125
+ patch_xgboost_wrappers()
 
 
126
 
127
  # -------------------------
128
+ # Helpers: inspect file & try loaders
129
  # -------------------------
130
  def inspect_file(path):
131
  info = {"path": path, "exists": False}
 
139
  info["head_bytes"] = head
140
  try:
141
  info["head_text"] = head.decode("utf-8", errors="replace")
142
+ except Exception:
143
  info["head_text"] = None
144
  except Exception as e:
145
  info["inspect_error"] = str(e)
146
  return info
147
 
148
  def try_joblib_load(path):
149
+ """Try standard joblib load. Return ("joblib", model) or ("joblib", exception)"""
150
  try:
151
+ # Re-apply patches immediately before load (cover lazy imports)
152
+ patch_sklearn_base()
153
+ patch_xgboost_wrappers()
154
  logger.info(f"Trying joblib.load on {path}")
155
  m = joblib.load(path)
156
  logger.info("joblib.load succeeded")
 
159
  logger.exception(f"joblib.load failed: {e}")
160
  return ("joblib", e)
161
 
162
def manual_pickle_unpickle(path):
    """
    Last-resort loader: unpickle the raw file bytes with a custom Unpickler
    that redirects pickled references to sklearn/xgboost classes onto the
    live (patched) classes in the current environment.

    This can succeed when ``joblib.load`` fails due to base-class method
    mismatches between the pickling and unpickling library versions.

    Returns:
        ("manual_pickle", obj) on success, or ("manual_pickle", exception)
        on any failure (unreadable file, unpickle error, ...).

    NOTE(review): this bypasses joblib's own deserialization and assumes the
    file is a plain pickle stream — confirm for joblib files that use
    compression or memory mapping. Also, unpickling executes arbitrary code;
    only call this on model files from a trusted repo.
    """
    try:
        # Read the whole file up front; use a context manager so the file
        # handle is always closed (the previous version leaked it).
        with open(path, "rb") as fh:
            data = fh.read()
    except Exception as e:
        return ("manual_pickle", e)

    class PatchedUnpickler(pickle.Unpickler):
        def find_class(self, module, name):
            # Map pickled sklearn.base.BaseEstimator references to the live class.
            if module.startswith("sklearn.") and name == "BaseEstimator":
                try:
                    from sklearn.base import BaseEstimator as LiveBase
                    # Make sure the compatibility shim is present on the live class.
                    try:
                        if not hasattr(LiveBase, "sklearn_tags"):
                            def _sklearn_tags(self):
                                return {}
                            setattr(LiveBase, "sklearn_tags", _sklearn_tags)
                    except Exception:
                        pass
                    return LiveBase
                except Exception:
                    pass
            # Map pickled xgboost wrapper references to the live classes.
            if module.startswith("xgboost.") and name in ("XGBClassifier", "XGBRegressor", "XGBModel"):
                try:
                    import xgboost as xgb
                    cls = getattr(xgb, name, None)
                    if cls is not None:
                        return cls
                except Exception:
                    pass
            # Everything else resolves normally.
            return super().find_class(module, name)

    try:
        obj = PatchedUnpickler(io.BytesIO(data)).load()
        return ("manual_pickle", obj)
    except Exception as e:
        return ("manual_pickle", e)
208
  def try_xgboost_booster(path):
209
+ """Try loading file as a native xgboost booster (json/bin)"""
210
  try:
211
  import xgboost as xgb
212
  except Exception as e:
 
218
  booster = xgb.Booster()
219
  booster.load_model(path)
220
  logger.info("xgboost.Booster.load_model succeeded")
 
221
  class BoosterWrapper:
222
  def __init__(self, booster):
223
  self.booster = booster
224
  self._is_xgb_booster = True
 
225
  def predict(self, X):
226
  import numpy as _np, xgboost as _xgb
227
  arr = _np.array(X, dtype=float)
 
230
  if hasattr(pred, "ndim") and pred.ndim == 1:
231
  return (_np.where(pred >= 0.5, 1, 0)).tolist()
232
  return pred.tolist()
 
233
  def predict_proba(self, X):
234
  import numpy as _np, xgboost as _xgb
235
  arr = _np.array(X, dtype=float)
 
238
  if hasattr(pred, "ndim") and pred.ndim == 1:
239
  return (_np.vstack([1 - pred, pred]).T).tolist()
240
  return pred.tolist()
 
241
  return ("xgboost_booster", BoosterWrapper(booster))
242
  except Exception as e:
243
  logger.exception(f"xgboost.Booster.load_model failed: {e}")
244
  return ("xgboost_booster", e)
245
 
246
  # -------------------------
247
+ # Main loader: try local -> try HF -> fallbacks
248
  # -------------------------
249
  def load_model():
250
  logger.info("==== MODEL LOAD START ====")
 
252
  logger.info(f"Filename: {HF_MODEL_FILENAME}")
253
  logger.info(f"HF_TOKEN present? {bool(HF_TOKEN)}")
254
 
255
+ # try local candidates
256
  for path in LOCAL_CANDIDATES:
257
  try:
258
  info = inspect_file(path)
 
264
  if t == "joblib" and not isinstance(res, Exception):
265
  return res
266
 
267
+ # if joblib failed with sklearn_tags error, attempt manual unpickle
268
+ if t == "joblib" and isinstance(res, Exception):
269
+ msg = str(res)
270
+ if "sklearn_tags" in msg or "sklearn_tags" in getattr(res, "args", ()):
271
+ logger.info("joblib.load failed with sklearn_tags; trying manual pickle unpickle fallback")
272
+ tm, obj = manual_pickle_unpickle(path)
273
+ if tm == "manual_pickle" and not isinstance(obj, Exception):
274
+ logger.info("manual unpickle succeeded")
275
+ return obj
276
+ else:
277
+ logger.error("manual unpickle did not succeed; continuing to other fallbacks")
278
+
279
+ # try native booster
280
+ t2, res2 = try_xgboost_booster(path)
281
+ if t2 == "xgboost_booster" and not isinstance(res2, Exception):
282
+ return res2
283
 
284
  except Exception as e:
285
  logger.exception(f"Error while trying local candidate {path}: {e}")
286
 
287
+ # try huggingface hub
288
  try:
289
  logger.info(f"Trying hf_hub_download from {HF_MODEL_REPO}/{HF_MODEL_FILENAME}")
290
  model_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename=HF_MODEL_FILENAME, token=HF_TOKEN)
 
296
  if t == "joblib" and not isinstance(res, Exception):
297
  return res
298
 
299
+ if t == "joblib" and isinstance(res, Exception):
300
+ msg = str(res)
301
+ if "sklearn_tags" in msg or "sklearn_tags" in getattr(res, "args", ()):
302
+ logger.info("joblib.load failed on downloaded file with sklearn_tags; trying manual unpickle fallback")
303
+ tm, obj = manual_pickle_unpickle(model_path)
304
+ if tm == "manual_pickle" and not isinstance(obj, Exception):
305
+ logger.info("manual unpickle succeeded on downloaded file")
306
+ return obj
307
+ else:
308
+ logger.error("manual unpickle did not succeed on downloaded file")
309
+
310
+ t2, res2 = try_xgboost_booster(model_path)
311
+ if t2 == "xgboost_booster" and not isinstance(res2, Exception):
312
+ return res2
313
+
314
+ logger.error("Tried joblib/manual-unpickle and xgboost loader on downloaded file but all failed.")
315
  return None
316
  except Exception as e:
317
  logger.exception(f"hf_hub_download failed: {e}")
318
  return None
319
 
320
  # -------------------------
321
+ # Prediction helper: accepts dict (col->val), list, or list-of-lists
322
  # -------------------------
323
  def predict(model, features):
 
 
 
 
 
 
 
324
  if model is None:
325
  return {"error": "Model not loaded"}
326
 
 
330
 
331
  is_booster = hasattr(model, "_is_xgb_booster")
332
 
333
+ # dict -> DataFrame (preserve key order)
334
  if isinstance(features, dict):
335
+ cols = [str(k) for k in features.keys()]
336
+ row = [features[k] for k in features.keys()]
337
+ df = _pd.DataFrame([row], columns=cols)
 
338
 
339
  if is_booster:
340
  arr = df.values.astype(float)
 
346
  prob = float(p[0][1])
347
  except:
348
  prob = None
349
+ return {"prediction": int(preds[0]) if isinstance(preds, (list,tuple)) else int(preds), "probability": prob}
 
350
 
351
  if hasattr(model, "predict"):
352
  pred = model.predict(df)[0]
 
363
  pass
364
  return {"prediction": pred, "probability": prob}
365
 
366
+ return {"error": "Loaded model object not recognized"}
367
 
368
+ # list -> single row numeric
369
+ if isinstance(features, (list,tuple)):
370
  arr2d = _np.array([features], dtype=float)
371
  if is_booster:
372
  preds = model.predict(arr2d)
 
377
  prob = float(p[0][1])
378
  except:
379
  prob = None
380
+ return {"prediction": int(preds[0]), "probability": prob}
 
381
 
382
  if hasattr(model, "predict"):
383
  try:
 
441
  return {"prediction": pred.tolist(), "probability": prob}
442
 
443
  return {"error": "Unsupported features format. Provide dict (col->val) or list of values."}
 
444
  except Exception as e:
445
  logger.exception(f"Prediction error: {e}")
446
  return {"error": str(e)}