Set up logic for loading from Huggingface

Files changed:
- handler.py +15 -19
- pyproject.toml +1 -0
- uv.lock +0 -0
handler.py CHANGED

@@ -4,34 +4,30 @@ import spacy
 from sklearn.datasets import make_classification
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from huggingface_hub import hf_hub_download
+from joblib import load

 SPACY_MODEL = spacy.load('en_core_web_trf', disable=['parser'])  # Largest, slowest, most accurate model

+from environs import Env

-class EndpointHandler:
-    def __init__(self, path: str):
-        # model_dir = os.getenv("HF_MODEL_DIR", ".")
-        #
-        # with open(os.path.join(model_dir, "model.pkl"), "rb") as f:
-        #     self.model = pickle.load(f)
-        #
-        # # optional: you could also load a vocabulary or vectorizer
-        # with open(os.path.join(model_dir, "tokenizer.pkl"), "rb") as f:
-        #     self.vectorizer = pickle.load(f)
-
-        # 1. Generate synthetic binary classification data
-        X, y = make_classification(n_samples=100, n_features=4, n_classes=2, random_state=42)

-        # 2. Split into train/test sets
-        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

-        # 3. Create and train the Logistic Regression model
-        self.model = LogisticRegression()
-        self.model.fit(X_train, y_train)

+class EndpointHandler:
+    def __init__(self, path: str):
+        env = Env()
+        env.read_env()

+        model_path = env.str("MODEL_PATH")
+        downloaded_model_path = hf_hub_download(
+            repo_id="PDAP/url-relevance-models",
+            subfolder=model_path,
+            filename="model.joblib"
+        )
+        self.model = load(downloaded_model_path)

     def __call__(self, inputs: Dict[str, Any]) -> Dict[str, str]:
         # Expecting input like: {"inputs": "<html>...</html>"}
         html = inputs["inputs"]
-        return {"label": str(
+        return {"label": str(self.model)}
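With this change, __init__ reads MODEL_PATH from the environment (a local .env file is honored via environs), downloads model.joblib from the matching subfolder of the PDAP/url-relevance-models repo with hf_hub_download, and deserializes it with joblib. A minimal local smoke test of the new handler might look like the sketch below; the MODEL_PATH value is a hypothetical placeholder (the real subfolder name comes from the deployment's .env/environment), and note that __call__ currently just stringifies the loaded model rather than running a prediction:

import os

# Importing handler also loads the spaCy transformer model at module scope,
# so en_core_web_trf must be installed locally.
from handler import EndpointHandler

# Hypothetical subfolder name for illustration only; in the endpoint it comes
# from the MODEL_PATH environment variable or a .env file.
os.environ.setdefault("MODEL_PATH", "logistic-regression")

handler = EndpointHandler(path=".")

# The handler expects the same payload shape as the Inference Endpoint:
# {"inputs": "<html>...</html>"}.
result = handler({"inputs": "<html><body>Example page</body></html>"})
print(result)  # {"label": "<repr of the loaded estimator>"} for now, since __call__ only stringifies the model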
pyproject.toml CHANGED

@@ -5,6 +5,7 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
+    "environs>=14.2.0",
     "huggingface-hub>=0.33.2",
     "scikit-learn>=1.7.0",
     "spacy>=3.8.7",
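The new environs dependency backs the MODEL_PATH lookup in handler.py. As a rough sketch of the pattern (variable value hypothetical): Env.read_env() loads a local .env file into the process environment if one is present, already-set environment variables take precedence, and env.str() raises if the variable is missing unless a default is supplied:

from environs import Env

env = Env()
env.read_env()  # picks up a local .env file if present; existing env vars win

# Raises an environs EnvError if MODEL_PATH is unset; pass default="..." to make it optional.
model_path = env.str("MODEL_PATH")
print(model_path)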
uv.lock CHANGED

The diff for this file is too large to render. See raw diff.