Set up logic for loading from Huggingface

Files changed:
- handler.py +15 -19
- pyproject.toml +1 -0
- uv.lock +0 -0
handler.py CHANGED

@@ -4,34 +4,30 @@ import spacy
 from sklearn.datasets import make_classification
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from huggingface_hub import hf_hub_download
+from joblib import load

 SPACY_MODEL = spacy.load('en_core_web_trf', disable=['parser'])  # Largest, slowest, most accurate model

+from environs import Env

-class EndpointHandler:
-    def __init__(self, path: str):
-        # model_dir = os.getenv("HF_MODEL_DIR", ".")
-        #
-        # with open(os.path.join(model_dir, "model.pkl"), "rb") as f:
-        #     self.model = pickle.load(f)
-        #
-        # # optional: you could also load a vocabulary or vectorizer
-        # with open(os.path.join(model_dir, "tokenizer.pkl"), "rb") as f:
-        #     self.vectorizer = pickle.load(f)
-
-        # 1. Generate synthetic binary classification data
-        X, y = make_classification(n_samples=100, n_features=4, n_classes=2, random_state=42)

-        # 2. Split into train/test sets
-        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

-        # 3. Create and train the Logistic Regression model
-        self.model = LogisticRegression()
-        self.model.fit(X_train, y_train)

+class EndpointHandler:
+    def __init__(self, path: str):
+        env = Env()
+        env.read_env()

+        model_path = env.str("MODEL_PATH")
+        downloaded_model_path = hf_hub_download(
+            repo_id="PDAP/url-relevance-models",
+            subfolder=model_path,
+            filename="model.joblib"
+        )
+        self.model = load(downloaded_model_path)

     def __call__(self, inputs: Dict[str, Any]) -> Dict[str, str]:
         # Expecting input like: {"inputs": "<html>...</html>"}
         html = inputs["inputs"]
-        return {"label": str(
+        return {"label": str(self.model)}
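With this change, __init__ reads MODEL_PATH from the environment (a local .env file is honored via environs), downloads model.joblib from the matching subfolder of the PDAP/url-relevance-models repo with hf_hub_download, and deserializes it with joblib. A minimal local smoke test of the new handler might look like the sketch below; the MODEL_PATH value is a hypothetical placeholder (the real subfolder name comes from the deployment's .env/environment), and note that __call__ currently just stringifies the loaded model rather than running a prediction:

import os

# Importing handler also loads the spaCy transformer model at module scope,
# so en_core_web_trf must be installed locally.
from handler import EndpointHandler

# Hypothetical subfolder name for illustration only; in the endpoint it comes
# from the MODEL_PATH environment variable or a .env file.
os.environ.setdefault("MODEL_PATH", "logistic-regression")

handler = EndpointHandler(path=".")

# The handler expects the same payload shape as the Inference Endpoint:
# {"inputs": "<html>...</html>"}.
result = handler({"inputs": "<html><body>Example page</body></html>"})
print(result)  # {"label": "<repr of the loaded estimator>"} for now, since __call__ only stringifies the model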
pyproject.toml CHANGED

@@ -5,6 +5,7 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
+    "environs>=14.2.0",
     "huggingface-hub>=0.33.2",
     "scikit-learn>=1.7.0",
     "spacy>=3.8.7",
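The new environs dependency backs the MODEL_PATH lookup in handler.py. As a rough sketch of the pattern (variable value hypothetical): Env.read_env() loads a local .env file into the process environment if one is present, already-set environment variables take precedence, and env.str() raises if the variable is missing unless a default is supplied:

from environs import Env

env = Env()
env.read_env()  # picks up a local .env file if present; existing env vars win

# Raises an environs EnvError if MODEL_PATH is unset; pass default="..." to make it optional.
model_path = env.str("MODEL_PATH")
print(model_path)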
uv.lock CHANGED

The diff for this file is too large to render. See raw diff.