maxachis committed on
Commit
ec19c57
·
1 Parent(s): d8d9050

Set up logic for loading from Huggingface

Browse files
Files changed (3) hide show
  1. handler.py +15 -19
  2. pyproject.toml +1 -0
  3. uv.lock +0 -0
handler.py CHANGED
@@ -4,34 +4,30 @@ import spacy
4
  from sklearn.datasets import make_classification
5
  from sklearn.linear_model import LogisticRegression
6
  from sklearn.model_selection import train_test_split
 
 
7
 
8
  SPACY_MODEL = spacy.load('en_core_web_trf', disable=['parser']) # Largest, slowest, most accurate model
9
 
 
10
 
11
- class EndpointHandler:
12
- def __init__(self, path: str):
13
- # model_dir = os.getenv("HF_MODEL_DIR", ".")
14
- #
15
- # with open(os.path.join(model_dir, "model.pkl"), "rb") as f:
16
- # self.model = pickle.load(f)
17
- #
18
- # # optional: you could also load a vocabulary or vectorizer
19
- # with open(os.path.join(model_dir, "tokenizer.pkl"), "rb") as f:
20
- # self.vectorizer = pickle.load(f)
21
-
22
- # 1. Generate synthetic binary classification data
23
- X, y = make_classification(n_samples=100, n_features=4, n_classes=2, random_state=42)
24
 
25
- # 2. Split into train/test sets
26
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
27
 
28
- # 3. Create and train the Logistic Regression model
29
- self.model = LogisticRegression()
30
- self.model.fit(X_train, y_train)
31
 
 
 
 
 
32
 
 
 
 
 
 
 
 
33
 
34
  def __call__(self, inputs: Dict[str, Any]) -> Dict[str, str]:
35
  # Expecting input like: {"inputs": "<html>...</html>"}
36
  html = inputs["inputs"]
37
- return {"label": str(1)}
 
4
  from sklearn.datasets import make_classification
5
  from sklearn.linear_model import LogisticRegression
6
  from sklearn.model_selection import train_test_split
7
+ from huggingface_hub import hf_hub_download
8
+ from joblib import load
9
 
10
  SPACY_MODEL = spacy.load('en_core_web_trf', disable=['parser']) # Largest, slowest, most accurate model
11
 
12
+ from environs import Env
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
 
 
15
 
 
 
 
16
 
17
+ class EndpointHandler:
18
+ def __init__(self, path: str):
19
+ env = Env()
20
+ env.read_env()
21
 
22
+ model_path = env.str("MODEL_PATH")
23
+ downloaded_model_path = hf_hub_download(
24
+ repo_id="PDAP/url-relevance-models",
25
+ subfolder=model_path,
26
+ filename="model.joblib"
27
+ )
28
+ self.model = load(downloaded_model_path)
29
 
30
  def __call__(self, inputs: Dict[str, Any]) -> Dict[str, str]:
31
  # Expecting input like: {"inputs": "<html>...</html>"}
32
  html = inputs["inputs"]
33
+ return {"label": str(self.model)}
pyproject.toml CHANGED
@@ -5,6 +5,7 @@ description = "Add your description here"
5
  readme = "README.md"
6
  requires-python = ">=3.13"
7
  dependencies = [
 
8
  "huggingface-hub>=0.33.2",
9
  "scikit-learn>=1.7.0",
10
  "spacy>=3.8.7",
 
5
  readme = "README.md"
6
  requires-python = ">=3.13"
7
  dependencies = [
8
+ "environs>=14.2.0",
9
  "huggingface-hub>=0.33.2",
10
  "scikit-learn>=1.7.0",
11
  "spacy>=3.8.7",
uv.lock CHANGED
The diff for this file is too large to render. See raw diff