| from typing import Dict, Any | |
| import spacy | |
| from sklearn.datasets import make_classification | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.model_selection import train_test_split | |
# Largest, slowest, most accurate model; loaded once at import time with the
# "parser" component disabled.
SPACY_MODEL = spacy.load(
    "en_core_web_trf",
    disable=["parser"],
)
class EndpointHandler:
    """Callable request handler backed by a placeholder classifier.

    Constructing the handler fits a ``LogisticRegression`` on synthetic data.
    Calling the handler currently returns a constant label; the fitted model
    is stored on ``self.model`` but is not consulted yet.
    """

    def __init__(self, path: str):
        """Fit the placeholder model and store it on ``self.model``.

        Args:
            path: Not used by the current implementation.
        """
        # TODO: replace the synthetic model with real artifacts. An earlier
        # draft loaded a pickled model ("model.pkl") and an optional
        # vectorizer ("tokenizer.pkl") from the directory named by the
        # HF_MODEL_DIR environment variable (defaulting to ".").

        # Synthetic binary-classification data: 100 samples, 4 features.
        features, labels = make_classification(
            n_samples=100,
            n_features=4,
            n_classes=2,
            random_state=42,
        )

        # 80/20 split with a fixed seed; only the training portion is used,
        # so the held-out portion is discarded.
        features_train, _, labels_train, _ = train_test_split(
            features,
            labels,
            test_size=0.2,
            random_state=42,
        )

        self.model = LogisticRegression()
        self.model.fit(features_train, labels_train)

    def __call__(self, inputs: Dict[str, Any]) -> Dict[str, str]:
        """Return the label for a request (currently always ``"1"``).

        Args:
            inputs: Request payload; must contain an ``"inputs"`` key. The
                expected shape is ``{"inputs": "<html>...</html>"}``.

        Returns:
            ``{"label": "1"}`` for every request.

        Raises:
            KeyError: If ``inputs`` has no ``"inputs"`` key.
        """
        # The payload is looked up even though it is not used yet, so a
        # request without the "inputs" key still fails with KeyError.
        _ = inputs["inputs"]

        # TODO: derive the label from the payload using self.model; the
        # response is a hard-coded placeholder for now.
        return {"label": "1"}