ckoozzzu
/

NextPlace

Model card · Files and versions

xet

Community

ckoozzzu committed on May 28, 2025

Commit

fab83bb

verified ·

1 Parent(s): 1370b15

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

MLBaseModelDriver.py +49 -43

MLBaseModelDriver.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import torch
 import sys
 import pandas as pd
 from typing import TypedDict, Optional, Tuple
 import datetime
 import math
@@ -8,12 +10,57 @@ import importlib.util
 from huggingface_hub import hf_hub_download
 import pickle
 """
 Data container class representing the data shape of the synapse coming into `run_inference`
 """
 class ProcessedSynapse(TypedDict):
     id: Optional[str]
     nextplace_id: Optional[str]
@@ -38,24 +85,12 @@ class ProcessedSynapse(TypedDict):
     query_date: Optional[str]
-"""
-This class must do two things
-1) The constructor must load the model
-2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
-    of float, str representing the predicted sale price and the predicted sale date.
-"""
 class MLBaseModelDriver:
     def __init__(self):
         self.model, self.label_encoder, self.scaler = self.load_model()
     def load_model(self) -> Tuple[any, any, any]:
-        """
-        load the model and model parameters
-        :return: model, label encoder, and scaler
-        """
         print(f"Loading model...")
         model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
         model_class = self._import_model_class(model_class_file)
@@ -65,7 +100,6 @@ class MLBaseModelDriver:
         model.load_state_dict(state_dict)
         model.eval()
-        # Load additional artifacts
         with open(scaler_file, 'rb') as f:
             scaler = pickle.load(f)
@@ -76,28 +110,16 @@ class MLBaseModelDriver:
         return model, label_encoders, scaler
     def _download_model_files(self) -> Tuple[str, str, str, str]:
-        """
-        download files from hugging face
-        :return: downloaded files
-        """
         model_path = "ckoozzzu/NextPlace"
-        # Download the model files from the Hugging Face Hub
         model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
         scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
         label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
         model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
-        # Load the model and artifacts
         return model_file, scaler_file, label_encoders_file, model_class_file
     def _import_model_class(self, model_class_file):
-        """
-        import the model class and instantiate it
-        :param model_class_file: file path to the model class
-        :return: None
-        """
-        # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
         module_name = "MLBaseModel"
         spec = importlib.util.spec_from_file_location(module_name, model_class_file)
         model_module = importlib.util.module_from_spec(spec)
@@ -110,11 +132,6 @@ class MLBaseModelDriver:
             raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
     def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
-        """
-        run inference using the MLBaseModel
-        :param input_data: synapse from the validator
-        :return: the predicted sale price and date
-        """
         input_tensor = self._preprocess_input(input_data)
         with torch.no_grad():
@@ -126,12 +143,6 @@ class MLBaseModelDriver:
         return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")
     def _sale_date_predictor(self, days_on_market: int, predicted_days_on_market: int) -> datetime.date:
-        """
-        convert predicted days on market to a sale date
-        :param days_on_market: number of days this home has been on the market
-        :param predicted_days_on_market: the predicted number of days for this home on the market
-        :return: the predicted sale date
-        """
         if days_on_market < predicted_days_on_market:
             days_until_sale = predicted_days_on_market - days_on_market
             sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
@@ -140,11 +151,6 @@ class MLBaseModelDriver:
             return datetime.date.today() + datetime.timedelta(days=1)
     def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor:
-        """
-        preprocess the input for inference
-        :param data: synapse from the validator
-        :return: tensor representing the synapse
-        """
         df = pd.DataFrame([data])
         default_beds = 3
         default_sqft = 1500.0

 import torch
 import sys
 import pandas as pd
+import numpy as np
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from typing import TypedDict, Optional, Tuple
 import datetime
 import math
 from huggingface_hub import hf_hub_download
 import pickle
+# Data preprocessing class
+class DataPreprocessor:
+    def __init__(self):
+        self.feature_scaler = StandardScaler()
+        self.target_scaler = StandardScaler()
+        self.encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
+    def fit_transform(self, df):
+        df['listing_date'] = pd.to_datetime(df['listing_date'])
+        df['sale_date'] = pd.to_datetime(df['sale_date'])
+        df['days_on_market'] = (df['sale_date'] - df['listing_date']).dt.days
+        df['age'] = df['listing_date'].dt.year - df['year_built']
+        df = df[df['days_on_market'] >= 0].dropna(subset=['days_on_market'])
+        df = df.fillna({
+            'beds': df['beds'].median(),
+            'baths': df['baths'].median(),
+            'sqft': df['sqft'].median(),
+            'year_built': df['year_built'].median(),
+            'listing_price': df['listing_price'].median(),
+            'age': df['age'].median()
+        })
+        df = df[(df['sale_price'] > 50000) & (df['sale_price'] < 2000000)]
+        cat_feature = self.encoder.fit_transform(df[['property_type']])
+        cat_df = pd.DataFrame(cat_feature, columns=self.encoder.get_feature_names_out(['property_type']))
+        df = df.reset_index(drop=True).join(cat_df)
+        for col in ['sale_price', 'listing_price', 'sqft']:
+            df[col] = np.log1p(df[col])
+        features = ['beds', 'baths', 'sqft', 'listing_price', 'days_on_market', 'age'] + list(cat_df.columns)
+        targets = ['sale_price']
+        X = df[features]
+        y = df[['sale_price']]
+        X_scaled = self.feature_scaler.fit_transform(X)
+        y_scaled = self.target_scaler.fit_transform(y)
+        self.features = features
+        return pd.DataFrame(X_scaled, columns=features), pd.DataFrame(y_scaled, columns=targets)
+    def inverse_transform_target(self, y_scaled):
+        return np.expm1(self.target_scaler.inverse_transform(y_scaled.reshape(-1, 1)).flatten())
 """
 Data container class representing the data shape of the synapse coming into `run_inference`
 """
 class ProcessedSynapse(TypedDict):
     id: Optional[str]
     nextplace_id: Optional[str]
     query_date: Optional[str]
 class MLBaseModelDriver:
     def __init__(self):
         self.model, self.label_encoder, self.scaler = self.load_model()
     def load_model(self) -> Tuple[any, any, any]:
         print(f"Loading model...")
         model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
         model_class = self._import_model_class(model_class_file)
         model.load_state_dict(state_dict)
         model.eval()
         with open(scaler_file, 'rb') as f:
             scaler = pickle.load(f)
         return model, label_encoders, scaler
     def _download_model_files(self) -> Tuple[str, str, str, str]:
         model_path = "ckoozzzu/NextPlace"
         model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
         scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
         label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
         model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
         return model_file, scaler_file, label_encoders_file, model_class_file
     def _import_model_class(self, model_class_file):
         module_name = "MLBaseModel"
         spec = importlib.util.spec_from_file_location(module_name, model_class_file)
         model_module = importlib.util.module_from_spec(spec)
             raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
     def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
         input_tensor = self._preprocess_input(input_data)
         with torch.no_grad():
         return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")
     def _sale_date_predictor(self, days_on_market: int, predicted_days_on_market: int) -> datetime.date:
         if days_on_market < predicted_days_on_market:
             days_until_sale = predicted_days_on_market - days_on_market
             sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
             return datetime.date.today() + datetime.timedelta(days=1)
     def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor:
         df = pd.DataFrame([data])
         default_beds = 3
         default_sqft = 1500.0