ckoozzzu committed on
Commit
fab83bb
·
verified ·
1 Parent(s): 1370b15

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. MLBaseModelDriver.py +49 -43
MLBaseModelDriver.py CHANGED
@@ -1,6 +1,8 @@
1
  import torch
2
  import sys
3
  import pandas as pd
 
 
4
  from typing import TypedDict, Optional, Tuple
5
  import datetime
6
  import math
@@ -8,12 +10,57 @@ import importlib.util
8
  from huggingface_hub import hf_hub_download
9
  import pickle
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  """
13
  Data container class representing the data shape of the synapse coming into `run_inference`
14
  """
15
-
16
-
17
  class ProcessedSynapse(TypedDict):
18
  id: Optional[str]
19
  nextplace_id: Optional[str]
@@ -38,24 +85,12 @@ class ProcessedSynapse(TypedDict):
38
  query_date: Optional[str]
39
 
40
 
41
- """
42
- This class must do two things
43
- 1) The constructor must load the model
44
- 2) This class must implement a method called `run_inference` that takes the input data and returns a tuple
45
- of float, str representing the predicted sale price and the predicted sale date.
46
- """
47
-
48
-
49
  class MLBaseModelDriver:
50
 
51
  def __init__(self):
52
  self.model, self.label_encoder, self.scaler = self.load_model()
53
 
54
  def load_model(self) -> Tuple[any, any, any]:
55
- """
56
- load the model and model parameters
57
- :return: model, label encoder, and scaler
58
- """
59
  print(f"Loading model...")
60
  model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
61
  model_class = self._import_model_class(model_class_file)
@@ -65,7 +100,6 @@ class MLBaseModelDriver:
65
  model.load_state_dict(state_dict)
66
  model.eval()
67
 
68
- # Load additional artifacts
69
  with open(scaler_file, 'rb') as f:
70
  scaler = pickle.load(f)
71
 
@@ -76,28 +110,16 @@ class MLBaseModelDriver:
76
  return model, label_encoders, scaler
77
 
78
  def _download_model_files(self) -> Tuple[str, str, str, str]:
79
- """
80
- download files from hugging face
81
- :return: downloaded files
82
- """
83
  model_path = "ckoozzzu/NextPlace"
84
 
85
- # Download the model files from the Hugging Face Hub
86
  model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
87
  scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
88
  label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
89
  model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
90
 
91
- # Load the model and artifacts
92
  return model_file, scaler_file, label_encoders_file, model_class_file
93
 
94
  def _import_model_class(self, model_class_file):
95
- """
96
- import the model class and instantiate it
97
- :param model_class_file: file path to the model class
98
- :return: None
99
- """
100
- # Reference docs here: https://docs.python.org/3/library/importlib.html#importlib.util.spec_from_loader
101
  module_name = "MLBaseModel"
102
  spec = importlib.util.spec_from_file_location(module_name, model_class_file)
103
  model_module = importlib.util.module_from_spec(spec)
@@ -110,11 +132,6 @@ class MLBaseModelDriver:
110
  raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
111
 
112
  def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
113
- """
114
- run inference using the MLBaseModel
115
- :param input_data: synapse from the validator
116
- :return: the predicted sale price and date
117
- """
118
  input_tensor = self._preprocess_input(input_data)
119
 
120
  with torch.no_grad():
@@ -126,12 +143,6 @@ class MLBaseModelDriver:
126
  return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")
127
 
128
  def _sale_date_predictor(self, days_on_market: int, predicted_days_on_market: int) -> datetime.date:
129
- """
130
- convert predicted days on market to a sale date
131
- :param days_on_market: number of days this home has been on the market
132
- :param predicted_days_on_market: the predicted number of days for this home on the market
133
- :return: the predicted sale date
134
- """
135
  if days_on_market < predicted_days_on_market:
136
  days_until_sale = predicted_days_on_market - days_on_market
137
  sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
@@ -140,11 +151,6 @@ class MLBaseModelDriver:
140
  return datetime.date.today() + datetime.timedelta(days=1)
141
 
142
  def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor:
143
- """
144
- preprocess the input for inference
145
- :param data: synapse from the validator
146
- :return: tensor representing the synapse
147
- """
148
  df = pd.DataFrame([data])
149
  default_beds = 3
150
  default_sqft = 1500.0
 
1
  import torch
2
  import sys
3
  import pandas as pd
4
+ import numpy as np
5
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
  from typing import TypedDict, Optional, Tuple
7
  import datetime
8
  import math
 
10
  from huggingface_hub import hf_hub_download
11
  import pickle
12
 
13
# Preprocessing helper: fits the feature/target scalers and the one-hot
# encoder on a raw listings frame, and exposes the inverse transform needed
# to decode model predictions back into sale prices.
class DataPreprocessor:

    def __init__(self):
        """Create unfitted scalers and a one-hot encoder for `property_type`."""
        self.feature_scaler = StandardScaler()
        self.target_scaler = StandardScaler()
        # scikit-learn renamed `sparse` -> `sparse_output` in 1.2 and removed
        # the old keyword in 1.4; try the new name first so the code runs on
        # both old and current sklearn releases.
        try:
            self.encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        except TypeError:
            self.encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

    def fit_transform(self, df):
        """Clean, encode, and scale a raw listings DataFrame, fitting all transformers.

        :param df: DataFrame expected to contain listing_date, sale_date,
            year_built, beds, baths, sqft, listing_price, sale_price and
            property_type columns (TODO confirm schema against the caller)
        :return: tuple of (X_scaled, y_scaled) DataFrames ready for training
        """
        # Work on a copy so the caller's frame is not mutated by the
        # column assignments below (the original modified `df` in place).
        df = df.copy()

        # Derived features: market duration and property age at listing time.
        df['listing_date'] = pd.to_datetime(df['listing_date'])
        df['sale_date'] = pd.to_datetime(df['sale_date'])
        df['days_on_market'] = (df['sale_date'] - df['listing_date']).dt.days
        df['age'] = df['listing_date'].dt.year - df['year_built']
        # Drop rows with impossible (negative) or unknown market durations.
        df = df[df['days_on_market'] >= 0].dropna(subset=['days_on_market'])

        # Impute remaining gaps with per-column medians.
        df = df.fillna({
            'beds': df['beds'].median(),
            'baths': df['baths'].median(),
            'sqft': df['sqft'].median(),
            'year_built': df['year_built'].median(),
            'listing_price': df['listing_price'].median(),
            'age': df['age'].median()
        })

        # Discard extreme sale prices — presumably data errors or outliers
        # outside the model's intended range.
        df = df[(df['sale_price'] > 50000) & (df['sale_price'] < 2000000)]

        # One-hot encode the categorical property type and append the columns;
        # reset_index so the positional join with cat_df lines up row-for-row.
        cat_feature = self.encoder.fit_transform(df[['property_type']])
        cat_df = pd.DataFrame(cat_feature, columns=self.encoder.get_feature_names_out(['property_type']))
        df = df.reset_index(drop=True).join(cat_df)

        # Log-transform the heavily skewed monetary/size columns.
        for col in ['sale_price', 'listing_price', 'sqft']:
            df[col] = np.log1p(df[col])

        features = ['beds', 'baths', 'sqft', 'listing_price', 'days_on_market', 'age'] + list(cat_df.columns)
        targets = ['sale_price']

        X = df[features]
        y = df[['sale_price']]

        X_scaled = self.feature_scaler.fit_transform(X)
        y_scaled = self.target_scaler.fit_transform(y)

        # Remember the fitted feature order for later transform calls.
        self.features = features
        return pd.DataFrame(X_scaled, columns=features), pd.DataFrame(y_scaled, columns=targets)

    def inverse_transform_target(self, y_scaled):
        """Undo the target scaling and the log1p transform.

        :param y_scaled: array-like of scaled log-price predictions
        :return: 1-D numpy array of sale prices in original (dollar) units
        """
        return np.expm1(self.target_scaler.inverse_transform(y_scaled.reshape(-1, 1)).flatten())
59
+
60
 
61
  """
62
  Data container class representing the data shape of the synapse coming into `run_inference`
63
  """
 
 
64
  class ProcessedSynapse(TypedDict):
65
  id: Optional[str]
66
  nextplace_id: Optional[str]
 
85
  query_date: Optional[str]
86
 
87
 
 
 
 
 
 
 
 
 
88
  class MLBaseModelDriver:
89
 
90
  def __init__(self):
91
  self.model, self.label_encoder, self.scaler = self.load_model()
92
 
93
  def load_model(self) -> Tuple[any, any, any]:
 
 
 
 
94
  print(f"Loading model...")
95
  model_file, scaler_file, label_encoders_file, model_class_file = self._download_model_files()
96
  model_class = self._import_model_class(model_class_file)
 
100
  model.load_state_dict(state_dict)
101
  model.eval()
102
 
 
103
  with open(scaler_file, 'rb') as f:
104
  scaler = pickle.load(f)
105
 
 
110
  return model, label_encoders, scaler
111
 
112
  def _download_model_files(self) -> Tuple[str, str, str, str]:
 
 
 
 
113
  model_path = "ckoozzzu/NextPlace"
114
 
 
115
  model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
116
  scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
117
  label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
118
  model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
119
 
 
120
  return model_file, scaler_file, label_encoders_file, model_class_file
121
 
122
  def _import_model_class(self, model_class_file):
 
 
 
 
 
 
123
  module_name = "MLBaseModel"
124
  spec = importlib.util.spec_from_file_location(module_name, model_class_file)
125
  model_module = importlib.util.module_from_spec(spec)
 
132
  raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
133
 
134
  def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
 
 
 
 
 
135
  input_tensor = self._preprocess_input(input_data)
136
 
137
  with torch.no_grad():
 
143
  return float(predicted_sale_price), predicted_sale_date.strftime("%Y-%m-%d")
144
 
145
  def _sale_date_predictor(self, days_on_market: int, predicted_days_on_market: int) -> datetime.date:
 
 
 
 
 
 
146
  if days_on_market < predicted_days_on_market:
147
  days_until_sale = predicted_days_on_market - days_on_market
148
  sale_date = datetime.date.today() + datetime.timedelta(days=days_until_sale)
 
151
  return datetime.date.today() + datetime.timedelta(days=1)
152
 
153
  def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor:
 
 
 
 
 
154
  df = pd.DataFrame([data])
155
  default_beds = 3
156
  default_sqft = 1500.0