ckoozzzu committed on
Commit
cb8c13e
·
verified ·
1 Parent(s): fab83bb

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. MLBaseModelDriver.py +21 -69
MLBaseModelDriver.py CHANGED
@@ -1,8 +1,6 @@
1
  import torch
2
  import sys
3
  import pandas as pd
4
- import numpy as np
5
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
  from typing import TypedDict, Optional, Tuple
7
  import datetime
8
  import math
@@ -10,57 +8,25 @@ import importlib.util
10
  from huggingface_hub import hf_hub_download
11
  import pickle
12
 
13
- # Класс предобработки
14
  class DataPreprocessor:
15
- def __init__(self):
16
- self.feature_scaler = StandardScaler()
17
- self.target_scaler = StandardScaler()
18
- self.encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
19
-
20
- def fit_transform(self, df):
21
- df['listing_date'] = pd.to_datetime(df['listing_date'])
22
- df['sale_date'] = pd.to_datetime(df['sale_date'])
23
- df['days_on_market'] = (df['sale_date'] - df['listing_date']).dt.days
24
- df['age'] = df['listing_date'].dt.year - df['year_built']
25
- df = df[df['days_on_market'] >= 0].dropna(subset=['days_on_market'])
26
-
27
- df = df.fillna({
28
- 'beds': df['beds'].median(),
29
- 'baths': df['baths'].median(),
30
- 'sqft': df['sqft'].median(),
31
- 'year_built': df['year_built'].median(),
32
- 'listing_price': df['listing_price'].median(),
33
- 'age': df['age'].median()
34
- })
35
-
36
- df = df[(df['sale_price'] > 50000) & (df['sale_price'] < 2000000)]
37
-
38
- cat_feature = self.encoder.fit_transform(df[['property_type']])
39
- cat_df = pd.DataFrame(cat_feature, columns=self.encoder.get_feature_names_out(['property_type']))
40
- df = df.reset_index(drop=True).join(cat_df)
41
-
42
- for col in ['sale_price', 'listing_price', 'sqft']:
43
- df[col] = np.log1p(df[col])
44
-
45
- features = ['beds', 'baths', 'sqft', 'listing_price', 'days_on_market', 'age'] + list(cat_df.columns)
46
- targets = ['sale_price']
47
-
48
- X = df[features]
49
- y = df[['sale_price']]
50
-
51
- X_scaled = self.feature_scaler.fit_transform(X)
52
- y_scaled = self.target_scaler.fit_transform(y)
53
-
54
- self.features = features
55
- return pd.DataFrame(X_scaled, columns=features), pd.DataFrame(y_scaled, columns=targets)
56
-
57
- def inverse_transform_target(self, y_scaled):
58
- return np.expm1(self.target_scaler.inverse_transform(y_scaled.reshape(-1, 1)).flatten())
59
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- """
62
- Data container class representing the data shape of the synapse coming into `run_inference`
63
- """
64
  class ProcessedSynapse(TypedDict):
65
  id: Optional[str]
66
  nextplace_id: Optional[str]
@@ -84,11 +50,12 @@ class ProcessedSynapse(TypedDict):
84
  hoa_dues: Optional[float]
85
  query_date: Optional[str]
86
 
87
-
88
  class MLBaseModelDriver:
89
 
90
  def __init__(self):
91
- self.model, self.label_encoder, self.scaler = self.load_model()
 
92
 
93
  def load_model(self) -> Tuple[any, any, any]:
94
  print(f"Loading model...")
@@ -111,12 +78,10 @@ class MLBaseModelDriver:
111
 
112
  def _download_model_files(self) -> Tuple[str, str, str, str]:
113
  model_path = "ckoozzzu/NextPlace"
114
-
115
  model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
116
  scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
117
  label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
118
  model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
119
-
120
  return model_file, scaler_file, label_encoders_file, model_class_file
121
 
122
  def _import_model_class(self, model_class_file):
@@ -132,7 +97,8 @@ class MLBaseModelDriver:
132
  raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
133
 
134
  def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
135
- input_tensor = self._preprocess_input(input_data)
 
136
 
137
  with torch.no_grad():
138
  prediction = self.model(input_tensor)
@@ -149,17 +115,3 @@ class MLBaseModelDriver:
149
  return sale_date
150
  else:
151
  return datetime.date.today() + datetime.timedelta(days=1)
152
-
153
- def _preprocess_input(self, data: ProcessedSynapse) -> torch.tensor:
154
- df = pd.DataFrame([data])
155
- default_beds = 3
156
- default_sqft = 1500.0
157
- default_property_type = '6'
158
- df['beds'] = df['beds'].fillna(default_beds)
159
- df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
160
- df['property_type'] = df['property_type'].fillna(default_property_type)
161
- df['property_type'] = df['property_type'].astype(int)
162
- df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])
163
- X = df[['beds', 'sqft', 'property_type', 'price']]
164
- input_tensor = torch.tensor(X.values, dtype=torch.float32)
165
- return input_tensor
 
1
  import torch
2
  import sys
3
  import pandas as pd
 
 
4
  from typing import TypedDict, Optional, Tuple
5
  import datetime
6
  import math
 
8
  from huggingface_hub import hf_hub_download
9
  import pickle
10
 
11
+ # --------- DataPreprocessor class ---------
12
  class DataPreprocessor:
13
+ def __init__(self, label_encoders, scaler):
14
+ self.label_encoders = label_encoders
15
+ self.scaler = scaler
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ def preprocess(self, df: pd.DataFrame) -> torch.Tensor:
18
+ default_beds = 3
19
+ default_sqft = 1500.0
20
+ default_property_type = '6'
21
+ df['beds'] = df['beds'].fillna(default_beds)
22
+ df['sqft'] = pd.to_numeric(df['sqft'], errors='coerce').fillna(default_sqft)
23
+ df['property_type'] = df['property_type'].fillna(default_property_type)
24
+ df['property_type'] = df['property_type'].astype(int)
25
+ df[['sqft', 'price']] = self.scaler.transform(df[['sqft', 'price']])
26
+ X = df[['beds', 'sqft', 'property_type', 'price']]
27
+ return torch.tensor(X.values, dtype=torch.float32)
28
 
29
+ # --------- ProcessedSynapse class ---------
 
 
30
  class ProcessedSynapse(TypedDict):
31
  id: Optional[str]
32
  nextplace_id: Optional[str]
 
50
  hoa_dues: Optional[float]
51
  query_date: Optional[str]
52
 
53
+ # --------- MLBaseModelDriver class ---------
54
  class MLBaseModelDriver:
55
 
56
  def __init__(self):
57
+ self.model, self.label_encoders, self.scaler = self.load_model()
58
+ self.preprocessor = DataPreprocessor(self.label_encoders, self.scaler)
59
 
60
  def load_model(self) -> Tuple[any, any, any]:
61
  print(f"Loading model...")
 
78
 
79
  def _download_model_files(self) -> Tuple[str, str, str, str]:
80
  model_path = "ckoozzzu/NextPlace"
 
81
  model_file = hf_hub_download(repo_id=model_path, filename="model_files/real_estate_model.pth")
82
  scaler_file = hf_hub_download(repo_id=model_path, filename="model_files/scaler.pkl")
83
  label_encoders_file = hf_hub_download(repo_id=model_path, filename="model_files/label_encoder.pkl")
84
  model_class_file = hf_hub_download(repo_id=model_path, filename="MLBaseModel.py")
 
85
  return model_file, scaler_file, label_encoders_file, model_class_file
86
 
87
  def _import_model_class(self, model_class_file):
 
97
  raise AttributeError(f"The module does not contain a class named 'MLBaseModel'")
98
 
99
  def run_inference(self, input_data: ProcessedSynapse) -> Tuple[float, str]:
100
+ df = pd.DataFrame([input_data])
101
+ input_tensor = self.preprocessor.preprocess(df)
102
 
103
  with torch.no_grad():
104
  prediction = self.model(input_tensor)
 
115
  return sale_date
116
  else:
117
  return datetime.date.today() + datetime.timedelta(days=1)