chkp-talexm committed on
Commit
091c949
·
1 Parent(s): bade23f
Files changed (1) hide show
  1. app.py +31 -15
app.py CHANGED
@@ -36,6 +36,10 @@ FEATURE_COLUMNS = CATEGORICAL_COLUMNS + NUMERICAL_COLUMNS
36
  import pandas as pd
37
  from sklearn.preprocessing import LabelEncoder, StandardScaler
38
 
 
 
 
 
39
 
40
  def preprocess_input(input_df):
41
  """
@@ -76,27 +80,38 @@ def preprocess_input(input_df):
76
  required_columns = ["age_level", "gender", "product", "city_development_index"]
77
  if not all(col in input_df.columns for col in required_columns):
78
  print("πŸ›‘ Missing required columns for aggregations. Skipping aggregation steps.")
79
- return input_df
80
-
81
- # Aggregation: Age & Gender vs Product
82
- input_df = compute_aggregations(input_df, ["age_level", "gender", "product"], {
83
- "campaign_id": "nunique",
84
- "webpage_id": "nunique"
85
- }, suffix="age_sex_prod")
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- # Aggregation: City, Age, Product
88
- input_df = compute_aggregations(input_df, ["city_development_index", "age_level", "product"], {
89
- "campaign_id": "nunique",
90
- "webpage_id": "nunique"
91
- }, suffix="city_age_prod")
92
 
93
- # 🚀 Step 4: Ensure Categorical Features Are Strings
94
  categorical_columns = ["gender", "product", "campaign_id", "webpage_id"]
95
 
96
  for col in categorical_columns:
97
  input_df[col] = input_df[col].astype(str).fillna("missing")
98
 
99
- # 🚀 Step 5: Encode Categorical Features as Integers
100
  label_encoders = {}
101
  for col in categorical_columns:
102
  le = LabelEncoder()
@@ -105,7 +120,7 @@ def preprocess_input(input_df):
105
  label_encoders[col] = le
106
  input_df[col] = input_df[col].map(lambda x: le.transform([x])[0] if x in le.classes_ else -1)
107
 
108
- # 🚀 Step 6: Normalize Numerical Features
109
  numerical_columns = [
110
  "age_level", "city_development_index", "user_group_id", "user_depth", "var_1",
111
  "click_sum_age_sex_prod", "click_count_age_sex_prod",
@@ -121,6 +136,7 @@ def preprocess_input(input_df):
121
 
122
  return input_df
123
 
 
124
  def download_model(filename, local_path):
125
  """Download model from Hugging Face and move it to the correct location."""
126
  temp_path = hf_hub_download(repo_id=MODEL_REPO, filename=filename, local_dir=MODEL_DIR)
 
36
  import pandas as pd
37
  from sklearn.preprocessing import LabelEncoder, StandardScaler
38
 
39
+ import pandas as pd
40
+ import numpy as np
41
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
42
+
43
 
44
  def preprocess_input(input_df):
45
  """
 
80
  required_columns = ["age_level", "gender", "product", "city_development_index"]
81
  if not all(col in input_df.columns for col in required_columns):
82
  print("πŸ›‘ Missing required columns for aggregations. Skipping aggregation steps.")
83
+ else:
84
+ # Aggregation: Age & Gender vs Product
85
+ input_df = compute_aggregations(input_df, ["age_level", "gender", "product"], {
86
+ "campaign_id": "nunique",
87
+ "webpage_id": "nunique"
88
+ }, suffix="age_sex_prod")
89
+
90
+ # Aggregation: City, Age, Product
91
+ input_df = compute_aggregations(input_df, ["city_development_index", "age_level", "product"], {
92
+ "campaign_id": "nunique",
93
+ "webpage_id": "nunique"
94
+ }, suffix="city_age_prod")
95
+
96
+ # 🚀 Step 4: Add Missing Aggregated Columns with Default Values
97
+ aggregated_features = [
98
+ "click_sum_age_sex_prod", "click_count_age_sex_prod", "unique_campaigns_age_sex_prod",
99
+ "unique_webpages_age_sex_prod",
100
+ "click_sum_city_age_prod", "click_count_city_age_prod", "unique_campaigns_city_age_prod",
101
+ "unique_webpages_city_age_prod"
102
+ ]
103
 
104
+ for col in aggregated_features:
105
+ if col not in input_df.columns:
106
+ input_df[col] = 0 # Fill missing aggregated columns with default values
 
 
107
 
108
+ # 🚀 Step 5: Ensure Categorical Features Are Strings
109
  categorical_columns = ["gender", "product", "campaign_id", "webpage_id"]
110
 
111
  for col in categorical_columns:
112
  input_df[col] = input_df[col].astype(str).fillna("missing")
113
 
114
+ # 🚀 Step 6: Encode Categorical Features as Integers
115
  label_encoders = {}
116
  for col in categorical_columns:
117
  le = LabelEncoder()
 
120
  label_encoders[col] = le
121
  input_df[col] = input_df[col].map(lambda x: le.transform([x])[0] if x in le.classes_ else -1)
122
 
123
+ # 🚀 Step 7: Normalize Numerical Features
124
  numerical_columns = [
125
  "age_level", "city_development_index", "user_group_id", "user_depth", "var_1",
126
  "click_sum_age_sex_prod", "click_count_age_sex_prod",
 
136
 
137
  return input_df
138
 
139
+
140
  def download_model(filename, local_path):
141
  """Download model from Hugging Face and move it to the correct location."""
142
  temp_path = hf_hub_download(repo_id=MODEL_REPO, filename=filename, local_dir=MODEL_DIR)