Superkart / make_dummy_model.py
harikrishna1985's picture
Create make_dummy_model.py
469bbac verified
import joblib, pandas as pd, numpy as np
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.dummy import DummyRegressor
# Raw input columns, in the order used by the app/notebook.
columns = [
    # product-level fields
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    # store-level fields
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]

# One placeholder record: just enough data for the preprocessing steps to be
# fitted so they learn the expected column layout.
mock = pd.DataFrame(
    [
        {
            "Product_Id": "PR1234",
            "Product_Weight": 1.0,
            "Product_Sugar_Content": "regular",
            "Product_Allocated_Area": 0.02,
            "Product_Type": "dairy",
            "Product_MRP": 50.0,
            "Store_Id": "S001",
            "Store_Establishment_Year": 2010,
            "Store_Size": "medium",
            "Store_Location_City_Type": "Tier 2",
            "Store_Type": "Supermarket Type 1",
        }
    ],
    columns=columns,
)
def feature_engineering(
    df: pd.DataFrame, *, reference_year: int = 2025
) -> pd.DataFrame:
    """Return a copy of *df* with the derived feature columns added.

    Two columns are appended; the input frame is left unmodified:

    * ``Product_Prefix`` -- the first two characters of ``Product_Id``
      (the id is converted to ``str`` first).
    * ``Store_Age`` -- ``reference_year - Store_Establishment_Year``.

    Args:
        df: Frame containing at least ``Product_Id`` and
            ``Store_Establishment_Year``.
        reference_year: Year the store age is measured against. Defaults to
            2025, the value that was previously hard-coded.

    Returns:
        A new DataFrame with the original columns plus the two derived ones.

    Raises:
        KeyError: If a required column is missing from *df*.
    """
    engineered = df.copy()  # work on a copy so the caller's frame is untouched
    engineered["Product_Prefix"] = engineered["Product_Id"].astype(str).str[:2]
    engineered["Store_Age"] = reference_year - engineered["Store_Establishment_Year"]
    return engineered
# Add the derived columns (Product_Prefix, Store_Age) that the feature lists
# below refer to.
mock = feature_engineering(mock)

# Columns routed to the numeric and categorical preprocessing branches.
num_cols = [
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Age",
]
cat_cols = [
    "Product_Id",
    "Product_Sugar_Content",
    "Product_Type",
    "Store_Id",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
    "Product_Prefix",
]

# Numeric branch: median imputation, then scaling without centering.
numeric = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler(with_mean=False)),
])

# Categorical branch: most-frequent imputation, then one-hot encoding.
# The former `sparse=True` keyword was deprecated in scikit-learn 1.2 and
# removed in 1.4 (renamed to `sparse_output`). Sparse output is the default
# under both names, so the argument is omitted to stay compatible with old
# and new releases alike.
categorical = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(handle_unknown="ignore")),
])

# Columns not listed in num_cols / cat_cols are dropped.
pre = ColumnTransformer(
    [("num", numeric, num_cols), ("cat", categorical, cat_cols)],
    remainder="drop",
    sparse_threshold=0.3,
)

# Placeholder estimator: always predicts the mean of the training target.
model = DummyRegressor(strategy="mean")
pipe = Pipeline([("prep", pre), ("model", model)])

# NOTE: feature_engineering is applied outside the pipeline, so callers that
# load the pickle must add Product_Prefix and Store_Age before predicting.
pipe.fit(mock, np.array([0.0]))  # dummy target
joblib.dump(pipe, "best_model.pkl")
print("Saved best_model.pkl (DummyRegressor).")