Spaces:
Sleeping
Sleeping
| import joblib, pandas as pd, numpy as np | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.preprocessing import OneHotEncoder, StandardScaler | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.dummy import DummyRegressor | |
# Raw input schema, in the column order used by the app/notebook.
columns = [
    "Product_Id",
    "Product_Weight",
    "Product_Sugar_Content",
    "Product_Allocated_Area",
    "Product_Type",
    "Product_MRP",
    "Store_Id",
    "Store_Establishment_Year",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
]

# One placeholder record: just enough data for the preprocessing steps to be
# fitted, so that the resulting pipeline has concrete input/output shapes.
mock = pd.DataFrame(
    [
        dict(
            Product_Id="PR1234",
            Product_Weight=1.0,
            Product_Sugar_Content="regular",
            Product_Allocated_Area=0.02,
            Product_Type="dairy",
            Product_MRP=50.0,
            Store_Id="S001",
            Store_Establishment_Year=2010,
            Store_Size="medium",
            Store_Location_City_Type="Tier 2",
            Store_Type="Supermarket Type 1",
        )
    ],
    columns=columns,
)
def feature_engineering(df: pd.DataFrame, *, reference_year: int = 2025) -> pd.DataFrame:
    """Return a copy of *df* with the derived feature columns added.

    Two columns are appended; the input frame is left unmodified:

    * ``Product_Prefix`` - the first two characters of ``Product_Id``
      (the id is converted to ``str`` first).
    * ``Store_Age`` - ``reference_year - Store_Establishment_Year``.

    Args:
        df: Frame containing at least the ``Product_Id`` and
            ``Store_Establishment_Year`` columns.
        reference_year: Year the store age is measured against. Defaults to
            2025, the value that was previously hard-coded, so existing
            callers get unchanged results.

    Returns:
        A new DataFrame holding the original columns plus the two derived ones.

    Raises:
        KeyError: If ``Product_Id`` or ``Store_Establishment_Year`` is missing.
    """
    d = df.copy()
    # Cast to str so that non-string ids can still be sliced.
    d["Product_Prefix"] = d["Product_Id"].astype(str).str[:2]
    d["Store_Age"] = reference_year - d["Store_Establishment_Year"]
    return d
# Add the derived columns (Product_Prefix, Store_Age) that the column lists
# below refer to.
mock = feature_engineering(mock)

num_cols = [
    "Product_Weight",
    "Product_Allocated_Area",
    "Product_MRP",
    "Store_Establishment_Year",
    "Store_Age",
]
cat_cols = [
    "Product_Id",
    "Product_Sugar_Content",
    "Product_Type",
    "Store_Id",
    "Store_Size",
    "Store_Location_City_Type",
    "Store_Type",
    "Product_Prefix",
]

# Numeric branch: median imputation, then scaling without centering.
numeric = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler(with_mean=False)),
])

# Categorical branch: most-frequent imputation, then one-hot encoding that
# ignores categories not seen during fit.
# The former `sparse=True` argument is intentionally omitted: that keyword was
# renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4, where it
# raises TypeError. Sparse output is the default under both spellings, so
# leaving it out keeps the same behaviour on old and new releases.
categorical = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("ohe", OneHotEncoder(handle_unknown="ignore")),
])

# Route each column group to its branch; columns in neither list are dropped.
pre = ColumnTransformer(
    [("num", numeric, num_cols), ("cat", categorical, cat_cols)],
    remainder="drop",
    sparse_threshold=0.3,
)

# Placeholder estimator: this script produces a loadable pipeline artifact,
# not a trained model.
model = DummyRegressor(strategy="mean")
pipe = Pipeline([("prep", pre), ("model", model)])

pipe.fit(mock, np.array([0.0]))  # dummy target
joblib.dump(pipe, "best_model.pkl")
print("Saved best_model.pkl (DummyRegressor).")