Spaces:

harikrishna1985
/

Superkart

Sleeping

App Files Files Community

Superkart / make_dummy_model.py

harikrishna1985

Create make_dummy_model.py

469bbac verified 4 months ago

raw

history blame contribute delete

2.11 kB

	import joblib, pandas as pd, numpy as np
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	from sklearn.preprocessing import OneHotEncoder, StandardScaler
	from sklearn.impute import SimpleImputer
	from sklearn.dummy import DummyRegressor

	# Raw input schema, in the order used for the mock frame built below.
	columns = [
	    "Product_Id",
	    "Product_Weight",
	    "Product_Sugar_Content",
	    "Product_Allocated_Area",
	    "Product_Type",
	    "Product_MRP",
	    "Store_Id",
	    "Store_Establishment_Year",
	    "Store_Size",
	    "Store_Location_City_Type",
	    "Store_Type",
	]

	# Single placeholder row: just enough data for the preprocessing steps to be
	# fitted. The values are listed positionally, one per entry of `columns`.
	mock = pd.DataFrame(
	    [
	        dict(
	            zip(
	                columns,
	                (
	                    "PR1234",              # Product_Id
	                    1.0,                   # Product_Weight
	                    "regular",             # Product_Sugar_Content
	                    0.02,                  # Product_Allocated_Area
	                    "dairy",               # Product_Type
	                    50.0,                  # Product_MRP
	                    "S001",                # Store_Id
	                    2010,                  # Store_Establishment_Year
	                    "medium",              # Store_Size
	                    "Tier 2",              # Store_Location_City_Type
	                    "Supermarket Type 1",  # Store_Type
	                ),
	            )
	        )
	    ],
	    columns=columns,
	)

	def feature_engineering(df: pd.DataFrame, *, reference_year: int = 2025) -> pd.DataFrame:
	    """Return a copy of ``df`` with the derived feature columns added.

	    Two columns are appended:

	    * ``Product_Prefix`` - the first two characters of ``Product_Id`` after
	      casting it to ``str``.
	    * ``Store_Age`` - ``reference_year`` minus ``Store_Establishment_Year``.

	    The input frame is copied first, so the caller's object is left unchanged.

	    Args:
	        df: Frame containing at least the ``Product_Id`` and
	            ``Store_Establishment_Year`` columns.
	        reference_year: Year that store ages are measured against. Defaults
	            to 2025, the value this script previously hard-coded; pass the
	            same value everywhere the function is used so that ``Store_Age``
	            stays comparable.

	    Returns:
	        A new DataFrame holding the original columns plus ``Product_Prefix``
	        and ``Store_Age``.

	    Raises:
	        KeyError: If ``Product_Id`` or ``Store_Establishment_Year`` is missing.
	    """
	    d = df.copy()
	    d["Product_Prefix"] = d["Product_Id"].astype(str).str[:2]
	    d["Store_Age"] = reference_year - d["Store_Establishment_Year"]
	    return d

	# Add the engineered columns (Product_Prefix, Store_Age) to the mock row so
	# that every column named in num_cols / cat_cols exists at fit time.
	mock = feature_engineering(mock)

	num_cols = [
	    "Product_Weight",
	    "Product_Allocated_Area",
	    "Product_MRP",
	    "Store_Establishment_Year",
	    "Store_Age",
	]
	cat_cols = [
	    "Product_Id",
	    "Product_Sugar_Content",
	    "Product_Type",
	    "Store_Id",
	    "Store_Size",
	    "Store_Location_City_Type",
	    "Store_Type",
	    "Product_Prefix",
	]

	# Numeric branch: fill gaps with the median, then scale without centering.
	numeric = Pipeline([
	    ("imputer", SimpleImputer(strategy="median")),
	    ("scaler", StandardScaler(with_mean=False)),
	])

	# Categorical branch: fill gaps with the most frequent value, then one-hot
	# encode. Categories not seen during fit are ignored instead of raising.
	# The former `sparse=True` argument is intentionally omitted: that keyword
	# was deprecated in scikit-learn 1.2 and removed in 1.4 (renamed to
	# `sparse_output`), and sparse output is the default under either name, so
	# leaving it out gives the same result on old and new versions alike.
	categorical = Pipeline([
	    ("imputer", SimpleImputer(strategy="most_frequent")),
	    ("ohe", OneHotEncoder(handle_unknown="ignore")),
	])

	# Columns not listed in num_cols / cat_cols are dropped.
	pre = ColumnTransformer(
	    [("num", numeric, num_cols), ("cat", categorical, cat_cols)],
	    remainder="drop",
	    sparse_threshold=0.3,
	)

	# Placeholder estimator: always predicts the mean of the training target.
	model = DummyRegressor(strategy="mean")
	pipe = Pipeline([("prep", pre), ("model", model)])

	# NOTE: feature_engineering is not a step of `pipe`, so anything loading the
	# saved model must apply it to the raw input before calling predict().
	pipe.fit(mock, np.array([0.0]))  # dummy target
	joblib.dump(pipe, "best_model.pkl")
	print("Saved best_model.pkl (DummyRegressor).")