Discount_Engine / train_discount_model.py
TanmayPandey9584's picture
Upload 5 files
3d212b0 verified
import os

import joblib
import numpy as np
import pandas as pd
import requests
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
# OpenWeatherMap API key. Supply it through the OPENWEATHER_API_KEY environment
# variable; the literal below is kept only as a backward-compatible fallback
# for environments that have not set the variable yet.
# SECURITY NOTE(review): the fallback key is committed in source and should be
# treated as exposed -- rotate it and remove the literal once deployments
# provide the environment variable.
API_KEY = os.environ.get('OPENWEATHER_API_KEY') or 'a4f54718b17aa482e0b0a9f2e6220fc0'
# In-process memo of weather lookups, keyed by "City,State" strings.
WEATHER_CACHE = {}
# Lookup table from calendar month number (1-12) to season name.
# Built from per-season month groups: Dec-Feb is Winter, Mar-May Spring,
# Jun-Aug Summer, Sep-Nov Fall.
SEASON_MAP = {
    month_number: season_name
    for season_name, month_numbers in (
        ('Winter', (1, 2, 12)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
        ('Fall', (9, 10, 11)),
    )
    for month_number in month_numbers
}
# Neutral placeholder readings used whenever live weather cannot be obtained.
_DEFAULT_WEATHER = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}


def fetch_weather(city, state, api_key=API_KEY, timeout=10):
    """Return current weather for a US city via the OpenWeatherMap API.

    The location is first geocoded to latitude/longitude, then the current
    conditions at that point are requested with ``units=metric``. Results are
    memoised in ``WEATHER_CACHE`` under the key ``"<city>,<state>"`` so a
    location that has already been resolved is not requested again.

    Args:
        city: City name; combined with ``state`` and the fixed country code
            ``US`` to form the geocoding query.
        state: State component of the geocoding query.
        api_key: OpenWeatherMap API key; defaults to the module-level key.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict with the keys ``'temperature'``, ``'humidity'`` and
        ``'condition'``. When the location is unknown or any request/parse
        step fails, a placeholder of 20 / 50 / ``'Clear'`` is returned
        instead; failures are printed, never raised.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]
    try:
        # Pass query values via ``params`` so names containing spaces or
        # reserved characters are URL-encoded, and use HTTPS so the API key
        # is not sent in cleartext.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={'q': f"{city},{state},US", 'limit': 1, 'appid': api_key},
            timeout=timeout,
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            # Unknown location: remember the placeholder so the same failing
            # lookup is not repeated for every row that mentions this city.
            WEATHER_CACHE[key] = dict(_DEFAULT_WEATHER)
            return WEATHER_CACHE[key]
        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']
        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=timeout,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main'],
        }
        WEATHER_CACHE[key] = weather
        return weather
    except Exception as e:
        # Deliberately broad: this is a best-effort enrichment step, and any
        # network, HTTP, JSON or schema problem should degrade to the
        # placeholder rather than abort the caller. Errors are not cached so
        # a transient failure can succeed on a later call.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return dict(_DEFAULT_WEATHER)
def extract_season(month):
    """Translate a month number into its season name using ``SEASON_MAP``.

    Returns ``'Unknown'`` for any value that is not a key of the map.
    """
    if month in SEASON_MAP:
        return SEASON_MAP[month]
    return 'Unknown'
def load_and_prepare_data(csv_path):
    """Load the orders CSV and add the feature/target columns used for training.

    Added columns:
        * ``order_month``, ``order_day_of_week``, ``season`` -- derived from
          ``Order Date`` (parsed day-first).
        * ``discount`` -- a simulated target: ``Sales`` scaled to 0-20 by the
          column maximum, plus Gaussian noise (mean 0, std 2) from a fixed
          seed so repeated runs produce the same values.
        * ``temperature``, ``humidity``, ``condition`` -- from
          ``fetch_weather`` for each row's ``City`` / ``State``.

    Args:
        csv_path: Anything ``pandas.read_csv`` accepts. The file must contain
            the columns ``Order Date``, ``Sales``, ``City`` and ``State``.

    Returns:
        The loaded ``DataFrame`` with the columns above added.
    """
    df = pd.read_csv(csv_path)

    # Parse dates and derive calendar features.
    df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst=True)
    df['order_month'] = df['Order Date'].dt.month
    df['order_day_of_week'] = df['Order Date'].dt.dayofweek
    df['season'] = df['order_month'].apply(extract_season)

    # Simulate discount for training. A private RandomState seeded with 42
    # yields the same noise as np.random.seed(42) + np.random.normal would,
    # without resetting the process-wide NumPy RNG as a side effect.
    rng = np.random.RandomState(42)
    noise = rng.normal(0, 2, len(df))
    df['discount'] = (df['Sales'] / df['Sales'].max()) * 20 + noise

    # Fetch weather features row by row, in file order. Iterating the two
    # columns directly (instead of DataFrame.apply(axis=1)) also works for a
    # CSV with zero data rows, where apply would hand back a DataFrame.
    # NOTE(review): fetch_weather queries the *current* weather endpoint, so
    # these values reflect conditions at run time, not on each Order Date --
    # confirm this is intended.
    weather_features = [
        fetch_weather(city, state)
        for city, state in zip(df['City'], df['State'])
    ]
    df['temperature'] = [w['temperature'] for w in weather_features]
    df['humidity'] = [w['humidity'] for w in weather_features]
    df['condition'] = [w['condition'] for w in weather_features]
    return df
def train_discount_model(df):
    """Fit and return a random-forest pipeline that predicts ``discount``.

    The pipeline one-hot encodes the categorical columns (categories unseen
    at fit time are ignored at predict time), passes the numeric columns
    through unchanged, and feeds the result to a 100-tree
    ``RandomForestRegressor`` with a fixed ``random_state``.

    Args:
        df: DataFrame containing every feature column listed below plus the
            ``discount`` target column.

    Returns:
        The fitted ``Pipeline`` with steps ``'pre'`` and ``'reg'``.
    """
    categorical_cols = [
        'Category', 'Sub-Category', 'Product ID', 'City', 'State',
        'Segment', 'Ship Mode', 'season', 'condition',
    ]
    numeric_cols = [
        'Sales', 'order_month', 'order_day_of_week', 'temperature', 'humidity',
    ]
    # Column order of the frame handed to the pipeline at fit time.
    feature_cols = [
        'Category', 'Sub-Category', 'Product ID', 'Sales',
        'City', 'State', 'Segment', 'Ship Mode',
        'order_month', 'order_day_of_week', 'season',
        'temperature', 'humidity', 'condition',
    ]

    one_hot = OneHotEncoder(handle_unknown='ignore')
    column_prep = ColumnTransformer([
        ('cat', one_hot, categorical_cols),
        ('num', 'passthrough', numeric_cols),
    ])
    forest = RandomForestRegressor(n_estimators=100, random_state=42)

    pipeline = Pipeline([
        ('pre', column_prep),
        ('reg', forest),
    ])
    inputs = df[feature_cols]
    target = df['discount']
    pipeline.fit(inputs, target)
    return pipeline
if __name__ == "__main__":
    # Script entry point: build the training frame from train.csv, fit the
    # pipeline, and persist it with joblib. Both paths are relative to the
    # current working directory.
    print("Loading and preparing data...")
    training_frame = load_and_prepare_data('train.csv')

    print("Training model...")
    fitted_pipeline = train_discount_model(training_frame)

    joblib.dump(fitted_pipeline, 'discount_model.joblib')
    print("Model trained and saved as discount_model.joblib.")