# File size: 3,883 Bytes
# 3d212b0 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import pandas as pd
import numpy as np
import requests
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import joblib
# OpenWeatherMap credential used as the default ``api_key`` of fetch_weather.
# NOTE(review): this secret is committed in source; consider loading it from
# the environment or a secrets store instead.
API_KEY = 'a4f54718b17aa482e0b0a9f2e6220fc0'

# In-process memo of weather lookups, keyed by "<city>,<state>".
WEATHER_CACHE = {}

# Month number (1-12) -> season name. Built from per-season month groups;
# insertion order matches the literal table it replaces.
SEASON_MAP = {
    month: season
    for season, months in (
        ('Winter', (1, 2, 12)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
        ('Fall', (9, 10, 11)),
    )
    for month in months
}
def fetch_weather(city, state, api_key=API_KEY, timeout=10):
    """Return current weather for a US city via the OpenWeatherMap API.

    The city is geocoded first, then the current-weather endpoint is queried
    for the resulting coordinates. Results are memoised in ``WEATHER_CACHE``
    under the key ``"<city>,<state>"`` (the API key is not part of the cache
    key), so each location is looked up at most once per process unless the
    lookup fails.

    Args:
        city: City name, sent to the geocoder as ``"<city>,<state>,US"``.
        state: State name or code, sent to the geocoder unchanged.
        api_key: OpenWeatherMap API key.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A dict with the keys ``'temperature'`` (requested with
        ``units=metric``), ``'humidity'`` and ``'condition'``. When the city
        cannot be geocoded, or a request/parse error occurs, the neutral
        fallback ``{'temperature': 20, 'humidity': 50, 'condition': 'Clear'}``
        is returned instead of raising.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]

    # Fresh dict per call so callers never share a mutable fallback object
    # unless it has been deliberately cached below.
    fallback = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}

    try:
        # Query parameters go through ``params`` so that spaces and other
        # special characters in city names are URL-encoded correctly. HTTPS
        # keeps the API key out of plaintext traffic.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={'q': f"{city},{state},US", 'limit': 1, 'appid': api_key},
            timeout=timeout,
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            # Unknown location: remember the fallback so that repeated rows
            # for the same city do not hit the geocoder again.
            WEATHER_CACHE[key] = fallback
            return fallback

        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']
        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=timeout,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main']
        }
    except (requests.RequestException, KeyError, IndexError,
            TypeError, ValueError) as e:
        # Network/HTTP failures, invalid JSON, or an unexpected payload shape.
        # These may be transient, so the fallback is NOT cached here.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return fallback

    WEATHER_CACHE[key] = weather
    return weather
def extract_season(month):
    """Translate a month number into its season name.

    Looks ``month`` up in ``SEASON_MAP``; any value that is not a key of
    that table yields ``'Unknown'``.
    """
    if month in SEASON_MAP:
        return SEASON_MAP[month]
    return 'Unknown'
def load_and_prepare_data(csv_path):
    """Load the orders CSV and add the derived columns used for training.

    The file is expected to provide at least the columns ``'Order Date'``,
    ``'Sales'``, ``'City'`` and ``'State'``, which are read below.

    Added columns:
        * ``order_month``, ``order_day_of_week``, ``season`` - derived from
          ``'Order Date'`` (parsed day-first).
        * ``discount`` - a simulated training target: sales scaled to a 0-20
          range relative to the maximum sale, plus Gaussian noise (std 2)
          drawn from a fixed seed so runs are reproducible.
        * ``temperature``, ``humidity``, ``condition`` - looked up per row via
          ``fetch_weather(City, State)``.

    Args:
        csv_path: Path (or any source accepted by ``pandas.read_csv``).

    Returns:
        The loaded DataFrame with the columns above appended.
    """
    df = pd.read_csv(csv_path)

    # Parse dates and derive calendar features.
    df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst=True)
    df['order_month'] = df['Order Date'].dt.month
    df['order_day_of_week'] = df['Order Date'].dt.dayofweek
    df['season'] = df['order_month'].apply(extract_season)

    # Simulate discount for training. A private RandomState seeded with 42
    # yields the same noise as seeding the global generator, without
    # resetting NumPy's process-wide random state as a side effect.
    rng = np.random.RandomState(42)
    noise = rng.normal(0, 2, len(df))
    df['discount'] = (df['Sales'] / df['Sales'].max()) * 20 + noise

    # Fetch weather features row by row, in row order. Iterating the two
    # columns directly (instead of DataFrame.apply(axis=1)) also works for an
    # empty frame, where apply() would hand back a DataFrame rather than a
    # sequence of weather dicts.
    weather_features = [
        fetch_weather(city, state)
        for city, state in zip(df['City'], df['State'])
    ]
    df['temperature'] = [w['temperature'] for w in weather_features]
    df['humidity'] = [w['humidity'] for w in weather_features]
    df['condition'] = [w['condition'] for w in weather_features]
    return df
def train_discount_model(df):
    """Fit and return the discount-prediction pipeline.

    The pipeline one-hot encodes the categorical columns (categories unseen
    during fitting are ignored at prediction time), passes the numeric
    columns through unchanged, and feeds the result to a 100-tree random
    forest regressor with a fixed random state. The target is the
    ``'discount'`` column of ``df``.

    Args:
        df: DataFrame containing every feature column listed below plus
            ``'discount'``.

    Returns:
        The fitted ``Pipeline`` with steps ``'pre'`` and ``'reg'``.
    """
    # Column order of the training matrix.
    feature_cols = [
        'Category', 'Sub-Category', 'Product ID', 'Sales',
        'City', 'State', 'Segment', 'Ship Mode',
        'order_month', 'order_day_of_week', 'season',
        'temperature', 'humidity', 'condition',
    ]
    categorical_cols = [
        'Category', 'Sub-Category', 'Product ID', 'City', 'State',
        'Segment', 'Ship Mode', 'season', 'condition',
    ]
    numeric_cols = [
        'Sales', 'order_month', 'order_day_of_week', 'temperature', 'humidity',
    ]

    encoder = OneHotEncoder(handle_unknown='ignore')
    column_prep = ColumnTransformer(
        transformers=[
            ('cat', encoder, categorical_cols),
            ('num', 'passthrough', numeric_cols),
        ]
    )
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    pipeline = Pipeline(steps=[('pre', column_prep), ('reg', forest)])

    # Pipeline.fit returns the fitted pipeline itself.
    return pipeline.fit(df[feature_cols], df['discount'])
if __name__ == "__main__":
    # Script entry point: build the feature table from train.csv, fit the
    # pipeline, and persist it next to the script.
    print("Loading and preparing data...")
    df = load_and_prepare_data('train.csv')
    print("Training model...")
    model = train_discount_model(df)
    # Serialise the fitted pipeline so it can be reloaded with joblib.load.
    joblib.dump(model, 'discount_model.joblib')
    print("Model trained and saved as discount_model.joblib.")