# File size: 3,883 Bytes
# 3d212b0
import os

import joblib
import numpy as np
import pandas as pd
import requests
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# OpenWeatherMap credential. Prefer supplying it through the
# OPENWEATHER_API_KEY environment variable; the literal below is kept only as
# a backward-compatible fallback.
# NOTE(review): this key is committed to source control - it should be
# rotated and the fallback removed once deployments set the variable.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', 'a4f54718b17aa482e0b0a9f2e6220fc0')

# Per-process memo of weather lookups, keyed by "<city>,<state>".
WEATHER_CACHE = {}

# Calendar month number (1-12) -> season name.
SEASON_MAP = {1: 'Winter', 2: 'Winter', 12: 'Winter',
              3: 'Spring', 4: 'Spring', 5: 'Spring',
              6: 'Summer', 7: 'Summer', 8: 'Summer',
              9: 'Fall', 10: 'Fall', 11: 'Fall'}

def fetch_weather(city, state, api_key=API_KEY):
    """Return current weather for a US city, memoised in ``WEATHER_CACHE``.

    The city is first geocoded through the OpenWeatherMap geocoding endpoint,
    then the current-weather endpoint is queried for the resulting
    coordinates (requested with ``units=metric``).

    This is a best-effort lookup: any failure is printed and a neutral
    default is returned instead of raising, so a single bad row cannot abort
    data preparation.

    Args:
        city: City name as it appears in the dataset.
        state: State name or code for the city.
        api_key: OpenWeatherMap API key; defaults to the module-level key.

    Returns:
        A dict with the keys ``'temperature'``, ``'humidity'`` and
        ``'condition'``. When the city cannot be geocoded or a request fails,
        the values are ``20``, ``50`` and ``'Clear'``.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]

    # Generous but finite, so a stalled connection cannot hang the whole run.
    timeout_seconds = 10
    try:
        # `params=` lets requests URL-encode city/state (spaces, '&', ...),
        # and HTTPS keeps the API key out of cleartext traffic.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={'q': f"{city},{state},US", 'limit': 1, 'appid': api_key},
            timeout=timeout_seconds,
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            # The geocoder has no match for this place. Remember that, so the
            # same unknown city is not re-requested for every row.
            not_found = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}
            WEATHER_CACHE[key] = not_found
            return not_found

        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']
        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=timeout_seconds,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main']
        }
        WEATHER_CACHE[key] = weather
        return weather
    except Exception as e:
        # Deliberately broad: network errors, HTTP errors, bad JSON and
        # unexpected payload shapes all degrade to the default. These are not
        # cached because they may be transient.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}

def extract_season(month):
    """Translate a month number into its season name.

    Args:
        month: Calendar month number, looked up in ``SEASON_MAP``.

    Returns:
        The season name from ``SEASON_MAP``, or ``'Unknown'`` when the month
        is not a key of that mapping.
    """
    try:
        return SEASON_MAP[month]
    except KeyError:
        return 'Unknown'

def load_and_prepare_data(csv_path):
    """Load the orders CSV and add the derived columns used for training.

    The input must provide at least the ``Order Date``, ``Sales``, ``City``
    and ``State`` columns. The following columns are added:

    * ``order_month``, ``order_day_of_week``, ``season`` - from ``Order Date``
      (parsed day-first).
    * ``discount`` - a *simulated* target: sales scaled to 0-20 relative to
      the largest sale, plus Gaussian noise (mean 0, std 2).
    * ``temperature``, ``humidity``, ``condition`` - from ``fetch_weather``
      for each row's city and state.

    Args:
        csv_path: Location of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with the derived columns added.
    """
    df = pd.read_csv(csv_path)

    # Calendar features
    df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst=True)
    df['order_month'] = df['Order Date'].dt.month
    df['order_day_of_week'] = df['Order Date'].dt.dayofweek
    df['season'] = df['order_month'].apply(extract_season)

    # Simulate discount for training. A private RandomState seeded with 42
    # draws the same values as seeding the global NumPy RNG would, without
    # resetting random state for the rest of the process.
    rng = np.random.RandomState(42)
    noise = rng.normal(0, 2, len(df))
    df['discount'] = (df['Sales'] / df['Sales'].max()) * 20 + noise

    # Weather features. Iterating the two columns directly (rather than
    # DataFrame.apply(axis=1)) also works for an empty frame, where apply
    # returns a DataFrame instead of a Series of dicts.
    weather_features = [
        fetch_weather(city, state)
        for city, state in zip(df['City'], df['State'])
    ]
    df['temperature'] = [w['temperature'] for w in weather_features]
    df['humidity'] = [w['humidity'] for w in weather_features]
    df['condition'] = [w['condition'] for w in weather_features]
    return df

def train_discount_model(df):
    """Fit a random-forest pipeline that predicts ``discount``.

    Categorical columns are one-hot encoded (categories unseen during fitting
    are ignored at transform time) and numeric columns are passed through
    unchanged before reaching the regressor.

    Args:
        df: DataFrame holding every feature column listed below plus the
            ``discount`` target column.

    Returns:
        The fitted ``Pipeline`` with steps ``'pre'`` (column preprocessing)
        and ``'reg'`` (``RandomForestRegressor``).
    """
    categorical_cols = [
        'Category', 'Sub-Category', 'Product ID', 'City', 'State',
        'Segment', 'Ship Mode', 'season', 'condition',
    ]
    numeric_cols = [
        'Sales', 'order_month', 'order_day_of_week', 'temperature', 'humidity',
    ]
    # Column order of the training frame handed to the pipeline.
    feature_cols = [
        'Category', 'Sub-Category', 'Product ID', 'Sales',
        'City', 'State', 'Segment', 'Ship Mode',
        'order_month', 'order_day_of_week', 'season',
        'temperature', 'humidity', 'condition',
    ]

    encoder = OneHotEncoder(handle_unknown='ignore')
    column_prep = ColumnTransformer([
        ('cat', encoder, categorical_cols),
        ('num', 'passthrough', numeric_cols),
    ])
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    pipeline = Pipeline([
        ('pre', column_prep),
        ('reg', forest),
    ])

    inputs = df[feature_cols]
    target = df['discount']
    pipeline.fit(inputs, target)
    return pipeline

if __name__ == "__main__":
    # Script entry point: build the training frame from train.csv, fit the
    # discount model and persist it with joblib.
    print("Loading and preparing data...")
    training_frame = load_and_prepare_data('train.csv')

    print("Training model...")
    trained_pipeline = train_discount_model(training_frame)

    joblib.dump(trained_pipeline, 'discount_model.joblib')
    print("Model trained and saved as discount_model.joblib.")