Upload 5 files
Browse files- .gitattributes +1 -0
- WMT_Grocery_202209.csv +3 -0
- discount_model.joblib +3 -0
- predict_discount.py +93 -0
- train.csv +0 -0
- train_discount_model.py +97 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
DiscountEngine/WMT_Grocery_202209.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
DiscountEngine/WMT_Grocery_202209.csv filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
WMT_Grocery_202209.csv filter=lfs diff=lfs merge=lfs -text
|
WMT_Grocery_202209.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d4b17fd9cdef56f81ffad1b0574686fefc93fda039962318d746e2a8c35d6bfe
|
| 3 |
+
size 179757122
|
discount_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95279ead44e50e7dab7cefd662a7a880011b2ba4d85899ee8a941ab88e593df5
|
| 3 |
+
size 89291098
|
predict_discount.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import requests
|
| 3 |
+
import joblib
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
import os

# OpenWeatherMap API key.
# NOTE(security): a literal key is committed to the repository here. Prefer
# supplying it through the OPENWEATHER_API_KEY environment variable and rotate
# the committed key; the literal is kept only as a backward-compatible fallback.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', 'a4f54718b17aa482e0b0a9f2e6220fc0')

# In-process memo of successful weather lookups, keyed by "city,state".
WEATHER_CACHE = {}

# Helper to map month number (1-12) to a season label
SEASON_MAP = {
    1: 'Winter', 2: 'Winter', 12: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}
|
| 14 |
+
|
| 15 |
+
def fetch_weather(city, state, api_key=API_KEY):
    """Return the current weather for a US city from OpenWeatherMap.

    The city is first geocoded to latitude/longitude, then the current
    conditions are requested in metric units. Successful results are memoised
    in ``WEATHER_CACHE`` under the key ``"city,state"``.

    Args:
        city: City name.
        state: State name or code.
        api_key: OpenWeatherMap API key.

    Returns:
        A dict with keys ``'temperature'``, ``'humidity'`` and ``'condition'``.
        If the city cannot be geocoded or any request/parsing step fails, a
        neutral default (20, 50, 'Clear') is returned and is NOT cached, so a
        later call can retry.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]

    fallback = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}
    try:
        # HTTPS keeps the API key out of cleartext; `params` URL-encodes the
        # user-supplied city/state; `timeout` prevents an indefinite hang.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={'q': f"{city},{state},US", 'limit': 1, 'appid': api_key},
            timeout=10,
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            # Unknown location: fall back silently, as before.
            return fallback
        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']

        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=10,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main'],
        }
        WEATHER_CACHE[key] = weather
        return weather
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Best effort: network, HTTP, JSON or schema problems degrade to the default.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return fallback
|
| 41 |
+
|
| 42 |
+
def extract_season(month):
    """Return the season label for a month number, or 'Unknown' if unmapped."""
    try:
        return SEASON_MAP[month]
    except KeyError:
        return 'Unknown'
|
| 44 |
+
|
| 45 |
+
def predict_discount(product_serial, city, state, order_date, segment, ship_mode, df, model):
    """Predict the discount for one product/order using a fitted model.

    Args:
        product_serial: Value matched against the ``'Product ID'`` column of ``df``.
        city: Delivery city (also used for the weather lookup).
        state: Delivery state (also used for the weather lookup).
        order_date: Order date string; parsed with ``dayfirst=True`` so that
            ambiguous day/month strings are read day-first.
        segment: Customer segment.
        ship_mode: Shipping mode.
        df: DataFrame providing ``'Product ID'``, ``'Category'``,
            ``'Sub-Category'`` and ``'Sales'`` columns.
        model: Fitted estimator exposing ``predict``.

    Returns:
        The predicted discount rounded to 2 decimals and floored at 0, or
        ``None`` when the product is unknown or the date is missing/invalid.
    """
    product = df[df['Product ID'] == product_serial]
    if product.empty:
        print(f"Product serial {product_serial} not found.")
        return None

    # Validate the date BEFORE the network call so bad input fails fast.
    try:
        order_dt = pd.to_datetime(order_date, dayfirst=True)
    except (ValueError, TypeError, OverflowError):
        order_dt = None
    # Empty strings / None parse to NaT (or None) without raising; treat as invalid
    # instead of silently feeding NaN month/day features to the model.
    if order_dt is None or pd.isna(order_dt):
        print("Invalid order date format. Use YYYY-MM-DD.")
        return None

    weather = fetch_weather(city, state)

    order_month = order_dt.month
    order_day_of_week = order_dt.dayofweek
    season = extract_season(order_month)

    # When several rows share the Product ID, the first one supplies the attributes.
    row = product.iloc[0]
    X_pred = pd.DataFrame({
        'Category': [row['Category']],
        'Sub-Category': [row['Sub-Category']],
        'Product ID': [product_serial],
        'Sales': [row['Sales']],
        'City': [city],
        'State': [state],
        'Segment': [segment],
        'Ship Mode': [ship_mode],
        'order_month': [order_month],
        'order_day_of_week': [order_day_of_week],
        'season': [season],
        'temperature': [weather['temperature']],
        'humidity': [weather['humidity']],
        'condition': [weather['condition']],
    })
    discount = model.predict(X_pred)[0]
    # Negative predictions are clamped to zero.
    return max(0, round(discount, 2))
|
| 78 |
+
|
| 79 |
+
if __name__ == "__main__":
    # Interactive entry point: load the fitted pipeline and the product table,
    # prompt for one order, and print the predicted discount.
    print("Loading model and data...")
    # NOTE(security): joblib files are pickle-based; only load a model file
    # that comes from a trusted source.
    model = joblib.load('discount_model.joblib')
    df = pd.read_csv('train.csv')
    # User input (surrounding whitespace is stripped so that a stray space does
    # not make the Product ID lookup or categorical features miss)
    serial = input("Enter product serial (Product ID): ").strip()
    city = input("Enter city: ").strip()
    state = input("Enter state: ").strip()
    order_date = input("Enter order date (YYYY-MM-DD): ").strip()
    segment = input("Enter customer segment (e.g., Consumer, Corporate, Home Office): ").strip()
    ship_mode = input("Enter ship mode (e.g., First Class, Second Class, Standard Class, Same Day): ").strip()
    discount = predict_discount(serial, city, state, order_date, segment, ship_mode, df, model)
    if discount is not None:
        print(f"Predicted discount for product {serial} in {city}, {state} on {order_date}: {discount}%")
|
train.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
train_discount_model.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import requests
|
| 4 |
+
from sklearn.ensemble import RandomForestRegressor
|
| 5 |
+
from sklearn.preprocessing import OneHotEncoder
|
| 6 |
+
from sklearn.compose import ColumnTransformer
|
| 7 |
+
from sklearn.pipeline import Pipeline
|
| 8 |
+
import joblib
|
| 9 |
+
|
| 10 |
+
import os

# OpenWeatherMap API key.
# NOTE(security): a literal key is committed to the repository here. Prefer
# supplying it through the OPENWEATHER_API_KEY environment variable and rotate
# the committed key; the literal is kept only as a backward-compatible fallback.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', 'a4f54718b17aa482e0b0a9f2e6220fc0')

# In-process memo of successful weather lookups, keyed by "city,state".
WEATHER_CACHE = {}

# Helper to map month number (1-12) to a season label
SEASON_MAP = {
    1: 'Winter', 2: 'Winter', 12: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}
|
| 18 |
+
|
| 19 |
+
def fetch_weather(city, state, api_key=API_KEY):
    """Return the current weather for a US city from OpenWeatherMap.

    The city is first geocoded to latitude/longitude, then the current
    conditions are requested in metric units. Successful results are memoised
    in ``WEATHER_CACHE`` under the key ``"city,state"``.

    Args:
        city: City name.
        state: State name or code.
        api_key: OpenWeatherMap API key.

    Returns:
        A dict with keys ``'temperature'``, ``'humidity'`` and ``'condition'``.
        If the city cannot be geocoded or any request/parsing step fails, a
        neutral default (20, 50, 'Clear') is returned and is NOT cached, so a
        later call can retry.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]

    fallback = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}
    try:
        # HTTPS keeps the API key out of cleartext; `params` URL-encodes the
        # city/state values; `timeout` prevents an indefinite hang.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={'q': f"{city},{state},US", 'limit': 1, 'appid': api_key},
            timeout=10,
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            # Unknown location: fall back silently, as before.
            return fallback
        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']

        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=10,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main'],
        }
        WEATHER_CACHE[key] = weather
        return weather
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError) as e:
        # Best effort: network, HTTP, JSON or schema problems degrade to the default.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return fallback
|
| 45 |
+
|
| 46 |
+
def extract_season(month):
    """Return the season label for a month number, or 'Unknown' if unmapped."""
    try:
        return SEASON_MAP[month]
    except KeyError:
        return 'Unknown'
|
| 48 |
+
|
| 49 |
+
def load_and_prepare_data(csv_path):
    """Load the order CSV and derive the columns used for training.

    Adds ``order_month``, ``order_day_of_week`` and ``season`` from the
    ``'Order Date'`` column (parsed day-first), a synthetic ``discount`` target,
    and ``temperature`` / ``humidity`` / ``condition`` from ``fetch_weather``.

    Args:
        csv_path: Path of a CSV containing at least ``'Order Date'``,
            ``'Sales'``, ``'City'`` and ``'State'`` columns.

    Returns:
        The DataFrame with the derived columns added.
    """
    df = pd.read_csv(csv_path)

    # Parse dates
    df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst=True)
    df['order_month'] = df['Order Date'].dt.month
    df['order_day_of_week'] = df['Order Date'].dt.dayofweek
    df['season'] = df['order_month'].apply(extract_season)

    # Simulate discount for training: sales scaled to 0-20 plus Gaussian noise.
    # A private RandomState(42) yields the same draws as np.random.seed(42)
    # followed by np.random.normal, without reseeding NumPy's global generator.
    rng = np.random.RandomState(42)
    noise = rng.normal(0, 2, len(df))
    df['discount'] = (df['Sales'] / df['Sales'].max()) * 20 + noise

    # Fetch weather features once per distinct (City, State). fetch_weather
    # does not cache failures, so a per-row call would repeat the HTTP round
    # trip for every row of a city that cannot be resolved.
    # NOTE(review): fetch_weather returns *current* conditions, not the
    # weather on the order date — confirm this is intended for training.
    by_location = {}
    weather_features = []
    for city, state in zip(df['City'], df['State']):
        location = (city, state)
        if location not in by_location:
            by_location[location] = fetch_weather(city, state)
        weather_features.append(by_location[location])

    df['temperature'] = [w['temperature'] for w in weather_features]
    df['humidity'] = [w['humidity'] for w in weather_features]
    df['condition'] = [w['condition'] for w in weather_features]
    return df
|
| 65 |
+
|
| 66 |
+
def train_discount_model(df):
    """Fit and return the discount-regression pipeline.

    The pipeline one-hot encodes the categorical columns (categories unseen
    at fit time are ignored at predict time), passes the numeric columns
    through unchanged, and fits a 100-tree random forest with a fixed seed.

    Args:
        df: DataFrame containing every feature column listed below plus the
            ``'discount'`` target column.

    Returns:
        The fitted ``Pipeline``.
    """
    feature_columns = [
        'Category', 'Sub-Category', 'Product ID', 'Sales',
        'City', 'State', 'Segment', 'Ship Mode',
        'order_month', 'order_day_of_week', 'season',
        'temperature', 'humidity', 'condition',
    ]
    categorical_columns = [
        'Category', 'Sub-Category', 'Product ID', 'City', 'State',
        'Segment', 'Ship Mode', 'season', 'condition',
    ]
    numeric_columns = [
        'Sales', 'order_month', 'order_day_of_week', 'temperature', 'humidity',
    ]

    encoder = OneHotEncoder(handle_unknown='ignore')
    column_steps = [
        ('cat', encoder, categorical_columns),
        ('num', 'passthrough', numeric_columns),
    ]
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    pipeline = Pipeline([
        ('pre', ColumnTransformer(column_steps)),
        ('reg', forest),
    ])

    pipeline.fit(df[feature_columns], df['discount'])
    return pipeline
|
| 90 |
+
|
| 91 |
+
if __name__ == "__main__":
    # Training entry point: build the feature table from train.csv, fit the
    # pipeline, and persist it next to the script.
    print("Loading and preparing data...")
    df = load_and_prepare_data('train.csv')

    print("Training model...")
    model = train_discount_model(df)

    joblib.dump(model, 'discount_model.joblib')
    print("Model trained and saved as discount_model.joblib.")
|