TanmayPandey9584 committed on
Commit
3d212b0
·
verified ·
1 Parent(s): 5085703

Upload 5 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  DiscountEngine/WMT_Grocery_202209.csv filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  DiscountEngine/WMT_Grocery_202209.csv filter=lfs diff=lfs merge=lfs -text
37
+ WMT_Grocery_202209.csv filter=lfs diff=lfs merge=lfs -text
WMT_Grocery_202209.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4b17fd9cdef56f81ffad1b0574686fefc93fda039962318d746e2a8c35d6bfe
3
+ size 179757122
discount_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95279ead44e50e7dab7cefd662a7a880011b2ba4d85899ee8a941ab88e593df5
3
+ size 89291098
predict_discount.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import requests
3
+ import joblib
4
+ from datetime import datetime
5
+
6
import os

# OpenWeatherMap API key. The OPENWEATHER_API_KEY environment variable takes
# precedence so the secret does not have to live in source control; the
# literal is kept only as a backward-compatible fallback.
# NOTE(review): this literal key has been committed publicly and should be
# rotated, after which the fallback can be removed.
API_KEY = os.environ.get('OPENWEATHER_API_KEY') or 'a4f54718b17aa482e0b0a9f2e6220fc0'

# In-process memo of weather lookups, keyed by "city,state".
WEATHER_CACHE = {}

# Helper to map month number (1-12) to season name
SEASON_MAP = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}
14
+
15
def fetch_weather(city, state, api_key=API_KEY):
    """Return current weather for a US city as a small feature dict.

    The city is first geocoded through the OpenWeatherMap geocoding endpoint,
    then the current-weather endpoint is queried for the resulting
    coordinates (metric units).

    Args:
        city: City name.
        state: State name or code; combined with the city and ``US``.
        api_key: OpenWeatherMap API key.

    Returns:
        A dict with keys ``'temperature'``, ``'humidity'`` and
        ``'condition'``. If the city cannot be geocoded, or any request or
        response-parsing step fails, a neutral default
        (20 / 50 / ``'Clear'``) is returned instead of raising.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]

    fallback = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}
    try:
        # HTTPS so the API key is not sent in cleartext; ``params`` lets
        # requests URL-encode city/state values containing spaces or symbols.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={'q': f"{city},{state},US", 'limit': 1, 'appid': api_key},
            timeout=10,  # never hang forever on an unresponsive server
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            # An unknown location will stay unknown: remember the default so
            # the geocoder is not queried again for every occurrence.
            WEATHER_CACHE[key] = fallback
            return fallback
        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']
        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=10,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main'],
        }
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Best effort: network/HTTP failures and malformed payloads fall back
        # to the default. These are not cached because they may be transient.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return fallback
    WEATHER_CACHE[key] = weather
    return weather
41
+
42
def extract_season(month):
    """Return the season name for a month number, or 'Unknown' if unmapped."""
    try:
        return SEASON_MAP[month]
    except KeyError:
        return 'Unknown'
44
+
45
def _parse_order_date(order_date):
    """Return *order_date* as a pandas Timestamp, or None if it is unusable."""
    try:
        # Callers are told to use YYYY-MM-DD, so try that unambiguous layout
        # first; a day-first parser may otherwise swap day and month.
        parsed = pd.to_datetime(order_date, format='%Y-%m-%d')
    except (ValueError, TypeError, OverflowError):
        try:
            # Fall back to the day-first parsing previously used for
            # every input.
            parsed = pd.to_datetime(order_date, dayfirst=True)
        except (ValueError, TypeError, OverflowError):
            return None
    # Empty strings / None parse to NaT (or None) without raising.
    return None if pd.isna(parsed) else parsed


def predict_discount(product_serial, city, state, order_date, segment, ship_mode, df, model):
    """Predict the discount for one product order.

    Args:
        product_serial: Value matched against the ``'Product ID'`` column.
        city: Destination city (also used for the weather lookup).
        state: Destination state (also used for the weather lookup).
        order_date: Order date, preferably ``YYYY-MM-DD``; other layouts are
            parsed day-first.
        segment: Customer segment value passed to the model.
        ship_mode: Ship mode value passed to the model.
        df: DataFrame with at least ``'Product ID'``, ``'Category'``,
            ``'Sub-Category'`` and ``'Sales'`` columns.
        model: Fitted estimator exposing ``predict`` on a one-row DataFrame.

    Returns:
        The predicted discount rounded to two decimals and floored at 0, or
        ``None`` (after printing a message) when the product is not found or
        the date cannot be parsed.
    """
    product = df[df['Product ID'] == product_serial]
    if product.empty:
        print(f"Product serial {product_serial} not found.")
        return None

    # Validate the date before the weather lookup so bad input never costs a
    # network round trip.
    order_dt = _parse_order_date(order_date)
    if order_dt is None:
        print("Invalid order date format. Use YYYY-MM-DD.")
        return None

    weather = fetch_weather(city, state)
    order_month = order_dt.month
    order_day_of_week = order_dt.dayofweek
    season = extract_season(order_month)
    first_match = product.iloc[0]  # first row for this product supplies its attributes
    X_pred = pd.DataFrame({
        'Category': [first_match['Category']],
        'Sub-Category': [first_match['Sub-Category']],
        'Product ID': [product_serial],
        'Sales': [first_match['Sales']],
        'City': [city],
        'State': [state],
        'Segment': [segment],
        'Ship Mode': [ship_mode],
        'order_month': [order_month],
        'order_day_of_week': [order_day_of_week],
        'season': [season],
        'temperature': [weather['temperature']],
        'humidity': [weather['humidity']],
        'condition': [weather['condition']],
    })
    discount = model.predict(X_pred)[0]
    return max(0, round(discount, 2))
78
+
79
if __name__ == "__main__":
    # Load model and data
    print("Loading model and data...")
    # NOTE: joblib.load unpickles arbitrary objects; only load a model file
    # that comes from a trusted source.
    model = joblib.load('discount_model.joblib')
    df = pd.read_csv('train.csv')
    # User input. Surrounding whitespace is stripped so a stray space does not
    # cause a "not found" product or an unseen categorical value.
    serial = input("Enter product serial (Product ID): ").strip()
    city = input("Enter city: ").strip()
    state = input("Enter state: ").strip()
    order_date = input("Enter order date (YYYY-MM-DD): ").strip()
    segment = input("Enter customer segment (e.g., Consumer, Corporate, Home Office): ").strip()
    ship_mode = input("Enter ship mode (e.g., First Class, Second Class, Standard Class, Same Day): ").strip()
    discount = predict_discount(serial, city, state, order_date, segment, ship_mode, df, model)
    if discount is not None:
        print(f"Predicted discount for product {serial} in {city}, {state} on {order_date}: {discount}%")
train.csv ADDED
The diff for this file is too large to render. See raw diff
 
train_discount_model.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import requests
4
+ from sklearn.ensemble import RandomForestRegressor
5
+ from sklearn.preprocessing import OneHotEncoder
6
+ from sklearn.compose import ColumnTransformer
7
+ from sklearn.pipeline import Pipeline
8
+ import joblib
9
+
10
import os

# OpenWeatherMap API key. The OPENWEATHER_API_KEY environment variable takes
# precedence so the secret does not have to live in source control; the
# literal is kept only as a backward-compatible fallback.
# NOTE(review): this literal key has been committed publicly and should be
# rotated, after which the fallback can be removed.
API_KEY = os.environ.get('OPENWEATHER_API_KEY') or 'a4f54718b17aa482e0b0a9f2e6220fc0'

# In-process memo of weather lookups, keyed by "city,state".
WEATHER_CACHE = {}

# Helper to map month number (1-12) to season name
SEASON_MAP = {
    12: 'Winter', 1: 'Winter', 2: 'Winter',
    3: 'Spring', 4: 'Spring', 5: 'Spring',
    6: 'Summer', 7: 'Summer', 8: 'Summer',
    9: 'Fall', 10: 'Fall', 11: 'Fall',
}
18
+
19
def fetch_weather(city, state, api_key=API_KEY):
    """Return current weather for a US city as a small feature dict.

    The city is first geocoded through the OpenWeatherMap geocoding endpoint,
    then the current-weather endpoint is queried for the resulting
    coordinates (metric units).

    Args:
        city: City name.
        state: State name or code; combined with the city and ``US``.
        api_key: OpenWeatherMap API key.

    Returns:
        A dict with keys ``'temperature'``, ``'humidity'`` and
        ``'condition'``. If the city cannot be geocoded, or any request or
        response-parsing step fails, a neutral default
        (20 / 50 / ``'Clear'``) is returned instead of raising.
    """
    key = f"{city},{state}"
    if key in WEATHER_CACHE:
        return WEATHER_CACHE[key]

    fallback = {'temperature': 20, 'humidity': 50, 'condition': 'Clear'}
    try:
        # HTTPS so the API key is not sent in cleartext; ``params`` lets
        # requests URL-encode city/state values containing spaces or symbols.
        geo_resp = requests.get(
            "https://api.openweathermap.org/geo/1.0/direct",
            params={'q': f"{city},{state},US", 'limit': 1, 'appid': api_key},
            timeout=10,  # never hang forever on an unresponsive server
        )
        geo_resp.raise_for_status()
        geo_data = geo_resp.json()
        if not geo_data:
            # An unknown location will stay unknown: remember the default so
            # the geocoder is not queried again for every row of that city.
            WEATHER_CACHE[key] = fallback
            return fallback
        lat, lon = geo_data[0]['lat'], geo_data[0]['lon']
        weather_resp = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=10,
        )
        weather_resp.raise_for_status()
        data = weather_resp.json()
        weather = {
            'temperature': data['main']['temp'],
            'humidity': data['main']['humidity'],
            'condition': data['weather'][0]['main'],
        }
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as e:
        # Best effort: network/HTTP failures and malformed payloads fall back
        # to the default. These are not cached because they may be transient.
        print(f"Weather fetch error for {city}, {state}: {e}")
        return fallback
    WEATHER_CACHE[key] = weather
    return weather
45
+
46
def extract_season(month):
    """Return the season name for a month number, or 'Unknown' if unmapped."""
    try:
        return SEASON_MAP[month]
    except KeyError:
        return 'Unknown'
48
+
49
def load_and_prepare_data(csv_path):
    """Load the orders CSV and derive the feature and target columns.

    Adds ``order_month``, ``order_day_of_week`` and ``season`` from
    ``'Order Date'`` (parsed day-first), a simulated ``discount`` target, and
    ``temperature`` / ``humidity`` / ``condition`` from ``fetch_weather`` for
    each row's ``'City'`` and ``'State'``.

    Args:
        csv_path: Path of a CSV containing at least the ``'Order Date'``,
            ``'Sales'``, ``'City'`` and ``'State'`` columns.

    Returns:
        The loaded DataFrame with the derived columns added.
    """
    df = pd.read_csv(csv_path)
    # Parse dates
    df['Order Date'] = pd.to_datetime(df['Order Date'], dayfirst=True)
    df['order_month'] = df['Order Date'].dt.month
    df['order_day_of_week'] = df['Order Date'].dt.dayofweek
    df['season'] = df['order_month'].apply(extract_season)
    # Simulate discount for training: scaled sales (0-20) plus Gaussian noise.
    # A private RandomState yields the same draws as seeding the global
    # generator with 42, without clobbering global RNG state for other code.
    rng = np.random.RandomState(42)
    df['discount'] = (df['Sales'] / df['Sales'].max()) * 20 + rng.normal(0, 2, len(df))
    # Fetch weather features. Iterating the two columns directly (instead of
    # DataFrame.apply(axis=1)) is cheaper and also behaves for an empty frame,
    # where apply() returns a DataFrame rather than a Series of dicts.
    weather_features = [
        fetch_weather(city, state)
        for city, state in zip(df['City'], df['State'])
    ]
    df['temperature'] = [w['temperature'] for w in weather_features]
    df['humidity'] = [w['humidity'] for w in weather_features]
    df['condition'] = [w['condition'] for w in weather_features]
    return df
65
+
66
def train_discount_model(df):
    """Fit and return the discount-regression pipeline.

    Categorical columns are one-hot encoded (categories unseen at fit time
    are ignored at prediction time), numeric columns are passed through
    unchanged, and the result feeds a 100-tree random forest with a fixed
    random state. The target is the ``'discount'`` column of *df*.
    """
    categorical_cols = [
        'Category', 'Sub-Category', 'Product ID', 'City', 'State',
        'Segment', 'Ship Mode', 'season', 'condition',
    ]
    numeric_cols = [
        'Sales', 'order_month', 'order_day_of_week', 'temperature', 'humidity',
    ]
    # Column order of the training matrix handed to the pipeline.
    feature_cols = [
        'Category', 'Sub-Category', 'Product ID', 'Sales',
        'City', 'State', 'Segment', 'Ship Mode',
        'order_month', 'order_day_of_week', 'season',
        'temperature', 'humidity', 'condition',
    ]

    encoder = OneHotEncoder(handle_unknown='ignore')
    column_prep = ColumnTransformer([
        ('cat', encoder, categorical_cols),
        ('num', 'passthrough', numeric_cols),
    ])
    forest = RandomForestRegressor(n_estimators=100, random_state=42)
    pipeline = Pipeline([
        ('pre', column_prep),
        ('reg', forest),
    ])

    pipeline.fit(df[feature_cols], df['discount'])
    return pipeline
90
+
91
if __name__ == "__main__":
    # Script entry point: build the training frame, fit the pipeline, and
    # persist it next to the script.
    print("Loading and preparing data...")
    df = load_and_prepare_data('train.csv')

    print("Training model...")
    model = train_discount_model(df)

    joblib.dump(model, 'discount_model.joblib')
    print("Model trained and saved as discount_model.joblib.")