# vikaswebdev's picture
# Upload 17 files
# 7f90ea0 verified
"""
Demand Prediction System - Prediction Script
This script loads a trained model and makes demand predictions for products
on future dates. Supports both ML models and time-series models (ARIMA, Prophet).
Usage (ML Models):
python predict.py --product_id 1 --date 2024-01-15 --price 100 --discount 10 --category Electronics
Usage (Time-Series Models - overall demand):
python predict.py --date 2024-01-15 --model_type timeseries
"""
import pandas as pd
import numpy as np
import joblib
import json
import argparse
from datetime import datetime
import os
import warnings
# Suppress every warning for the whole process (no category filter is given).
warnings.filterwarnings('ignore')
# Configuration
# Directory that holds all artifacts read by this script.
MODEL_DIR = 'models'
# Serialized ML model, loaded with joblib when the ML path is used.
MODEL_PATH = f'{MODEL_DIR}/best_model.joblib'
# Serialized time-series model, loaded with joblib when the time-series path is used.
TS_MODEL_PATH = f'{MODEL_DIR}/best_timeseries_model.joblib'
# Preprocessing bundle loaded alongside the ML model; prepare_features reads
# its 'encoders', 'feature_names' and 'scaler' entries.
PREPROCESSING_PATH = f'{MODEL_DIR}/preprocessing.joblib'
# JSON metadata for the ML model; 'model_name' and 'metrics' -> 'r2' are read from it.
METADATA_PATH = f'{MODEL_DIR}/model_metadata.json'
# JSON metadata across models; 'best_model' and 'all_models' are read from it.
ALL_MODELS_METADATA_PATH = f'{MODEL_DIR}/all_models_metadata.json'
def _format_r2(value):
    """Format an R2 value with four decimals, or 'N/A' when it is not numeric."""
    try:
        return f"{float(value):.4f}"
    except (TypeError, ValueError):
        return 'N/A'


def _infer_timeseries_name(model):
    """Guess 'ARIMA' or 'Prophet' from the loaded object's class name, else None."""
    class_name = type(model).__name__.lower()
    if 'prophet' in class_name:
        return 'Prophet'
    if 'arima' in class_name:
        return 'ARIMA'
    return None


def load_model_and_preprocessing(model_type='auto'):
    """
    Load the trained model and, for ML models, the preprocessing objects.

    Args:
        model_type: 'ml', 'timeseries', or 'auto'. With 'auto' the
            time-series model is chosen when the 'best_model' entry of the
            all-models metadata file is 'ARIMA' or 'Prophet'; otherwise the
            ML model is chosen. Any value other than 'timeseries'/'auto'
            is treated as 'ml'.

    Returns:
        tuple: (model, preprocessing_data, model_name, is_timeseries).
        preprocessing_data is None for time-series models.

    Raises:
        FileNotFoundError: If the selected model file (or, for ML models,
            the preprocessing file) does not exist.
    """
    timeseries_names = ('ARIMA', 'Prophet')

    # Read the all-models metadata when present. all_metadata is always bound
    # so later lookups never depend on the file having existed.
    all_metadata = {}
    best_model_name = None
    if os.path.exists(ALL_MODELS_METADATA_PATH):
        with open(ALL_MODELS_METADATA_PATH, 'r') as f:
            all_metadata = json.load(f)
        best_model_name = all_metadata.get('best_model', 'Unknown')

    # Resolve 'auto' to a concrete model family.
    if model_type == 'auto':
        model_type = 'timeseries' if best_model_name in timeseries_names else 'ml'

    if model_type == 'timeseries':
        if not os.path.exists(TS_MODEL_PATH):
            raise FileNotFoundError(
                f"Time-series model not found at {TS_MODEL_PATH}. Please run train_model.py first."
            )
        print("Loading time-series model...")
        # NOTE: joblib.load unpickles the file; only load trusted model files.
        model = joblib.load(TS_MODEL_PATH)

        # The overall best model may be an ML model even though the caller
        # explicitly asked for the time-series one. predict_demand_timeseries
        # only accepts 'ARIMA' or 'Prophet', so try to recover the name from
        # the loaded object before falling back to the metadata value.
        ts_name = best_model_name
        if ts_name not in timeseries_names:
            ts_name = _infer_timeseries_name(model) or best_model_name

        if ts_name:
            print(f"Model: {ts_name}")
            metrics = all_metadata.get('all_models', {}).get(ts_name)
            if isinstance(metrics, dict):
                # A missing r2 prints 'N/A' instead of raising on the ':.4f' format.
                print(f"R2 Score: {_format_r2(metrics.get('r2'))}")
        return model, None, ts_name or 'Time-Series', True

    # ML model: both the model and its preprocessing bundle are required.
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(
            f"ML model not found at {MODEL_PATH}. Please run train_model.py first."
        )
    if not os.path.exists(PREPROCESSING_PATH):
        raise FileNotFoundError(
            f"Preprocessing objects not found at {PREPROCESSING_PATH}. Please run train_model.py first."
        )
    print("Loading ML model and preprocessing objects...")
    # NOTE: joblib.load unpickles the files; only load trusted model files.
    model = joblib.load(MODEL_PATH)
    preprocessing_data = joblib.load(PREPROCESSING_PATH)

    # Prefer the ML-specific metadata for the display name and score.
    if os.path.exists(METADATA_PATH):
        with open(METADATA_PATH, 'r') as f:
            metadata = json.load(f)
        model_name = metadata.get('model_name', 'ML Model')
        print(f"Model: {model_name}")
        print(f"R2 Score: {_format_r2(metadata.get('metrics', {}).get('r2'))}")
    else:
        model_name = best_model_name or 'ML Model'
    return model, preprocessing_data, model_name, False
def prepare_features(product_id, date, price, discount, category, preprocessing_data):
    """
    Build the scaled feature row for one prediction.

    Args:
        product_id: Product ID
        date: Date string (YYYY-MM-DD), datetime object, or pandas Timestamp
        price: Product price
        discount: Discount percentage (0-100)
        category: Product category
        preprocessing_data: Dictionary with 'encoders' (holding 'category'
            and 'product_id' encoders), 'feature_names' and 'scaler'

    Returns:
        The result of scaler.transform on a single-row feature array whose
        columns follow preprocessing_data['feature_names'].
    """
    # Normalise every accepted input to a pandas Timestamp. A plain
    # datetime.datetime has no .quarter attribute, so converting only
    # strings would fail for datetime inputs.
    date = pd.to_datetime(date)

    # Calendar features derived from the date.
    day_of_week = date.weekday()  # 0=Monday, 6=Sunday
    weekend = 1 if day_of_week >= 5 else 0

    encoders = preprocessing_data['encoders']
    category_encoder = encoders['category']
    product_encoder = encoders['product_id']

    # Unseen labels make the encoder raise ValueError; fall back to a fixed
    # default code instead of failing the prediction.
    try:
        category_encoded = category_encoder.transform([category])[0]
    except ValueError:
        print(f"Warning: Category '{category}' not seen during training. Using default encoding.")
        category_encoded = 0
    try:
        product_id_encoded = product_encoder.transform([product_id])[0]
    except ValueError:
        print(f"Warning: Product ID '{product_id}' not seen during training. Using default encoding.")
        # Default to the code of the first class known to the encoder.
        product_id_encoded = product_encoder.transform([product_encoder.classes_[0]])[0]

    feature_dict = {
        'price': price,
        'discount': discount,
        'day': date.day,
        'month': date.month,
        'day_of_week': day_of_week,
        'weekend': weekend,
        'year': date.year,
        'quarter': date.quarter,
        'category_encoded': category_encoded,
        'product_id_encoded': product_id_encoded,
    }

    # Column order must follow the stored feature_names list.
    feature_names = preprocessing_data['feature_names']
    features = np.array([[feature_dict[name] for name in feature_names]])

    return preprocessing_data['scaler'].transform(features)
def predict_demand_ml(product_id, date, price, discount, category, model, preprocessing_data):
    """
    Estimate the demand for one product on one date with the ML model.

    Args:
        product_id: Product ID
        date: Date string (YYYY-MM-DD) or datetime object
        price: Product price
        discount: Discount percentage (0-100)
        category: Product category
        model: Trained ML model exposing predict()
        preprocessing_data: Dictionary containing encoders and scaler

    Returns:
        Predicted sales quantity, floored at zero.
    """
    feature_row = prepare_features(product_id, date, price, discount, category, preprocessing_data)
    # Only a single row is passed in, so take the first (only) output.
    raw_estimate = model.predict(feature_row)[0]
    # A negative quantity is meaningless; clamp it to zero.
    return max(0, raw_estimate)
def predict_demand_timeseries(date, model, model_name):
    """
    Predict overall daily demand using a time-series model.

    Args:
        date: Date string (YYYY-MM-DD) or datetime object
        model: Trained time-series model (ARIMA or Prophet)
        model_name: Name of the model ('ARIMA' or 'Prophet')

    Returns:
        float: Predicted total daily sales quantity (never negative), or
        None when the model name is unknown or the model raises.
    """
    # Convert date to datetime if string
    if isinstance(date, str):
        date = pd.to_datetime(date)

    if model_name == 'ARIMA':
        # NOTE: the requested date is not used here. The forecast is always
        # one step past the end of the training data, because the training
        # end date is not available to this function.
        try:
            forecast = model.forecast(steps=1)
            # Select the first value by position. The forecast may be a
            # scalar, an array, or a pandas Series whose index does not
            # start at 0, in which case forecast[0] would be a failing
            # label lookup.
            prediction = float(np.ravel(forecast)[0])
            return max(0.0, prediction)
        except Exception as e:
            # Best effort: report the problem and let the caller handle None.
            print(f"Error in ARIMA prediction: {e}")
            return None
    elif model_name == 'Prophet':
        try:
            # The model is given a one-row frame with the date in column 'ds'
            # and the prediction is read from column 'yhat'.
            future = pd.DataFrame({'ds': [date]})
            forecast = model.predict(future)
            prediction = float(forecast['yhat'].iloc[0])
            return max(0.0, prediction)
        except Exception as e:
            # Best effort: report the problem and let the caller handle None.
            print(f"Error in Prophet prediction: {e}")
            return None
    else:
        print(f"Unknown time-series model: {model_name}")
        return None
def predict_batch(predictions_data, model, preprocessing_data):
    """
    Predict demand for multiple products/dates at once with the ML model.

    Args:
        predictions_data: Iterable of dictionaries, each containing:
            - product_id
            - date
            - price
            - discount
            - category
        model: Trained ML model
        preprocessing_data: Dictionary containing encoders and scaler

    Returns:
        list: Predicted sales quantities, in input order (empty for empty input).
    """
    # The per-item predictor in this file is predict_demand_ml; no function
    # named predict_demand is defined here.
    return [
        predict_demand_ml(
            item['product_id'],
            item['date'],
            item['price'],
            item['discount'],
            item['category'],
            model,
            preprocessing_data,
        )
        for item in predictions_data
    ]
def main():
    """
    Command-line entry point.

    Parses the arguments, loads the requested model, prints the prediction
    and some calendar details for the date. Problems are reported with an
    "Error: ..." line followed by an early return.
    """
    cli = argparse.ArgumentParser(
        description='Predict product demand for a given date and product details',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples (ML Models):
python predict.py --product_id 1 --date 2024-01-15 --price 100 --discount 10 --category Electronics
python predict.py --product_id 5 --date 2024-06-20 --price 50 --discount 0 --category Clothing
Examples (Time-Series Models - overall daily demand):
python predict.py --date 2024-01-15 --model_type timeseries
"""
    )

    # Options are registered in this order, which is also the --help order.
    option_specs = [
        ('--product_id', dict(type=int, default=None,
                              help='Product ID (required for ML models)')),
        ('--date', dict(type=str, required=True,
                        help='Date in YYYY-MM-DD format')),
        ('--price', dict(type=float, default=None,
                         help='Product price (required for ML models)')),
        ('--discount', dict(type=float, default=0,
                            help='Discount percentage (0-100), default: 0 (for ML models)')),
        ('--category', dict(type=str, default=None,
                            help='Product category (required for ML models)')),
        ('--model_type', dict(type=str, default='auto',
                              choices=['auto', 'ml', 'timeseries'],
                              help='Model type to use: auto (best model), ml, or timeseries')),
    ]
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    # Reject an unparseable date before touching any model file.
    try:
        when = pd.to_datetime(opts.date)
    except ValueError:
        print(f"Error: Invalid date format '{opts.date}'. Please use YYYY-MM-DD format.")
        return

    try:
        loaded = load_model_and_preprocessing(opts.model_type)
    except FileNotFoundError as e:
        print(f"Error: {e}")
        return
    model, preprocessing_data, model_name, is_timeseries = loaded

    if not is_timeseries:
        # The ML path needs the product details; the time-series path does not.
        ml_required = (opts.product_id, opts.price, opts.category)
        if any(value is None for value in ml_required):
            print("Error: ML models require --product_id, --price, and --category arguments.")
            return
        discount_out_of_range = opts.discount < 0 or opts.discount > 100
        if discount_out_of_range:
            print(f"Warning: Discount {opts.discount}% is outside 0-100 range. Clamping to valid range.")
            opts.discount = max(0, min(100, opts.discount))

    # Request summary.
    rule = "=" * 60
    kind_label = 'Time-Series' if is_timeseries else 'Machine Learning'
    print("\n" + rule)
    print("MAKING PREDICTION")
    print(rule)
    print(f"Model: {model_name}")
    print(f"Model Type: {kind_label}")
    print(f"Date: {opts.date}")
    if not is_timeseries:
        print(f"Product ID: {opts.product_id}")
        print(f"Price: ${opts.price:.2f}")
        print(f"Discount: {opts.discount}%")
        print(f"Category: {opts.category}")
    print("-" * 60)

    if is_timeseries:
        predicted_demand = predict_demand_timeseries(opts.date, model, model_name)
        if predicted_demand is None:
            print("Error: Failed to make prediction.")
            return
        print(f"\nPredicted Total Daily Sales Quantity: {predicted_demand:.0f} units")
        print("(This is the predicted total demand across all products for this date)")
    else:
        predicted_demand = predict_demand_ml(
            opts.product_id,
            opts.date,
            opts.price,
            opts.discount,
            opts.category,
            model,
            preprocessing_data,
        )
        print(f"\nPredicted Sales Quantity: {predicted_demand:.0f} units")
        print("(This is the predicted demand for this specific product)")
    print(rule)

    # Calendar details, taken from the date parsed during validation.
    weekend_flag = "Yes" if when.weekday() >= 5 else "No"
    print("\nDate Information:")
    print(f" Day of week: {when.strftime('%A')}")
    print(f" Weekend: {weekend_flag}")
    print(f" Month: {when.strftime('%B')}")
    print(f" Quarter: Q{when.quarter}")
if __name__ == "__main__":
main()