# Cars / app.py
# Uploaded by adeyemi001 via huggingface_hub (commit bbcf20f, verified).
from flask import Flask, render_template, request, jsonify
import os
import numpy as np
import pandas as pd
import pickle
import joblib
from sentence_transformers import SentenceTransformer
app = Flask(__name__)
# ===============================
# Configuration
# ===============================
# All paths are resolved relative to this file so the app works no matter
# what the current working directory is when it launches.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
EMBEDDINGS_DIR = os.path.join(BASE_DIR, 'embeddings')
MODEL_DIR = os.path.join(BASE_DIR, 'model')
IMAGES_DIR = os.path.join(BASE_DIR, 'static', 'Car Images')
# ===============================
# Load Model and Resources
# ===============================
# Everything below runs once at import time; any failure aborts startup,
# which is intentional — the app is useless without these artifacts.
print("Loading resources...")
# Load the trained LightGBM model
model_path = os.path.join(MODEL_DIR, 'lightgbm_used_car_model.pkl')
model = joblib.load(model_path)
print("βœ… Model loaded")
# Load the sentence transformer model — a local copy (directory name suggests
# all-mpnet-base-v2; TODO confirm) used to produce the name_emb_* features.
sentence_model_path = os.path.join(EMBEDDINGS_DIR, 'all_mpnet_base_v2')
sentence_model = SentenceTransformer(sentence_model_path)
print("βœ… Sentence Transformer loaded")
# Load categorical features mapping
# NOTE(review): pickle.load executes arbitrary code from the file. Acceptable
# only because this artifact ships with the app — never load untrusted pickles.
categorical_map_path = os.path.join(EMBEDDINGS_DIR, 'categorical_features_map.pkl')
with open(categorical_map_path, 'rb') as f:
    categorical_map = pickle.load(f)
print("βœ… Categorical mapping loaded")
# Extract the new encoded columns from the mapping: the full list of one-hot
# column names produced at training time (consumed by encode_categorical_features).
all_encoded_columns = categorical_map['new_encoded_columns']
# Features used by the model (from your training) — the exact column names,
# in the exact order, that the LightGBM model was fitted on.
MODEL_FEATURES = [
    'name_emb_307', 'name_emb_741', 'origin_local', 'name_emb_559',
    'name_emb_618', 'name_emb_207', 'name_emb_661', 'name_emb_766',
    'name_emb_541', 'name_emb_518', 'interior_color_coffee brown',
    'exterior_color_dark silver'
]
# Get list of car images for background rotation; a missing directory is
# tolerated (empty list) so the app can still start without images.
try:
    car_images = [f for f in os.listdir(IMAGES_DIR) if f.endswith('.png')]
    print(f"βœ… Found {len(car_images)} car images in {IMAGES_DIR}")
except FileNotFoundError:
    print(f"⚠️ Warning: Car Images directory not found at {IMAGES_DIR}")
    car_images = []
# ===============================
# Helper Functions
# ===============================
def generate_name_embeddings(car_name):
    """Return the sentence-transformer embedding vector for one car name."""
    # encode() works on batches, so wrap the single name and unwrap the result.
    vectors = sentence_model.encode([car_name], show_progress_bar=False)
    return vectors[0]
def encode_categorical_features(origin, transmission, fuel_type, interior_color, exterior_color):
    """
    One-hot encode the categorical inputs to match the training-time format.

    Returns a dict mapping every encoded column name to 0 or 1. Column names
    follow the pandas get_dummies convention ``feature_value`` (training used
    drop_first=True, so one category per feature has no column).
    """
    # Start with every known encoded column switched off.
    encoded = dict.fromkeys(all_encoded_columns, 0)

    # Origin is special-cased: only 'local' maps to a column, and it is set
    # unconditionally (no membership check) to mirror the original behavior.
    if origin.lower() == 'local':
        encoded['origin_local'] = 1

    # The remaining features share one pattern: build the column name and
    # flip it on only if it existed at training time (unknown values are
    # silently ignored, leaving all of that feature's columns at 0).
    selections = (
        ('transmission', transmission),
        ('fuel_type', fuel_type),
        ('interior_color', interior_color),
        ('exterior_color', exterior_color),
    )
    for prefix, value in selections:
        column = f'{prefix}_{value.lower()}'
        if column in encoded:
            encoded[column] = 1

    return encoded
def prepare_features_for_prediction(car_name, origin, transmission, fuel_type,
                                    interior_color, exterior_color):
    """
    Build the single-row feature DataFrame the model expects.

    Combines the sentence-embedding features of the car name with the one-hot
    encoded categorical features, then restricts and reorders the columns to
    exactly MODEL_FEATURES, filling any absent column with 0.

    Returns:
        pandas.DataFrame with one row and columns == MODEL_FEATURES.
    """
    # 1. Name embeddings — one name_emb_<i> column per embedding dimension
    #    (768 for all-mpnet-base-v2, per the model directory name).
    name_embedding = generate_name_embeddings(car_name)
    embedding_dict = {f'name_emb_{i}': value for i, value in enumerate(name_embedding)}

    # 2. One-hot categorical features.
    categorical_dict = encode_categorical_features(
        origin, transmission, fuel_type, interior_color, exterior_color
    )

    # 3. Merge, then align with the training-time feature set in one step.
    #    reindex selects MODEL_FEATURES in order and fills missing columns
    #    with 0 — replacing the original per-column insertion loop, which
    #    was both non-idiomatic and a source of pandas fragmentation warnings.
    all_features = {**embedding_dict, **categorical_dict}
    df_input = pd.DataFrame([all_features])
    return df_input.reindex(columns=MODEL_FEATURES, fill_value=0)
# ===============================
# Routes
# ===============================
@app.route('/')
def index():
    """Serve the landing page, passing the background car images."""
    context = {'images': car_images}
    return render_template('index.html', **context)
@app.route('/predict', methods=['POST'])
def predict():
    """Handle prediction requests.

    Expects a JSON object with keys: car_name, origin, transmission,
    fuel_type, interior_color, exterior_color — all required, non-empty.

    Returns:
        200 with {'success': True, 'predicted_price': float,
        'formatted_price': str} on success; 400 with {'error': ...} for a
        missing/invalid body or empty fields; 500 with {'error': ...} if
        feature preparation or prediction fails.
    """
    # silent=True makes get_json return None instead of raising on a
    # missing or malformed JSON body, so bad requests get a clean 400
    # rather than falling into the broad except below as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            'error': 'Request body must be a JSON object'
        }), 400
    try:
        car_name = data.get('car_name', '').strip()
        origin = data.get('origin', '').strip()
        transmission = data.get('transmission', '').strip()
        fuel_type = data.get('fuel_type', '').strip()
        interior_color = data.get('interior_color', '').strip()
        exterior_color = data.get('exterior_color', '').strip()
        # All fields are mandatory for the one-hot encoding to be meaningful.
        if not all([car_name, origin, transmission, fuel_type, interior_color, exterior_color]):
            return jsonify({
                'error': 'All fields are required'
            }), 400
        # Build the model input and predict the price.
        features_df = prepare_features_for_prediction(
            car_name, origin, transmission, fuel_type,
            interior_color, exterior_color
        )
        predicted_price = model.predict(features_df)[0]
        return jsonify({
            'success': True,
            'predicted_price': float(predicted_price),
            'formatted_price': f'₦{predicted_price:,.2f}'
        })
    except Exception as e:
        # Deliberate broad catch at this route boundary: any failure in
        # embedding/encoding/prediction becomes a JSON 500 response.
        return jsonify({
            'error': f'Prediction failed: {str(e)}'
        }), 500
@app.route('/health')
def health():
    """Liveness probe: reports service status and whether the model loaded."""
    payload = {'status': 'healthy', 'model_loaded': model is not None}
    return jsonify(payload)
if __name__ == '__main__':
    # Startup banner for local/console runs.
    print("\n" + "="*50)
    print("πŸš— Car Price Prediction App Started!")
    print("="*50)
    print(f"πŸ“‚ Working Directory: {BASE_DIR}")
    print(f"πŸ“‚ Images Directory: {IMAGES_DIR}")
    print(f"🌐 Access the app at: http://localhost:7860")
    print("="*50 + "\n")
    # NOTE(review): debug=True on host 0.0.0.0 exposes the Werkzeug debugger
    # on all interfaces — fine in a sandboxed Space, unsafe on an open host.
    # Port 7860 is the conventional Hugging Face Spaces port.
    app.run(debug=True, host='0.0.0.0', port=7860)