Spaces:

adeyemi001
/

Cars

Running

File size: 7,031 Bytes

bbcf20f

from flask import Flask, render_template, request, jsonify
import os
import numpy as np
import pandas as pd
import pickle
import joblib
from sentence_transformers import SentenceTransformer

# Flask application object; the route decorators below register against it.
app = Flask(__name__)

# ===============================
# Configuration
# ===============================
# All paths are resolved relative to the directory containing this file, so the
# app does not depend on the process's current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Holds the sentence-transformer model folder and the categorical feature map.
EMBEDDINGS_DIR = os.path.join(BASE_DIR, 'embeddings')
# Holds the serialized price-prediction model.
MODEL_DIR = os.path.join(BASE_DIR, 'model')
# Directory scanned for .png files that are passed to the index template.
IMAGES_DIR = os.path.join(BASE_DIR, 'static', 'Car Images')

# ===============================
# Load Model and Resources
# ===============================
# Everything in this section runs once, at import time. There is no error
# handling here, so a missing or unreadable artifact raises during import.
print("Loading resources...")

# Load the trained LightGBM model (used by the /predict route via model.predict).
# NOTE: joblib/pickle deserialization can execute arbitrary code; these files
# are read from the app's own directories and must remain trusted artifacts.
model_path = os.path.join(MODEL_DIR, 'lightgbm_used_car_model.pkl')
model = joblib.load(model_path)
print("✅ Model loaded")

# Load the sentence transformer model from a local folder; it is used to embed
# the car name in generate_name_embeddings().
sentence_model_path = os.path.join(EMBEDDINGS_DIR, 'all_mpnet_base_v2')
sentence_model = SentenceTransformer(sentence_model_path)
print("✅ Sentence Transformer loaded")

# Load categorical features mapping (a pickled object indexed by string keys).
categorical_map_path = os.path.join(EMBEDDINGS_DIR, 'categorical_features_map.pkl')
with open(categorical_map_path, 'rb') as f:
    categorical_map = pickle.load(f)
print("✅ Categorical mapping loaded")

# Names of the one-hot encoded columns; encode_categorical_features() creates
# one zero-initialised entry per name in this collection.
all_encoded_columns = categorical_map['new_encoded_columns']

# Column names selected, in this order, from the combined embedding and
# categorical features and handed to model.predict(). Any name missing from the
# computed features is filled with 0 in prepare_features_for_prediction().
# NOTE(review): presumably this must match the feature set and order the model
# was trained with — confirm against the training pipeline.
MODEL_FEATURES = [
    'name_emb_307', 'name_emb_741', 'origin_local', 'name_emb_559',
    'name_emb_618', 'name_emb_207', 'name_emb_661', 'name_emb_766',
    'name_emb_541', 'name_emb_518', 'interior_color_coffee brown',
    'exterior_color_dark silver'
]

# Collect the image file names handed to the index template for background
# rotation.
# - os.listdir() returns entries in arbitrary order, so the names are sorted to
#   give a stable rotation order across restarts and platforms.
# - The extension is matched case-insensitively so files such as "car.PNG" are
#   not silently skipped.
# - A path that exists but is not a directory is treated like a missing
#   directory instead of crashing the import.
try:
    car_images = sorted(
        entry for entry in os.listdir(IMAGES_DIR)
        if entry.lower().endswith('.png')
    )
    print(f"✅ Found {len(car_images)} car images in {IMAGES_DIR}")
except (FileNotFoundError, NotADirectoryError):
    print(f"⚠️ Warning: Car Images directory not found at {IMAGES_DIR}")
    car_images = []

# ===============================
# Helper Functions
# ===============================
def generate_name_embeddings(car_name):
    """Return the sentence-transformer embedding vector for one car name."""
    # encode() works on a batch, so wrap the name in a one-item list and
    # hand back the only row of the result.
    batch = sentence_model.encode([car_name], show_progress_bar=False)
    return batch[0]

def encode_categorical_features(origin, transmission, fuel_type, interior_color,
                                exterior_color, encoded_columns=None):
    """
    One-hot encode the categorical inputs.

    Args:
        origin: Origin of the car; only the value 'local' (any letter case)
            sets a flag.
        transmission: Transmission type as entered by the user.
        fuel_type: Fuel type as entered by the user.
        interior_color: Interior colour as entered by the user.
        exterior_color: Exterior colour as entered by the user.
        encoded_columns: Optional iterable of encoded column names to use.
            Defaults to the module-level ``all_encoded_columns``.

    Returns:
        A dict with one entry per encoded column, 0 by default and 1 for the
        columns matching the supplied values. A value with no matching
        ``<feature>_<value>`` column leaves every column of that feature at 0.
    """
    if encoded_columns is None:
        encoded_columns = all_encoded_columns

    # Start with every known encoded column switched off.
    encoded_dict = dict.fromkeys(encoded_columns, 0)

    # Origin is special-cased: 'origin_local' is set even when it is absent
    # from the column list, and other origin values set nothing.
    if origin.lower() == 'local':
        encoded_dict['origin_local'] = 1

    # The remaining features share one rule: build '<feature>_<lowercased value>'
    # and switch it on only when that column exists.
    selections = (
        ('transmission', transmission),
        ('fuel_type', fuel_type),
        ('interior_color', interior_color),
        ('exterior_color', exterior_color),
    )
    for prefix, value in selections:
        column = f'{prefix}_{value.lower()}'
        if column in encoded_dict:
            encoded_dict[column] = 1

    return encoded_dict

def prepare_features_for_prediction(car_name, origin, transmission, fuel_type,
                                   interior_color, exterior_color):
    """
    Build the single-row model input for one car.

    The car name is embedded, the categorical inputs are one-hot encoded, and
    the columns listed in MODEL_FEATURES are returned in that order as a
    one-row DataFrame. Any listed feature that was not computed is 0.
    """
    # Name embedding -> 'name_emb_<index>' entries.
    vector = generate_name_embeddings(car_name)
    features = {f'name_emb_{idx}': component for idx, component in enumerate(vector)}

    # One-hot categorical flags are merged on top of the embedding entries.
    features.update(
        encode_categorical_features(
            origin, transmission, fuel_type, interior_color, exterior_color
        )
    )

    # Keep only the columns the model consumes, defaulting absent ones to 0.
    selected = {name: features.get(name, 0) for name in MODEL_FEATURES}
    return pd.DataFrame([selected])

# ===============================
# Routes
# ===============================
@app.route('/')
def index():
    """Serve the landing page, passing the discovered car image names to the template."""
    page = render_template('index.html', images=car_images)
    return page

@app.route('/predict', methods=['POST'])
def predict():
    """
    Handle prediction requests.

    Expects a JSON object with the string fields car_name, origin,
    transmission, fuel_type, interior_color and exterior_color.

    Returns:
        200 with success, predicted_price and formatted_price on success.
        400 with an error message when the body is not a JSON object or a
        field is missing, blank, or not a string.
        500 with an error message when feature preparation or the model fails.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so bad requests are answered with 400 rather than a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            'error': 'Request body must be a JSON object'
        }), 400

    field_names = (
        'car_name', 'origin', 'transmission',
        'fuel_type', 'interior_color', 'exterior_color',
    )
    # Non-string values (null, numbers, lists, ...) count as missing so they
    # cannot crash .strip() / .lower() further down.
    fields = {}
    for name in field_names:
        value = data.get(name)
        fields[name] = value.strip() if isinstance(value, str) else ''

    # Validate inputs
    if not all(fields.values()):
        return jsonify({
            'error': 'All fields are required'
        }), 400

    try:
        # Prepare features
        features_df = prepare_features_for_prediction(
            fields['car_name'], fields['origin'], fields['transmission'],
            fields['fuel_type'], fields['interior_color'],
            fields['exterior_color']
        )

        # Make prediction; convert to a plain float so it serializes as JSON.
        predicted_price = float(model.predict(features_df)[0])
    except Exception as e:
        # Top-level boundary: record the traceback server-side, then report.
        # NOTE(review): the exception text is returned to the client to keep
        # the existing response format; consider a generic message instead.
        app.logger.exception('Prediction failed')
        return jsonify({
            'error': f'Prediction failed: {str(e)}'
        }), 500

    # Return result
    return jsonify({
        'success': True,
        'predicted_price': predicted_price,
        'formatted_price': f'₦{predicted_price:,.2f}'
    })

@app.route('/health')
def health():
    """Report service status and whether the prediction model object is present."""
    model_ready = model is not None
    payload = {'status': 'healthy', 'model_loaded': model_ready}
    return jsonify(payload)

if __name__ == '__main__':
    # The server binds to all interfaces (0.0.0.0). With debug mode on, the
    # Werkzeug interactive debugger would be reachable by anyone who can reach
    # the port and allows arbitrary code execution, so debug is now opt-in:
    # set FLASK_DEBUG=1 (or true/yes/on) for local development only.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on'
    )

    banner = "=" * 50
    print("\n" + banner)
    print("🚗 Car Price Prediction App Started!")
    print(banner)
    print(f"📂 Working Directory: {BASE_DIR}")
    print(f"📂 Images Directory: {IMAGES_DIR}")
    print("🌐 Access the app at: http://localhost:7860")
    print(banner + "\n")
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)