"""Flask app that predicts used-car prices from a LightGBM model over
sentence-transformer name embeddings plus one-hot categorical features."""

from flask import Flask, render_template, request, jsonify
import os
import numpy as np
import pandas as pd
import pickle
import joblib
from sentence_transformers import SentenceTransformer

app = Flask(__name__)

# ===============================
# Configuration
# ===============================
# All resource paths are resolved relative to this file so the app can be
# started from any working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
EMBEDDINGS_DIR = os.path.join(BASE_DIR, 'embeddings')
MODEL_DIR = os.path.join(BASE_DIR, 'model')
IMAGES_DIR = os.path.join(BASE_DIR, 'static', 'Car Images')

# ===============================
# Load Model and Resources
# ===============================
# Resources are loaded once at import time so every request reuses them.
print("Loading resources...")

# Load the trained LightGBM model
model_path = os.path.join(MODEL_DIR, 'lightgbm_used_car_model.pkl')
model = joblib.load(model_path)
print("✅ Model loaded")

# Load the sentence transformer model (local copy of all-mpnet-base-v2)
sentence_model_path = os.path.join(EMBEDDINGS_DIR, 'all_mpnet_base_v2')
sentence_model = SentenceTransformer(sentence_model_path)
print("✅ Sentence Transformer loaded")

# Load categorical features mapping.
# NOTE(review): pickle.load on a local artifact is fine; never point this at
# untrusted input.
categorical_map_path = os.path.join(EMBEDDINGS_DIR, 'categorical_features_map.pkl')
with open(categorical_map_path, 'rb') as f:
    categorical_map = pickle.load(f)
print("✅ Categorical mapping loaded")

# Extract the new encoded columns from the mapping — the full set of one-hot
# column names produced at training time.
all_encoded_columns = categorical_map['new_encoded_columns']

# Features used by the model (from your training).
# Order matters: the model expects exactly these columns in this order.
MODEL_FEATURES = [
    'name_emb_307', 'name_emb_741', 'origin_local', 'name_emb_559',
    'name_emb_618', 'name_emb_207', 'name_emb_661', 'name_emb_766',
    'name_emb_541', 'name_emb_518', 'interior_color_coffee brown',
    'exterior_color_dark silver'
]

# Get list of car images for background rotation.
# Best-effort: a missing directory is logged, not fatal (the UI just gets no
# background images). Note the '.png' match is case-sensitive.
try:
    car_images = [f for f in os.listdir(IMAGES_DIR) if f.endswith('.png')]
    print(f"✅ Found {len(car_images)} car images in {IMAGES_DIR}")
except FileNotFoundError:
    print(f"⚠️ Warning: Car Images directory not found at {IMAGES_DIR}")
    car_images = []

# ===============================
# Helper Functions
# ===============================
def generate_name_embeddings(car_name):
    """Generate embeddings for car name using the sentence transformer model.

    Returns a single embedding vector (768 dims for all-mpnet-base-v2).
    """
    # encode() takes a list of sentences; we pass one and take the first row.
    embedding = sentence_model.encode([car_name], show_progress_bar=False)[0]
    return embedding


def encode_categorical_features(origin, transmission, fuel_type, interior_color, exterior_color):
    """
    Encode categorical features using one-hot encoding matching training format.
    Returns a dictionary with all encoded column names set to 0 or 1.

    Unknown values are silently left as all-zeros, which matches how
    drop_first=True encodes the dropped baseline category at training time.
    """
    # Initialize all encoded columns to 0
    encoded_dict = {col: 0 for col in all_encoded_columns}

    # Set the appropriate columns to 1 based on input values.
    # Column naming format: <feature>_<lowercased value>.

    # Origin — only 'local' has a column (drop_first removed the other level)
    if origin.lower() == 'local':
        encoded_dict['origin_local'] = 1

    # Transmission (only set if the value existed in the training columns)
    transmission_col = f'transmission_{transmission.lower()}'
    if transmission_col in encoded_dict:
        encoded_dict[transmission_col] = 1

    # Fuel Type
    fuel_col = f'fuel_type_{fuel_type.lower()}'
    if fuel_col in encoded_dict:
        encoded_dict[fuel_col] = 1

    # Interior Color
    interior_col = f'interior_color_{interior_color.lower()}'
    if interior_col in encoded_dict:
        encoded_dict[interior_col] = 1

    # Exterior Color
    exterior_col = f'exterior_color_{exterior_color.lower()}'
    if exterior_col in encoded_dict:
        encoded_dict[exterior_col] = 1

    return encoded_dict


def prepare_features_for_prediction(car_name, origin, transmission, fuel_type, interior_color, exterior_color):
    """
    Prepare all features needed for model prediction.

    Returns a single-row DataFrame whose columns are exactly MODEL_FEATURES,
    in order, ready to feed to model.predict().
    """
    # 1. Generate name embeddings (768 dimensions for all-mpnet-base-v2)
    name_embedding = generate_name_embeddings(car_name)
    embedding_dict = {f'name_emb_{i}': value for i, value in enumerate(name_embedding)}

    # 2. Encode categorical features
    categorical_dict = encode_categorical_features(
        origin, transmission, fuel_type, interior_color, exterior_color
    )

    # 3. Combine all features
    all_features = {**embedding_dict, **categorical_dict}

    # 4. Create DataFrame with only the features used by the model
    df_input = pd.DataFrame([all_features])

    # Ensure all MODEL_FEATURES exist (fill missing with 0) so the column
    # selection below never raises KeyError.
    for feature in MODEL_FEATURES:
        if feature not in df_input.columns:
            df_input[feature] = 0

    # Select only the features the model was trained on, in training order
    df_model_input = df_input[MODEL_FEATURES]
    return df_model_input


# ===============================
# Routes
# ===============================
@app.route('/')
def index():
    """Render the main page."""
    return render_template('index.html', images=car_images)


@app.route('/predict', methods=['POST'])
def predict():
    """Handle prediction requests.

    Expects a JSON body with car_name, origin, transmission, fuel_type,
    interior_color and exterior_color. Returns the predicted price as JSON,
    400 on missing fields, 500 on unexpected failure.
    """
    try:
        # silent=True returns None instead of raising on a non-JSON body;
        # falling back to {} routes that case into the 400 below rather
        # than an AttributeError-driven 500.
        data = request.get_json(silent=True) or {}
        car_name = data.get('car_name', '').strip()
        origin = data.get('origin', '').strip()
        transmission = data.get('transmission', '').strip()
        fuel_type = data.get('fuel_type', '').strip()
        interior_color = data.get('interior_color', '').strip()
        exterior_color = data.get('exterior_color', '').strip()

        # Validate inputs — every field must be non-empty
        if not all([car_name, origin, transmission, fuel_type, interior_color, exterior_color]):
            return jsonify({
                'error': 'All fields are required'
            }), 400

        # Prepare features
        features_df = prepare_features_for_prediction(
            car_name, origin, transmission, fuel_type, interior_color, exterior_color
        )

        # Make prediction (single-row input -> single prediction)
        predicted_price = model.predict(features_df)[0]

        # Return result; float() makes the numpy scalar JSON-serializable
        return jsonify({
            'success': True,
            'predicted_price': float(predicted_price),
            'formatted_price': f'₦{predicted_price:,.2f}'
        })

    except Exception as e:
        # Boundary handler: surface the failure to the client as a 500
        return jsonify({
            'error': f'Prediction failed: {str(e)}'
        }), 500


@app.route('/health')
def health():
    """Health check endpoint."""
    return jsonify({'status': 'healthy', 'model_loaded': model is not None})


if __name__ == '__main__':
    print("\n" + "="*50)
    print("🚗 Car Price Prediction App Started!")
    print("="*50)
    print(f"📂 Working Directory: {BASE_DIR}")
    print(f"📂 Images Directory: {IMAGES_DIR}")
    print(f"🌐 Access the app at: http://localhost:7860")
    print("="*50 + "\n")
    # NOTE(review): debug=True enables the Werkzeug interactive debugger;
    # combined with host='0.0.0.0' this must not be exposed in production.
    app.run(debug=True, host='0.0.0.0', port=7860)