Spaces:
Running
Running
| from flask import Flask, render_template, request, jsonify | |
| import os | |
| import numpy as np | |
| import pandas as pd | |
| import pickle | |
| import joblib | |
| from sentence_transformers import SentenceTransformer | |
# WSGI application object; the route handlers and the __main__ runner below use it.
app: Flask = Flask(__name__)
# ===============================
# Configuration
# ===============================
# All paths are anchored at the directory containing this file, so they do not
# depend on the process's current working directory.
BASE_DIR: str = os.path.dirname(os.path.abspath(__file__))

# Sentence-transformer weights and the categorical feature map live here.
EMBEDDINGS_DIR: str = os.path.join(BASE_DIR, 'embeddings')

# Serialized price model.
MODEL_DIR: str = os.path.join(BASE_DIR, 'model')

# Car pictures, kept under Flask's static folder.
IMAGES_DIR: str = os.path.join(BASE_DIR, 'static', 'Car Images')
# ===============================
# Load Model and Resources
# ===============================
# Everything in this section runs once, at import time, so the request
# handlers reuse the already-loaded objects.
#
# NOTE(security): joblib.load and pickle.load execute arbitrary code embedded
# in the file being read. The artifacts below must only ever be files shipped
# with this application, never user-supplied uploads.
print("Loading resources...")

# Trained LightGBM model used for price prediction.
model_path = os.path.join(MODEL_DIR, 'lightgbm_used_car_model.pkl')
model = joblib.load(model_path)
print("\u2713 Model loaded")

# Sentence transformer used to embed the free-text car name.
sentence_model_path = os.path.join(EMBEDDINGS_DIR, 'all_mpnet_base_v2')
sentence_model = SentenceTransformer(sentence_model_path)
print("\u2713 Sentence Transformer loaded")

# Mapping saved alongside the embeddings that describes the one-hot columns.
categorical_map_path = os.path.join(EMBEDDINGS_DIR, 'categorical_features_map.pkl')
with open(categorical_map_path, 'rb') as map_file:
    categorical_map = pickle.load(map_file)
print("\u2713 Categorical mapping loaded")

# Names of the one-hot encoded columns recorded in the mapping.
all_encoded_columns = categorical_map['new_encoded_columns']
# Column names, in order, that are selected from the prepared feature frame
# and handed to the model. Per the original note these come from training, so
# the names and their order should not be edited independently of the model.
MODEL_FEATURES = [
    'name_emb_307',
    'name_emb_741',
    'origin_local',
    'name_emb_559',
    'name_emb_618',
    'name_emb_207',
    'name_emb_661',
    'name_emb_766',
    'name_emb_541',
    'name_emb_518',
    'interior_color_coffee brown',
    'exterior_color_dark silver',
]
# Collect the PNG file names that the index page receives for background
# rotation. A missing (or non-directory) images path is not fatal: the app
# starts with an empty list and only prints a warning.
try:
    car_images = [f for f in os.listdir(IMAGES_DIR) if f.endswith('.png')]
except (FileNotFoundError, NotADirectoryError):
    # U+26A0 U+FE0F is the warning sign; written as escapes so the glyph
    # survives any re-encoding of this file.
    print(f"\u26a0\ufe0f Warning: Car Images directory not found at {IMAGES_DIR}")
    car_images = []
else:
    print(f"\u2713 Found {len(car_images)} car images in {IMAGES_DIR}")
# ===============================
# Helper Functions
# ===============================
def generate_name_embeddings(car_name):
    """Return the sentence-transformer embedding for a single car name.

    The name is encoded as a one-element batch (progress bar disabled) and
    the first, and only, row of the result is returned.
    """
    batch = sentence_model.encode([car_name], show_progress_bar=False)
    return batch[0]
def encode_categorical_features(origin, transmission, fuel_type, interior_color, exterior_color):
    """
    One-hot encode the categorical inputs against the known encoded columns.

    Every name in ``all_encoded_columns`` starts at 0. For each input the
    column ``<feature>_<lowercased value>`` is switched to 1 when such a
    column exists; values with no matching column leave the dict unchanged.

    Returns:
        dict mapping encoded column name -> 0 or 1.
    """
    flags = dict.fromkeys(all_encoded_columns, 0)

    # Origin is special-cased: only 'local' has a column, and it is written
    # without a membership check (so the key is created if it was absent).
    if origin.lower() == 'local':
        flags['origin_local'] = 1

    # The remaining features all follow the same "<prefix>_<value>" naming.
    selections = (
        ('transmission', transmission),
        ('fuel_type', fuel_type),
        ('interior_color', interior_color),
        ('exterior_color', exterior_color),
    )
    for prefix, value in selections:
        column = f'{prefix}_{value.lower()}'
        if column in flags:
            flags[column] = 1

    return flags
def prepare_features_for_prediction(car_name, origin, transmission, fuel_type,
                                    interior_color, exterior_color):
    """
    Build the single-row model input for one car.

    The car name is embedded and the categorical inputs are one-hot encoded;
    the two feature sets are merged and then reduced to exactly the columns
    in ``MODEL_FEATURES``, in that order. A model feature that neither source
    produced is filled with 0.

    Returns:
        pandas.DataFrame with one row and ``MODEL_FEATURES`` as its columns.
    """
    # 1. Name embedding -> {'name_emb_<i>': value}
    name_embedding = generate_name_embeddings(car_name)
    embedding_features = {
        f'name_emb_{i}': value for i, value in enumerate(name_embedding)
    }

    # 2. One-hot encoded categorical features
    categorical_features = encode_categorical_features(
        origin, transmission, fuel_type, interior_color, exterior_color
    )

    # 3. Merge; on a name clash the categorical value wins.
    all_features = {**embedding_features, **categorical_features}

    # 4. Keep only what the model consumes. Picking the values before building
    #    the frame avoids materialising every embedding/one-hot column and
    #    then inserting missing columns one at a time.
    row = {feature: all_features.get(feature, 0) for feature in MODEL_FEATURES}
    return pd.DataFrame([row], columns=MODEL_FEATURES)
# ===============================
# Routes
# ===============================
# NOTE(review): no route registration for this view is visible in the file,
# which leaves it unreachable; '/' is assumed for the main page - confirm.
@app.route('/')
def index():
    """Render the main page, passing the discovered car image names to the template."""
    return render_template('index.html', images=car_images)
# NOTE(review): no route registration for this view is visible in the file,
# which leaves it unreachable. The path and POST method below are assumed
# from the handler reading a JSON body - confirm against the front-end.
@app.route('/predict', methods=['POST'])
def predict():
    """Handle prediction requests.

    Expects a JSON object with the string fields ``car_name``, ``origin``,
    ``transmission``, ``fuel_type``, ``interior_color`` and
    ``exterior_color``.

    Returns:
        * 200 with ``success``, ``predicted_price`` (float) and
          ``formatted_price`` when the prediction succeeds.
        * 400 with ``error`` when the body is not a JSON object or any field
          is missing, blank, or not a string.
        * 500 with ``error`` when feature preparation or the model raises.
    """
    # Order matches the positional parameters of prepare_features_for_prediction.
    required_fields = (
        'car_name', 'origin', 'transmission',
        'fuel_type', 'interior_color', 'exterior_color',
    )

    # silent=True returns None for a missing, mistyped or malformed JSON body
    # instead of raising, so client mistakes are answered with 400, not 500.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    # Non-string values (null, numbers, lists, ...) are treated as missing
    # rather than crashing on .strip().
    values = []
    for field in required_fields:
        raw = payload.get(field, '')
        values.append(raw.strip() if isinstance(raw, str) else '')

    if not all(values):
        return jsonify({
            'error': 'All fields are required'
        }), 400

    try:
        features_df = prepare_features_for_prediction(*values)
        predicted_price = float(model.predict(features_df)[0])
    except Exception as e:  # request boundary: report the failure, don't crash
        app.logger.exception('Prediction failed')
        # NOTE(review): str(e) is echoed to the client as before; consider a
        # generic message if internal details should not be exposed.
        return jsonify({
            'error': f'Prediction failed: {str(e)}'
        }), 500

    return jsonify({
        'success': True,
        'predicted_price': predicted_price,
        # U+20A6 NAIRA SIGN, written as an escape so it survives re-encoding.
        'formatted_price': f'\u20a6{predicted_price:,.2f}'
    })
# NOTE(review): no route registration for this view is visible in the file,
# which leaves it unreachable; '/health' is assumed - confirm.
@app.route('/health')
def health():
    """Health check endpoint.

    Always reports ``status: healthy``; ``model_loaded`` is true when the
    module-level ``model`` object is not None.
    """
    return jsonify({'status': 'healthy', 'model_loaded': model is not None})
if __name__ == '__main__':
    port = 7860

    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary Python, and this server binds to every interface. Debug mode
    # is therefore off unless explicitly requested with FLASK_DEBUG=1.
    debug = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

    # Startup banner. The decorative glyphs that used to prefix these lines
    # were mis-encoded beyond recovery, so the text is kept plain.
    rule = "=" * 50
    print("\n" + rule)
    print("Car Price Prediction App Started!")
    print(rule)
    print(f"Working Directory: {BASE_DIR}")
    print(f"Images Directory: {IMAGES_DIR}")
    print(f"Access the app at: http://localhost:{port}")
    print(rule + "\n")

    app.run(debug=debug, host='0.0.0.0', port=port)