Spaces:
Sleeping
Sleeping
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
# Flask application object; CORS handling is attached to it right away.
app = Flask("Engineering College Predictor")
CORS(app)

# Inference artifacts, deserialized once at import time in this fixed order:
# the classifier, the encoder that maps class indices back to choice codes,
# and the column layout that query frames are aligned to.
model, label_encoder, feature_columns = map(
    joblib.load,
    (
        "xgb_best_model.joblib",
        "label_encoder.joblib",
        "feature_columns.joblib",
    ),
)

# Lookup table keyed by choice code; handlers read the 'College Name' and
# 'Course Name' columns from it.
choice_code_map = pd.read_csv("choice_code_map.csv", index_col="Choice Code")

# NOTE(review): the first character of this message looks mis-encoded;
# it is reproduced unchanged here.
print("β Model and helpers loaded.")
| # Home route | |
# NOTE(review): no route registration for this handler is visible in the
# file, so it was unreachable; "/" is assumed from the "Home route" label --
# confirm the intended path.
@app.route("/")
def home():
    """Return a plain-text greeting confirming that the API is reachable."""
    # NOTE(review): the first character of the message looks mis-encoded;
    # it is kept unchanged so the response body stays the same.
    return "β Welcome to Engineering College Predictor API!"
| # Predict route | |
# NOTE(review): no route registration for this handler is visible in the
# file, so it was unreachable; POST /predict is assumed from the
# "Predict route" label and the JSON body it reads -- confirm.
@app.route("/predict", methods=["POST"])
def predict():
    """Return the 20 most probable choice codes for the posted applicant.

    The request body must be a JSON object with the keys ``Category``,
    ``Rank``, ``Percentage`` and ``Course Name``. ``Rank`` and
    ``Percentage`` must be finite numbers (numeric strings are accepted)
    and ``Rank`` must not be negative.

    Returns:
        On success, a JSON object ``{"top_20_predictions": [...]}`` whose
        entries carry ``rank``, ``choice_code``, ``college_name``,
        ``course_name`` and ``probability_percent``; the percentages are
        rescaled so the returned entries sum to 100.
        On invalid input, ``{"error": ...}`` with status 400.
        On any unexpected failure, ``{"error": ...}`` with status 500.
    """
    top_k = 20
    try:
        # silent=True returns None for a missing, malformed or non-JSON body
        # instead of raising, so the client gets a 400 rather than a 500.
        data = request.get_json(silent=True)
        print(f"π₯ Received data: {data}")
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        required_fields = ['Category', 'Rank', 'Percentage', 'Course Name']
        missing = [f for f in required_fields if f not in data]
        if missing:
            return jsonify({"error": f"Missing fields: {missing}"}), 400

        # Coerce the numeric inputs up front: a string or null here would
        # otherwise blow up inside np.log1p / pd.cut and surface as a 500.
        try:
            rank_value = float(data['Rank'])
            percentage = float(data['Percentage'])
        except (TypeError, ValueError):
            return jsonify(
                {"error": "Rank and Percentage must be numeric"}
            ), 400
        if not (np.isfinite(rank_value) and np.isfinite(percentage)):
            return jsonify(
                {"error": "Rank and Percentage must be finite numbers"}
            ), 400
        if rank_value < 0:
            return jsonify({"error": "Rank must not be negative"}), 400

        df = pd.DataFrame([{
            'Category': data['Category'],
            'Rank': rank_value,
            'Percentage': percentage,
            'Course Name': data['Course Name'],
        }])

        # Feature engineering. pd.cut uses right-closed bins by default, so
        # a percentage of exactly 0 or above 100 gets a NaN bin; the bin
        # edges are left untouched (presumably they mirror the training
        # preprocessing -- verify).
        df["Rank_log"] = np.log1p(df["Rank"])
        df["Percentage_bin"] = pd.cut(
            df["Percentage"], bins=[0, 50, 60, 70, 80, 90, 100], labels=False
        )

        # get_dummies one-hot encodes the text columns and passes numeric
        # columns through; reindex then adds any absent training column as 0,
        # drops extras and fixes the column order in a single step.
        X_query = pd.get_dummies(df).reindex(
            columns=feature_columns, fill_value=0
        )

        proba = model.predict_proba(X_query)[0]

        # Highest-probability class indices first, capped at top_k.
        top_idx = np.argsort(proba)[::-1][:top_k]
        top_probs = proba[top_idx]
        total = top_probs.sum()
        # Guard the rescaling so an all-zero vector cannot put NaN in the JSON.
        if total > 0:
            top_percent = top_probs / total * 100
        else:
            top_percent = np.zeros_like(top_probs)

        # Decode every selected class index in one call, not one per item.
        choice_codes = label_encoder.inverse_transform(top_idx)
        known_codes = choice_code_map.index

        results = []
        for rank, (raw_code, prob) in enumerate(
            zip(choice_codes, top_percent), start=1
        ):
            # Use the same int key for the membership test and the lookup;
            # previously the test used the raw label and the lookup the int.
            code = int(raw_code)
            if code in known_codes:
                row = choice_code_map.loc[code]
                college_name = row['College Name']
                course_name = row['Course Name']
            else:
                college_name = "Unknown"
                course_name = "Unknown"
            results.append({
                "rank": rank,
                "choice_code": code,
                "college_name": college_name,
                "course_name": course_name,
                "probability_percent": round(float(prob), 2),
            })

        return jsonify({"top_20_predictions": results})
    except Exception as e:
        # Top-level boundary of the handler: log the full traceback and
        # report the failure to the client instead of letting it propagate.
        import traceback
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
# Script entry point: start Flask's built-in server on port 7860, bound to
# 0.0.0.0, with debug mode off.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860, debug=False)