Spaces:
Sleeping
Sleeping
File size: 3,247 Bytes
23aea5e 8f7d281 23aea5e 8f7d281 94e7d06 23aea5e 8f7d281 23aea5e 8f7d281 23aea5e 8f7d281 94e7d06 f9714df 94e7d06 23aea5e 8f7d281 c353490 8f7d281 23aea5e 8f7d281 23aea5e 8f7d281 23aea5e 8f7d281 23aea5e 8f7d281 23aea5e 8f7d281 23aea5e 8f7d281 23aea5e 8f7d281 23aea5e f9714df 23aea5e c353490 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import joblib
import pandas as pd
import numpy as np
from flask import Flask, request, jsonify
from flask_cors import CORS
# Initialize Flask app
app = Flask("Engineering College Predictor")
# Allow cross-origin requests so a separately-hosted frontend can call the API.
CORS(app)
# 🔷 Load trained model & helpers (done once at startup, shared by all requests)
# Trained classifier; filename suggests an XGBoost model — TODO confirm.
model = joblib.load('xgb_best_model.joblib')
# Presumably maps model class indices <-> choice codes (used via
# inverse_transform in /predict) — verify against the training script.
label_encoder = joblib.load('label_encoder.joblib')
# Column names/order the model was trained on; /predict aligns its
# one-hot-encoded query frame to exactly these columns.
feature_columns = joblib.load('feature_columns.joblib')
# Lookup table from choice code to 'College Name' / 'Course Name',
# indexed by the 'Choice Code' column.
choice_code_map = pd.read_csv('choice_code_map.csv', index_col='Choice Code')
print("✅ Model and helpers loaded.")
# Home route
@app.get('/')
def home():
    """Landing endpoint: confirms the API is up and reachable."""
    return "✅ Welcome to Engineering College Predictor API!"
# Predict route
@app.post('/predict')
def predict():
    """Predict the top-20 most likely choice codes for a student.

    Expects a JSON object with 'Category', 'Rank', 'Percentage' and
    'Course Name'. Returns JSON with the 20 highest-probability choice
    codes, their college/course names, and probabilities normalized to
    sum to 100%. Responds 400 on bad input, 500 on unexpected errors.
    """
    try:
        # Parse input JSON. silent=True returns None instead of raising
        # on a missing/malformed body, so bad requests get a clean 400
        # rather than a 500 (the original crashed on `f not in None`).
        data = request.get_json(silent=True)
        print(f"📥 Received data: {data}")
        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        # Validate input
        required_fields = ['Category', 'Rank', 'Percentage', 'Course Name']
        missing = [f for f in required_fields if f not in data]
        if missing:
            return jsonify({"error": f"Missing fields: {missing}"}), 400

        # Coerce numeric fields early so bad input fails with a 400
        # instead of a 500 deep inside the feature pipeline.
        try:
            rank_value = float(data['Rank'])
            percentage_value = float(data['Percentage'])
        except (TypeError, ValueError):
            return jsonify({"error": "'Rank' and 'Percentage' must be numeric"}), 400

        # Build a single-row DataFrame for the feature pipeline
        df = pd.DataFrame([{
            'Category': data['Category'],
            'Rank': rank_value,
            'Percentage': percentage_value,
            'Course Name': data['Course Name']
        }])

        # Feature engineering — must mirror the training-time transforms.
        df["Rank_log"] = np.log1p(df["Rank"])
        df["Percentage_bin"] = pd.cut(
            df["Percentage"], bins=[0, 50, 60, 70, 80, 90, 100], labels=False
        )

        # One-hot encode and align with training columns
        X_query = pd.get_dummies(df)
        X_query["Rank_log"] = df["Rank_log"]
        X_query["Percentage_bin"] = df["Percentage_bin"]

        # Ensure every training column exists (missing dummies default to 0),
        # then reorder to exactly what the model saw at fit time.
        for col in feature_columns:
            if col not in X_query.columns:
                X_query[col] = 0
        X_query = X_query[feature_columns]

        # Class probabilities for the single query row
        proba = model.predict_proba(X_query)[0]

        # Indices of the 20 highest-probability classes, best first
        top_20_idx = np.argsort(proba)[::-1][:20]

        # Normalize top-20 probabilities so they sum to 100%
        top_20_probs = proba[top_20_idx]
        top_20_probs_normalized = top_20_probs / top_20_probs.sum() * 100

        results = []
        for rank_pos, (idx, prob) in enumerate(
            zip(top_20_idx, top_20_probs_normalized), start=1
        ):
            # Map the model class index back to the original choice code.
            # Convert to int once so the membership test and the .loc lookup
            # use the same key (the original tested the raw numpy value but
            # indexed with int(choice_code)).
            choice_code = int(label_encoder.inverse_transform([idx])[0])
            if choice_code in choice_code_map.index:
                row = choice_code_map.loc[choice_code]
                college_name = row['College Name']
                course_name = row['Course Name']
            else:
                college_name = "Unknown"
                course_name = "Unknown"
            results.append({
                "rank": rank_pos,
                "choice_code": choice_code,
                "college_name": college_name,
                "course_name": course_name,
                "probability_percent": round(float(prob), 2)
            })

        return jsonify({"top_20_predictions": results})
    except Exception as e:
        # Top-level boundary: log the full traceback server-side and
        # return the error message to the client as a 500.
        import traceback
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500
# Run server
if __name__ == '__main__':
    # Removed a stray trailing '|' scraping artifact that made this line a
    # SyntaxError. Bind on all interfaces for container hosting (port 7860
    # is the Hugging Face Spaces convention); debug disabled.
    app.run(debug=False, host='0.0.0.0', port=7860)