File size: 3,247 Bytes
23aea5e
 
 
 
 
 
 
8f7d281
23aea5e
 
8f7d281
 
 
 
94e7d06
23aea5e
8f7d281
 
23aea5e
 
 
 
 
 
 
 
 
 
 
8f7d281
23aea5e
 
 
 
 
 
 
 
8f7d281
94e7d06
 
f9714df
94e7d06
23aea5e
 
8f7d281
 
 
 
 
 
 
 
 
 
 
c353490
8f7d281
 
 
 
 
 
23aea5e
8f7d281
23aea5e
8f7d281
23aea5e
 
8f7d281
23aea5e
 
 
 
 
8f7d281
 
 
 
 
 
 
 
23aea5e
 
8f7d281
23aea5e
8f7d281
23aea5e
 
 
 
 
 
8f7d281
 
23aea5e
 
f9714df
23aea5e
 
c353490
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import joblib
import pandas as pd
import numpy as np
from flask import Flask, request, jsonify
from flask_cors import CORS

# Initialize Flask app
app = Flask("Engineering College Predictor")
CORS(app)  # allow cross-origin requests (e.g. from a separately hosted frontend)

# 🔷 Load trained model & helpers once, at module import time.
# NOTE(review): paths are relative to the current working directory —
# presumably the app is launched from the project root; confirm in deployment.
model = joblib.load('xgb_best_model.joblib')            # trained classifier exposing predict_proba
label_encoder = joblib.load('label_encoder.joblib')     # maps class index <-> choice code
feature_columns = joblib.load('feature_columns.joblib') # exact column order used at training time
choice_code_map = pd.read_csv('choice_code_map.csv', index_col='Choice Code')  # choice code -> college/course names

print("✅ Model and helpers loaded.")

# Home route
@app.get('/')
def home():
    """Landing / health-check endpoint for the API root."""
    welcome_message = "✅ Welcome to Engineering College Predictor API!"
    return welcome_message

# Predict route
@app.post('/predict')
def predict():
    """Return the top-20 most likely choice codes for a candidate.

    Expects a JSON body with 'Category', 'Rank', 'Percentage' and
    'Course Name'. Responds with {"top_20_predictions": [...]}, each entry
    carrying the choice code, college/course names and the probability
    renormalized over the top 20 (as a percentage).

    Returns 400 on a missing/malformed body, missing fields, or
    non-numeric Rank/Percentage; 500 on unexpected errors.
    """
    try:
        # Parse input JSON. silent=True makes get_json return None
        # (instead of raising) on a missing or malformed body, so bad
        # requests get a clean 400 rather than an opaque 500.
        data = request.get_json(silent=True)
        if data is None:
            return jsonify({"error": "Request body must be valid JSON"}), 400
        print(f"📥 Received data: {data}")

        # Validate input
        required_fields = ['Category', 'Rank', 'Percentage', 'Course Name']
        missing = [f for f in required_fields if f not in data]
        if missing:
            return jsonify({"error": f"Missing fields: {missing}"}), 400

        # Coerce numeric fields up front so a string like "abc" yields a
        # 400 here instead of a 500 deep inside the feature engineering.
        try:
            rank_value = float(data['Rank'])
            percentage_value = float(data['Percentage'])
        except (TypeError, ValueError):
            return jsonify(
                {"error": "'Rank' and 'Percentage' must be numeric"}
            ), 400

        # Build a single-row DataFrame matching the training layout
        df = pd.DataFrame([{
            'Category': data['Category'],
            'Rank': rank_value,
            'Percentage': percentage_value,
            'Course Name': data['Course Name']
        }])

        # Feature engineering — must mirror the training pipeline
        df["Rank_log"] = np.log1p(df["Rank"])
        df["Percentage_bin"] = pd.cut(
            df["Percentage"], bins=[0, 50, 60, 70, 80, 90, 100], labels=False
        )

        # One-hot encode the categoricals; numeric columns (Rank, Rank_log,
        # Percentage, Percentage_bin) pass through get_dummies unchanged,
        # so no re-assignment is needed afterwards.
        X_query = pd.get_dummies(df)

        # Ensure every training column exists; categories unseen in this
        # query get an all-zero indicator column.
        for col in feature_columns:
            if col not in X_query.columns:
                X_query[col] = 0

        # Reorder (and drop extras) to the exact training column order
        X_query = X_query[feature_columns]

        # Predict class probabilities for the single query row
        proba = model.predict_proba(X_query)[0]

        # Indices of the 20 highest-probability classes, best first
        top_20_idx = np.argsort(proba)[::-1][:20]

        # Renormalize the top-20 probabilities so they sum to 100%
        top_20_probs = proba[top_20_idx]
        top_20_probs_normalized = top_20_probs / top_20_probs.sum() * 100

        results = []
        for position, (idx, prob) in enumerate(
            zip(top_20_idx, top_20_probs_normalized), start=1
        ):
            # BUG FIX: the original tested membership with the raw
            # (possibly string-typed) decoded label but looked up with
            # int(...). With a string-typed encoder and an integer CSV
            # index, membership always failed, so every result came back
            # "Unknown". Convert once and use the same value for both
            # the membership test and the .loc lookup.
            choice_code = int(label_encoder.inverse_transform([idx])[0])
            if choice_code in choice_code_map.index:
                row = choice_code_map.loc[choice_code]
                college_name = row['College Name']
                course_name = row['Course Name']
            else:
                college_name = "Unknown"
                course_name = "Unknown"
            results.append({
                "rank": position,
                "choice_code": choice_code,
                "college_name": college_name,
                "course_name": course_name,
                "probability_percent": round(float(prob), 2)
            })

        return jsonify({"top_20_predictions": results})

    except Exception as e:
        import traceback
        traceback.print_exc()
        return jsonify({"error": str(e)}), 500


# Run server
if __name__ == '__main__':
    app.run(debug=False, host='0.0.0.0', port=7860)