Pushpak21 committed on
Commit
8f7d281
·
verified ·
1 Parent(s): f9714df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -14
app.py CHANGED
@@ -5,14 +5,17 @@ from flask import Flask, request, jsonify
5
  from flask_cors import CORS
6
 
7
  # Initialize Flask app
8
- app = Flask("Engineering College General Predictor")
9
  CORS(app)
10
 
11
- # Load trained pipeline & label encoder & choice_code_map
12
- pipeline = joblib.load('pipeline.joblib')
13
- target_encoder = joblib.load('label_encoder.joblib')
 
14
  choice_code_map = pd.read_csv('choice_code_map.csv', index_col='Choice Code')
15
 
 
 
16
  # Home route
17
  @app.get('/')
18
  def home():
@@ -24,6 +27,7 @@ def predict():
24
  try:
25
  # Parse input JSON
26
  data = request.get_json()
 
27
 
28
  # Validate input
29
  required_fields = ['Category', 'Rank', 'Percentage', 'Course Name']
@@ -32,40 +36,64 @@ def predict():
32
  return jsonify({"error": f"Missing fields: {missing}"}), 400
33
 
34
  # Build DataFrame
35
- sample_df = pd.DataFrame([{
36
  'Category': data['Category'],
37
  'Rank': data['Rank'],
38
  'Percentage': data['Percentage'],
39
  'Course Name': data['Course Name']
40
  }])
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # Predict probabilities
43
- proba = pipeline.predict_proba(sample_df)[0]
44
 
45
- # Get top-20 indices (highest probabilities)
46
  top_20_idx = np.argsort(proba)[::-1][:20]
47
 
48
- # Normalize top-20 probs to sum to 100
49
  top_20_probs = proba[top_20_idx]
50
  top_20_probs_normalized = top_20_probs / top_20_probs.sum() * 100
51
 
52
  results = []
53
  for rank, (idx, prob) in enumerate(zip(top_20_idx, top_20_probs_normalized), start=1):
54
- choice_code = target_encoder.inverse_transform([idx])[0]
55
- row = choice_code_map.loc[int(choice_code)]
56
- college_name = row['College Name']
57
- course_name = row['Course Name']
 
 
 
 
58
  results.append({
59
  "rank": rank,
60
- "choice_code": choice_code,
61
  "college_name": college_name,
62
- "course name": course_name,
63
  "probability_percent": round(float(prob), 2)
64
  })
65
 
66
  return jsonify({"top_20_predictions": results})
67
 
68
  except Exception as e:
 
 
69
  return jsonify({"error": str(e)}), 500
70
 
71
 
 
5
  from flask_cors import CORS
6
 
7
  # Initialize Flask app
8
+ app = Flask("Engineering College Predictor")
9
  CORS(app)
10
 
11
+ # 🔷 Load trained model & helpers
12
+ model = joblib.load('xgb_best_model.joblib')
13
+ label_encoder = joblib.load('label_encoder.joblib')
14
+ feature_columns = joblib.load('feature_columns.joblib')
15
  choice_code_map = pd.read_csv('choice_code_map.csv', index_col='Choice Code')
16
 
17
+ print("✅ Model and helpers loaded.")
18
+
19
  # Home route
20
  @app.get('/')
21
  def home():
 
27
  try:
28
  # Parse input JSON
29
  data = request.get_json()
30
+ print(f"📥 Received data: {data}")
31
 
32
  # Validate input
33
  required_fields = ['Category', 'Rank', 'Percentage', 'Course Name']
 
36
  return jsonify({"error": f"Missing fields: {missing}"}), 400
37
 
38
  # Build DataFrame
39
+ df = pd.DataFrame([{
40
  'Category': data['Category'],
41
  'Rank': data['Rank'],
42
  'Percentage': data['Percentage'],
43
  'Course Name': data['Course Name']
44
  }])
45
 
46
+ # Feature engineering
47
+ df["Rank_log"] = np.log1p(df["Rank"])
48
+ df["Percentage_bin"] = pd.cut(
49
+ df["Percentage"], bins=[0,50,60,70,80,90,100], labels=False
50
+ )
51
+
52
+ # One-hot encode and align with training columns
53
+ X_query = pd.get_dummies(df)
54
+ X_query["Rank_log"] = df["Rank_log"]
55
+ X_query["Percentage_bin"] = df["Percentage_bin"]
56
+
57
+ # Ensure all training columns exist
58
+ for col in feature_columns:
59
+ if col not in X_query.columns:
60
+ X_query[col] = 0
61
+
62
+ X_query = X_query[feature_columns]
63
+
64
  # Predict probabilities
65
+ proba = model.predict_proba(X_query)[0]
66
 
67
+ # Get top-20 indices
68
  top_20_idx = np.argsort(proba)[::-1][:20]
69
 
70
+ # Normalize top-20 probabilities
71
  top_20_probs = proba[top_20_idx]
72
  top_20_probs_normalized = top_20_probs / top_20_probs.sum() * 100
73
 
74
  results = []
75
  for rank, (idx, prob) in enumerate(zip(top_20_idx, top_20_probs_normalized), start=1):
76
+ choice_code = label_encoder.inverse_transform([idx])[0]
77
+ if choice_code not in choice_code_map.index:
78
+ college_name = "Unknown"
79
+ course_name = "Unknown"
80
+ else:
81
+ row = choice_code_map.loc[int(choice_code)]
82
+ college_name = row['College Name']
83
+ course_name = row['Course Name']
84
  results.append({
85
  "rank": rank,
86
+ "choice_code": int(choice_code),
87
  "college_name": college_name,
88
+ "course_name": course_name,
89
  "probability_percent": round(float(prob), 2)
90
  })
91
 
92
  return jsonify({"top_20_predictions": results})
93
 
94
  except Exception as e:
95
+ import traceback
96
+ traceback.print_exc()
97
  return jsonify({"error": str(e)}), 500
98
 
99