Spaces:

Pushpak21
/

EngineeringGeneral

Sleeping

App Files Files Community

Pushpak21 committed on Jul 20, 2025

Commit

8f7d281

verified ·

1 Parent(s): f9714df

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -14

app.py CHANGED Viewed

@@ -5,14 +5,17 @@ from flask import Flask, request, jsonify
 from flask_cors import CORS
 # Initialize Flask app
-app = Flask("Engineering College General Predictor")
 CORS(app)
-# Load trained pipeline & label encoder & choice_code_map
-pipeline = joblib.load('pipeline.joblib')
-target_encoder = joblib.load('label_encoder.joblib')
 choice_code_map = pd.read_csv('choice_code_map.csv', index_col='Choice Code')
 # Home route
 @app.get('/')
 def home():
@@ -24,6 +27,7 @@ def predict():
     try:
         # Parse input JSON
         data = request.get_json()
         # Validate input
         required_fields = ['Category', 'Rank', 'Percentage', 'Course Name']
@@ -32,40 +36,64 @@ def predict():
             return jsonify({"error": f"Missing fields: {missing}"}), 400
         # Build DataFrame
-        sample_df = pd.DataFrame([{
             'Category': data['Category'],
             'Rank': data['Rank'],
             'Percentage': data['Percentage'],
             'Course Name': data['Course Name']
         }])
         # Predict probabilities
-        proba = pipeline.predict_proba(sample_df)[0]
-        # Get top-20 indices (highest probabilities)
         top_20_idx = np.argsort(proba)[::-1][:20]
-        # Normalize top-20 probs to sum to 100
         top_20_probs = proba[top_20_idx]
         top_20_probs_normalized = top_20_probs / top_20_probs.sum() * 100
         results = []
         for rank, (idx, prob) in enumerate(zip(top_20_idx, top_20_probs_normalized), start=1):
-            choice_code = target_encoder.inverse_transform([idx])[0]
-            row = choice_code_map.loc[int(choice_code)]
-            college_name = row['College Name']
-            course_name = row['Course Name']
             results.append({
                 "rank": rank,
-                "choice_code": choice_code,
                 "college_name": college_name,
-                "course name": course_name,
                 "probability_percent": round(float(prob), 2)
             })
         return jsonify({"top_20_predictions": results})
     except Exception as e:
         return jsonify({"error": str(e)}), 500

 from flask_cors import CORS
 # Initialize Flask app
+app = Flask("Engineering College Predictor")
 CORS(app)
+# 🔷 Load trained model & helpers
+model = joblib.load('xgb_best_model.joblib')
+label_encoder = joblib.load('label_encoder.joblib')
+feature_columns = joblib.load('feature_columns.joblib')
 choice_code_map = pd.read_csv('choice_code_map.csv', index_col='Choice Code')
+print("✅ Model and helpers loaded.")
 # Home route
 @app.get('/')
 def home():
     try:
         # Parse input JSON
         data = request.get_json()
+        print(f"📥 Received data: {data}")
         # Validate input
         required_fields = ['Category', 'Rank', 'Percentage', 'Course Name']
             return jsonify({"error": f"Missing fields: {missing}"}), 400
         # Build DataFrame
+        df = pd.DataFrame([{
             'Category': data['Category'],
             'Rank': data['Rank'],
             'Percentage': data['Percentage'],
             'Course Name': data['Course Name']
         }])
+        # Feature engineering
+        df["Rank_log"] = np.log1p(df["Rank"])
+        df["Percentage_bin"] = pd.cut(
+            df["Percentage"], bins=[0,50,60,70,80,90,100], labels=False
+        )
+        # One-hot encode and align with training columns
+        X_query = pd.get_dummies(df)
+        X_query["Rank_log"] = df["Rank_log"]
+        X_query["Percentage_bin"] = df["Percentage_bin"]
+        # Ensure all training columns exist
+        for col in feature_columns:
+            if col not in X_query.columns:
+                X_query[col] = 0
+        X_query = X_query[feature_columns]
         # Predict probabilities
+        proba = model.predict_proba(X_query)[0]
+        # Get top-20 indices
         top_20_idx = np.argsort(proba)[::-1][:20]
+        # Normalize top-20 probabilities
         top_20_probs = proba[top_20_idx]
         top_20_probs_normalized = top_20_probs / top_20_probs.sum() * 100
         results = []
         for rank, (idx, prob) in enumerate(zip(top_20_idx, top_20_probs_normalized), start=1):
+            choice_code = label_encoder.inverse_transform([idx])[0]
+            if choice_code not in choice_code_map.index:
+                college_name = "Unknown"
+                course_name = "Unknown"
+            else:
+                row = choice_code_map.loc[int(choice_code)]
+                college_name = row['College Name']
+                course_name = row['Course Name']
             results.append({
                 "rank": rank,
+                "choice_code": int(choice_code),
                 "college_name": college_name,
+                "course_name": course_name,
                 "probability_percent": round(float(prob), 2)
             })
         return jsonify({"top_20_predictions": results})
     except Exception as e:
+        import traceback
+        traceback.print_exc()
         return jsonify({"error": str(e)}), 500