Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,87 +1,46 @@
|
|
| 1 |
from flask import Flask, request, jsonify
|
| 2 |
-
import
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
app = Flask(__name__)
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# Weight applied to each known skill when it is shared by both profiles.
# Skills that are not listed here count with a weight of 1.
skill_weights = {
    'java': 3, 'python': 3, 'react': 3, 'c++': 3, 'sql': 3, 'aws': 3, 'ai': 3, 'ml': 3,
    'tensorflow': 3, 'nodejs': 3, 'swift': 3, 'ios': 3, 'cybersecurity': 3,
    'devops': 3, 'docker': 3, 'go': 3, 'microservices': 3, 'qa': 3, 'selenium': 3,
    'data science': 3, 'vhdl': 3, 'verilog': 3, 'embedded c': 3, 'rtos': 3,
    'vlsi': 3, 'cad': 3, 'solidworks': 3, 'robotics': 3, 'staadpro': 3,
    'html': 2, 'css': 2, 'javascript': 2, 'ui/ux': 2, 'figma': 2, 'autocad': 2,
    'thermodynamics': 2, 'manufacturing': 2, 'signal processing': 2, 'analog design': 2,
}

# Flat bonus added to the raw score when both profiles name the same branch.
BRANCH_MATCH_BONUS = 10


def _parse_skills(raw_skills):
    """Return the set of lower-cased, non-empty skills in a '|'-separated value.

    ``None`` yields an empty set. Empty tokens are dropped because
    ``''.split('|')`` returns ``['']``, which would otherwise make two
    profiles without any skills appear to share one.
    """
    if raw_skills is None:
        return set()
    tokens = (token.strip() for token in str(raw_skills).lower().split('|'))
    return {token for token in tokens if token}


def _clean_branch(raw_branch):
    """Return the branch lower-cased and stripped, or '' when it is ``None``."""
    if raw_branch is None:
        return ''
    return str(raw_branch).lower().strip()


def calculate_alumni_score(viewer_profile, target_profile):
    """Score how well two profiles match on a 0-10 scale.

    Args:
        viewer_profile: Mapping read via ``.get`` for ``'viewer_skills'``
            (a '|'-separated string) and ``'viewer_branch'``.
        target_profile: Mapping read via ``.get`` for ``'target_skills'``
            (a '|'-separated string) and ``'target_branch'``.

    Returns:
        ``0`` when the profiles share no skills; otherwise the log-scaled
        score rounded to one decimal place and capped at 10.
    """
    # Imported locally so the function does not depend on the module's
    # import list being complete.
    import math

    viewer_skills = _parse_skills(viewer_profile.get('viewer_skills'))
    target_skills = _parse_skills(target_profile.get('target_skills'))
    viewer_branch = _clean_branch(viewer_profile.get('viewer_branch'))
    target_branch = _clean_branch(target_profile.get('target_branch'))

    common_skills = viewer_skills & target_skills
    if not common_skills:
        # Jaccard similarity is 0 without shared skills, which zeroes the
        # final score regardless of any branch bonus.
        return 0

    # 1. Raw score: weighted shared skills plus the optional branch bonus.
    skill_score = sum(skill_weights.get(skill, 1) for skill in common_skills)
    same_branch = bool(viewer_branch) and viewer_branch == target_branch
    base_score = skill_score + (BRANCH_MATCH_BONUS if same_branch else 0)

    # 2. Jaccard similarity: shared skills / all distinct skills.
    jaccard_similarity = len(common_skills) / len(viewer_skills | target_skills)

    # 3. The similarity acts as a multiplier, so a small overlap sharply
    #    reduces even a high raw score.
    final_score = base_score * jaccard_similarity

    # 4. Compress onto the UI scale with a logarithm and cap at 10.
    normalized_score = min(10, math.log(final_score + 1) * 3.0)
    return round(normalized_score, 1)
|
| 59 |
-
|
| 60 |
|
| 61 |
@app.route('/', methods=['POST'])
|
| 62 |
-
def
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
viewer_profile = {
|
| 67 |
-
'viewer_skills': data.get('viewer_skills'),
|
| 68 |
-
'viewer_branch': data.get('viewer_branch')
|
| 69 |
-
}
|
| 70 |
-
target_profile = {
|
| 71 |
-
'target_skills': data.get('target_skills'),
|
| 72 |
-
'target_branch': data.get('target_branch')
|
| 73 |
-
}
|
| 74 |
-
|
| 75 |
-
if not viewer_profile or not target_profile:
|
| 76 |
-
return jsonify({"error": "Missing profile data"}), 400
|
| 77 |
-
|
| 78 |
-
final_score = calculate_alumni_score(viewer_profile, target_profile)
|
| 79 |
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
return jsonify({"error": "An internal error occurred."}), 500
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from flask import Flask, request, jsonify
|
| 2 |
+
from flask_cors import CORS
|
| 3 |
+
import joblib
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import os
|
| 6 |
|
| 7 |
app = Flask(__name__)
|
| 8 |
+
CORS(app)
|
| 9 |
|
| 10 |
+
# The model artifacts ship in the same directory as this file. Resolve them
# relative to the module instead of the current working directory so loading
# works regardless of where the process is started from.
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
_ARTIFACT_DIR = os.path.dirname(os.path.abspath(__file__))
model = joblib.load(os.path.join(_ARTIFACT_DIR, 'alumni_match_model.joblib'))
model_columns = joblib.load(os.path.join(_ARTIFACT_DIR, 'model_feature_columns.joblib'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def _split_skills(raw_skills):
    """Return the set of lower-cased, non-empty skills in a '|'-separated value.

    ``None`` yields an empty set. Empty tokens are dropped because
    ``''.split('|')`` returns ``['']``, which would otherwise be counted as a
    skill shared by two profiles that list no skills at all.
    """
    if raw_skills is None:
        return set()
    tokens = (token.strip() for token in str(raw_skills).lower().split('|'))
    return {token for token in tokens if token}


@app.route('/', methods=['POST'])
def handler():
    """Predict a 0-10 match score for the profile pair in the JSON body.

    The body is read for ``viewer_skills``, ``target_skills`` ('|'-separated
    strings), ``viewer_branch``, ``target_branch`` and ``target_company``.
    Any other key is passed through as a candidate model feature.

    Returns:
        ``{'score': <int>}`` on success, or ``{'error': ...}`` with status 400
        when the body is not a JSON object or a model feature is missing.
    """
    # silent=True returns None instead of raising on a missing/invalid body,
    # so the client gets a clear 400 response.
    incoming_data = request.get_json(silent=True)
    if not isinstance(incoming_data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    row = dict(incoming_data)

    # Derived feature: number of skills both profiles list.
    shared_skills = (
        _split_skills(incoming_data.get('viewer_skills'))
        & _split_skills(incoming_data.get('target_skills'))
    )
    row['common_skills_count'] = len(shared_skills)

    # Derived feature: case-insensitive branch equality. Missing or
    # non-string branches count as "no match" instead of raising.
    viewer_branch = incoming_data.get('viewer_branch')
    target_branch = incoming_data.get('target_branch')
    row['branch_match'] = int(
        isinstance(viewer_branch, str)
        and isinstance(target_branch, str)
        and viewer_branch.lower() == target_branch.lower()
    )

    # One-hot company features: all zero, except the column matching the
    # target's company when the model has such a column.
    feature_names = list(model_columns)
    company_columns = {
        name for name in feature_names if str(name).startswith('company_')
    }
    row.update(dict.fromkeys(company_columns, 0))
    company_name = incoming_data.get('target_company', '')
    if company_name:
        company_col_name = f"company_{company_name}"
        if company_col_name in company_columns:
            row[company_col_name] = 1

    missing = [name for name in feature_names if name not in row]
    if missing:
        missing_list = ', '.join(str(name) for name in missing)
        return jsonify({'error': f'Missing required fields: {missing_list}'}), 400

    # Build the single-row frame with columns in the order given by
    # model_columns.
    final_df = pd.DataFrame([{name: row[name] for name in feature_names}])
    prediction_proba = model.predict_proba(final_df)
    match_probability = float(prediction_proba[0][1])

    # Cast to a plain int so the value is always JSON-serializable.
    final_score = int(round(match_probability * 10))
    return jsonify({'score': final_score})
|
| 43 |
+
|
| 44 |
+
# Start the built-in Flask server on all interfaces, port 7860, when this file is run directly.
|
| 45 |
+
if __name__ == "__main__":
|
| 46 |
+
app.run(host="0.0.0.0", port=7860)
|