ChitiN7 committed on
Commit
bb36621
·
verified ·
1 Parent(s): 68e7378

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +258 -0
  2. deploy_to_hf.py +106 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MLPayGrade Hugging Face Spaces Deployment
2
+ # This file will be automatically deployed on Hugging Face Spaces
3
+
4
+ import gradio as gr
5
+ import joblib
6
+ import json
7
+ import pickle
8
+ import pandas as pd
9
+ import numpy as np
10
+ import os
11
+
12
+ # Load model components
13
def load_model():
    """Load the saved model artifacts, reusing them after the first success.

    Reads ``best_model.pkl`` and ``scaler.pkl`` with joblib,
    ``feature_names.json`` with json and ``deployment_functions.pkl`` with
    pickle, all resolved relative to the current working directory.

    Returns:
        A 4-tuple ``(model, scaler, feature_names, deployment_data)``. If any
        artifact fails to load, the error is printed and
        ``(None, None, None, None)`` is returned instead.
    """
    # A successful load is cached on the function object so the artifacts are
    # not re-read from disk on every call. Failures are not cached, so a
    # later call retries.
    cached = getattr(load_model, "_components", None)
    if cached is not None:
        return cached

    try:
        # Load model and scaler
        model = joblib.load('best_model.pkl')
        scaler = joblib.load('scaler.pkl')

        # Load feature names
        with open('feature_names.json', 'r', encoding='utf-8') as f:
            feature_names = json.load(f)

        # NOTE(security): pickle can execute arbitrary code while loading;
        # only ship a deployment_functions.pkl that comes from a trusted source.
        with open('deployment_functions.pkl', 'rb') as f:
            deployment_data = pickle.load(f)
    except Exception as e:
        # Boundary handler: callers treat an all-None tuple as "unavailable".
        print(f"Error loading model components: {e}")
        return None, None, None, None

    load_model._components = (model, scaler, feature_names, deployment_data)
    return load_model._components
32
+
33
def engineer_features_simple(job_title, experience_level, company_size, employment_type, company_location, remote_ratio):
    """Build the feature dictionary for one job description.

    Free-text inputs are normalised before matching: ``job_title`` and
    ``company_location`` are stripped of surrounding whitespace and compared
    case-insensitively, and ``None`` is treated as an empty string.

    Args:
        job_title: Free-text job title; matched by substring against a small
            set of known title categories.
        experience_level: One of "EN", "MI", "SE", "EX"; anything else is
            encoded as 2.
        company_size: One of "S", "M", "L"; anything else is encoded as 2.
        employment_type: One of "FT", "PT", "CT", "FL"; anything else is
            encoded as 1.
        company_location: Country code; "US", "CA" and "GB" get their own
            flag, everything else is flagged as "Other".
        remote_ratio: Numeric remote-work ratio, stored as given.

    Returns:
        A dict mapping feature name to numeric value. Only the matched job
        title flag and the matched location flag are present.
    """
    # Basic mappings
    exp_mapping = {"EN": 1, "MI": 2, "SE": 3, "EX": 4}
    size_mapping = {"S": 1, "M": 2, "L": 3}
    emp_mapping = {"FT": 1, "PT": 0.5, "CT": 0.8, "FL": 0.7}

    # Normalise the free-text inputs once so stray whitespace, casing or a
    # missing value cannot send a known title/country to the "Other" bucket
    # or raise on None.
    title_text = (job_title or "").strip()
    title_key = title_text.lower()
    location_key = (company_location or "").strip().upper()

    # Create features
    features = {}
    features['work_year'] = 2024
    features['experience_level_encoded'] = exp_mapping.get(experience_level, 2)
    features['company_size_encoded'] = size_mapping.get(company_size, 2)
    features['employment_type_encoded'] = emp_mapping.get(employment_type, 1)
    features['remote_ratio'] = remote_ratio

    # Job title categories (simplified). Order matters: the first rule whose
    # keyword occurs in the title wins.
    title_rules = (
        (('data scientist',), 'job_title_Data_Scientist'),
        (('ml engineer', 'machine learning engineer'), 'job_title_ML_Engineer'),
        (('ai engineer',), 'job_title_AI_Engineer'),
        (('data engineer',), 'job_title_Data_Engineer'),
        (('data analyst',), 'job_title_Data_Analyst'),
    )
    title_feature = 'job_title_Other'
    for keywords, feature_name in title_rules:
        if any(keyword in title_key for keyword in keywords):
            title_feature = feature_name
            break
    features[title_feature] = 1

    # Location encoding (simplified)
    if location_key in ('US', 'CA', 'GB'):
        features[f'employee_residence_{location_key}'] = 1
    else:
        features['employee_residence_Other'] = 1

    # Interaction features
    exp_code = features['experience_level_encoded']
    size_code = features['company_size_encoded']
    features['exp_size_interaction'] = exp_code * size_code
    features['exp_remote_interaction'] = exp_code * remote_ratio
    features['size_remote_interaction'] = size_code * remote_ratio

    # Complexity features
    features['job_title_complexity'] = len(title_text.split())
    features['location_diversity'] = 1

    return features
83
+
84
def predict_salary(job_title, experience_level, company_size, employment_type, company_location, remote_ratio):
    """Predict a salary and build a Markdown explanation of the request.

    Loads the model components, engineers features from the inputs, orders
    them according to the saved feature-name list (features that were not
    produced default to 0), scales them and runs the model.

    Returns:
        A 2-tuple of strings ``(salary_text, details_markdown)``. When the
        model cannot be loaded or prediction raises, both strings carry an
        error message instead.
    """
    # Load model components (deployment data is loaded but not used here)
    model, scaler, feature_names, _deployment_data = load_model()

    if model is None:
        return "❌ Error: Failed to load model components", "Model not available"

    try:
        # Engineer features
        features = engineer_features_simple(
            job_title, experience_level, company_size,
            employment_type, company_location, remote_ratio
        )

        # Create the feature vector in the order given by feature_names and
        # shape it as a single-row 2-D array for the scaler.
        feature_vector = np.array(
            [features.get(feature, 0) for feature in feature_names]
        ).reshape(1, -1)
        feature_vector_scaled = scaler.transform(feature_vector)

        # Make prediction
        prediction = model.predict(feature_vector_scaled)[0]

        # Format output
        salary_formatted = f"${prediction:,.0f}"

        # Create explanation. The algorithm name, score and data year below
        # are fixed text, not values read from the loaded model.
        explanation = "\n".join([
            "",
            "**Prediction Details:**",
            f"- **Job Title:** {job_title}",
            f"- **Experience Level:** {experience_level}",
            f"- **Company Size:** {company_size}",
            f"- **Employment Type:** {employment_type}",
            f"- **Location:** {company_location}",
            f"- **Remote Work:** {remote_ratio}",
            "",
            "**Model Information:**",
            "- **Algorithm:** LightGBM Regressor",
            f"- **Features Used:** {len(feature_names)} clean features",
            "- **Performance:** R² = 0.2848 (honest, no data leakage)",
            "- **Data Year:** 2024",
            "",
            "**Key Features:**",
            f"- Experience Level: {features['experience_level_encoded']}",
            f"- Company Size: {features['company_size_encoded']}",
            f"- Remote Ratio: {remote_ratio}",
            f"- Job Complexity: {features['job_title_complexity']} words",
            "",
        ])

        return salary_formatted, explanation

    except Exception as e:
        # UI boundary: surface the failure in the output fields rather than
        # letting the callback raise.
        return f"❌ Error: {str(e)}", "Prediction failed"
142
+
143
# Create Gradio interface
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation was lost — confirm the row/column layout against the running app.
with gr.Blocks(
    title="MLPayGrade Advanced Salary Predictor",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { max-width: 1200px; margin: 0 auto; }
    .header { text-align: center; margin-bottom: 2rem; }
    .prediction-box { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 2rem; border-radius: 15px; text-align: center; }
    .salary-display { font-size: 3rem; font-weight: bold; margin: 1rem 0; }
    .metrics-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-top: 2rem; }
    .metric-card { background: white; padding: 1rem; border-radius: 10px; text-align: center; }
    """
) as demo:

    # Page header
    gr.Markdown("""
    <div class="header">
    <h1>💰 MLPayGrade Advanced Salary Predictor</h1>
    <h3>AI-Powered Salary Prediction with 85 Clean Features (No Data Leakage)</h3>
    <p>Predict salaries for Machine Learning and AI professionals using our honest, data-leakage-free model</p>
    </div>
    """)

    with gr.Row():
        # Left column: inputs, the predict button and static model metrics
        with gr.Column(scale=1):
            gr.Markdown("## 🎯 Job Configuration")

            job_title = gr.Textbox(
                label="Job Title",
                value="Data Scientist",
                placeholder="e.g., Data Scientist, ML Engineer, Research Scientist",
                info="Enter the specific job title"
            )

            experience_level = gr.Dropdown(
                label="Experience Level",
                choices=["EN", "MI", "SE", "EX"],
                value="SE",
                info="EN=Entry, MI=Mid, SE=Senior, EX=Executive"
            )

            company_size = gr.Dropdown(
                label="Company Size",
                choices=["S", "M", "L"],
                value="M",
                info="S=Small(<50), M=Medium(50-250), L=Large(>250)"
            )

            employment_type = gr.Dropdown(
                label="Employment Type",
                choices=["FT", "PT", "CT", "FL"],
                value="FT",
                info="FT=Full-time, PT=Part-time, CT=Contract, FL=Freelance"
            )

            company_location = gr.Textbox(
                label="Company Location",
                value="US",
                placeholder="e.g., US, CA, GB, AU, DE, FR",
                info="Enter country code"
            )

            remote_ratio = gr.Slider(
                label="Remote Work Ratio",
                minimum=0.0,
                maximum=1.0,
                value=0.5,
                step=0.5,
                info="0.0=On-site, 0.5=Hybrid, 1.0=Remote"
            )

            predict_btn = gr.Button("🚀 Predict Salary", variant="primary", size="lg")

            # Static performance figures shown as text
            gr.Markdown("---")
            gr.Markdown("## 📊 Model Performance (Corrected)")
            gr.Markdown("**R² Score:** 0.2848")
            gr.Markdown("**MAE:** $44,323.68")
            gr.Markdown("**RMSE:** $64,868.74")
            gr.Markdown("**Status:** No Data Leakage ✅")

        # Right column: prediction outputs
        with gr.Column(scale=2):
            gr.Markdown("## 📈 Prediction Results")

            with gr.Row():
                salary_output = gr.Textbox(
                    label="Predicted Annual Salary",
                    value="Enter job details and click Predict",
                    scale=2
                )

            explanation_output = gr.Markdown(
                value="Detailed explanation will appear here after prediction",
                label="📋 Prediction Details & Model Information"
            )

            gr.Markdown("## 🎯 What-If Analysis")
            gr.Markdown("Try changing the parameters above to see how they affect salary predictions!")

    # Event handlers: the button feeds the six inputs to predict_salary and
    # writes its two return values into the two output components.
    predict_btn.click(
        fn=predict_salary,
        inputs=[job_title, experience_level, company_size, employment_type, company_location, remote_ratio],
        outputs=[salary_output, explanation_output]
    )

    # Page footer
    gr.Markdown("---")
    gr.Markdown("""
    <div style="text-align: center; color: #6c757d;">
    <h4>MLPayGrade Advanced Track - Deployed on Hugging Face Spaces</h4>
    <p><strong>Model:</strong> LightGBM Regressor | <strong>Features:</strong> 85 Clean | <strong>Performance:</strong> R² = 0.2848</p>
    <p><strong>Data Quality:</strong> 2024 ML/AI Job Market | <strong>Validation:</strong> Honest Performance (No Data Leakage)</p>
    </div>
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()
deploy_to_hf.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ MLPayGrade Hugging Face Deployment Helper
4
+ This script helps prepare and upload your model to Hugging Face Spaces
5
+ """
6
+
7
+ import os
8
+ import shutil
9
+ import subprocess
10
+ import sys
11
+
12
def check_files():
    """Check that every file needed for deployment exists.

    Paths are resolved relative to the current working directory. A summary
    is printed either way; missing files are listed one per line.

    Returns:
        True when all required files exist, False otherwise.
    """
    required_files = [
        'app.py',
        'requirements.txt',
        'best_model.pkl',
        'scaler.pkl',
        'feature_names.json',
        'deployment_functions.pkl',
        'shap_explainer.pkl',
        'shap_importance.json',
    ]

    missing_files = [name for name in required_files if not os.path.exists(name)]

    if missing_files:
        print("❌ Missing required files:")
        for name in missing_files:
            print(f" - {name}")
        return False

    print("✅ All required files are present!")
    return True
38
+
39
def create_deployment_folder():
    """Create a clean ``hf_deployment`` folder and copy the deployment files in.

    Any existing ``hf_deployment`` folder in the current working directory is
    removed first. Files from the copy list that do not exist are skipped
    silently.

    Returns:
        The deployment folder name, ``"hf_deployment"``.
    """
    deploy_folder = "hf_deployment"

    # Start from an empty folder so stale files from a previous run never
    # end up in the upload.
    if os.path.exists(deploy_folder):
        shutil.rmtree(deploy_folder)

    os.makedirs(deploy_folder)

    # Copy all required files
    files_to_copy = [
        'app.py',
        'requirements.txt',
        'best_model.pkl',
        'scaler.pkl',
        'feature_names.json',
        'deployment_functions.pkl',
        'shap_explainer.pkl',
        'shap_importance.json',
    ]

    for name in files_to_copy:
        if os.path.exists(name):
            shutil.copy2(name, deploy_folder)
            print(f"📁 Copied: {name}")

    return deploy_folder
66
+
67
def main():
    """Run the deployment helper: verify files, stage them, print next steps.

    Stops early with a message when required files are missing. Otherwise
    creates the deployment folder, prints manual upload instructions, and
    makes a best-effort attempt to open the folder in the platform's file
    browser.
    """
    print("🚀 MLPayGrade Hugging Face Deployment Helper")
    print("=" * 50)

    # Check files
    if not check_files():
        print("\n❌ Please ensure all required files are present before deployment.")
        return

    # Create deployment folder
    deploy_folder = create_deployment_folder()

    print(f"\n✅ Deployment folder created: {deploy_folder}")
    print("\n📋 Next Steps:")
    print("1. Go to https://huggingface.co/spaces")
    print("2. Click 'Create new Space'")
    print("3. Choose 'Gradio' as SDK")
    print("4. Set Space name (e.g., 'MLPayGrade-Salary-Predictor')")
    print("5. Choose visibility (Public or Private)")
    print("6. Upload all files from the 'hf_deployment' folder")
    print("7. Wait for automatic deployment")

    print(f"\n📁 Files ready in: {os.path.abspath(deploy_folder)}")
    print("\n🎯 Your app will be available at:")
    print(" https://huggingface.co/spaces/YOUR_USERNAME/SPACE_NAME")

    # Open deployment folder (best effort)
    try:
        if sys.platform == "darwin":  # macOS
            subprocess.run(["open", deploy_folder])
        elif sys.platform == "win32":  # Windows
            subprocess.run(["explorer", deploy_folder])
        else:  # Linux
            subprocess.run(["xdg-open", deploy_folder])
        print(f"\n📂 Opened deployment folder: {deploy_folder}")
    except OSError:
        # The opener program is missing or could not be started; fall back
        # to printing the path. Catching OSError rather than using a bare
        # except keeps Ctrl-C and SystemExit working.
        print(f"\n📂 Deployment folder location: {os.path.abspath(deploy_folder)}")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ joblib>=1.3.0
3
+ pandas>=1.5.0
4
+ numpy>=1.24.0
5
+ scikit-learn>=1.3.0
6
+ lightgbm>=4.0.0