Mandr1 committed on
Commit
c15ef70
·
verified ·
1 Parent(s): bea9034

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -179
app.py CHANGED
@@ -6,216 +6,172 @@ import joblib
6
  import traceback
7
 
8
  # ------------------------------
9
- # Helper: safe load joblib with message
10
  # ------------------------------
11
  def safe_load(path, name):
12
  try:
13
  obj = joblib.load(path)
14
  print(f"✅ {name} loaded from {path}")
15
  return obj
16
- except FileNotFoundError:
17
- print(f"❌ Error: '{path}' not found. Please ensure it's in the same directory.")
18
- raise
19
  except Exception as e:
20
- print(f"❌ Error loading {path}: {e}")
21
  raise
22
 
23
- # ==============================
24
- # BAGIAN 0: LOAD PREPROCESSOR & MODELS
25
- # ==============================
26
- print("Loading saved scikit-learn models and preprocessor...")
27
- preprocessor = safe_load('preprocessor.pkl', 'Preprocessor')
28
-
29
- lr_model = safe_load('lr_model.pkl', 'Linear Regression model')
30
- dt_model = safe_load('dt_model.pkl', 'Decision Tree model')
31
- rf_model = safe_load('rf_model.pkl', 'Random Forest model')
32
 
33
  loaded_models = {
34
- 'Linear Regression': lr_model,
35
- 'Decision Tree': dt_model,
36
- 'Random Forest': rf_model
37
  }
38
 
39
- # ==============================
40
- # BAGIAN: LOAD BENCHMARK CSV & WILAYAH
41
- # ==============================
42
- pd_df_clean = None
43
  try:
44
- pd_df_raw = pd.read_csv('job_salary_mean.csv')
45
- pd_df_clean = pd_df_raw.rename(columns={
46
  "Judul Pekerjaan": "judul",
47
  "Perusahaan": "perusahaan",
48
  "Lokasi": "lokasi",
49
  "Gaji_Rata2": "gaji"
50
  })
51
- pd_df_clean['judul_clean'] = pd_df_clean['judul'].astype(str).str.lower()
52
- pd_df_clean['lokasi_clean'] = pd_df_clean['lokasi'].astype(str).str.lower()
53
- pd_df_clean = pd_df_clean.dropna(subset=['judul_clean','lokasi_clean','gaji'])
54
- print(f"✅ Pandas DataFrame for benchmarks loaded and cleaned. Total rows: {len(pd_df_clean)}")
55
- except FileNotFoundError:
56
- print("❌ Error: 'job_salary_mean.csv' not found. Please ensure it's in the same directory.")
57
- pd_df_clean = pd.DataFrame(columns=['judul_clean','lokasi_clean','gaji'])
58
-
59
- # Load wilayah
60
- kamus_wilayah = {}
 
61
  try:
62
- geo_df = pd.read_csv('dataset kabupaten indonesia.csv')
63
- geo_df = geo_df[['name', 'Unnamed: 3']].rename(columns={'name': 'kota', 'Unnamed: 3': 'provinsi'})
64
- geo_df['kota_clean'] = geo_df['kota'].astype(str).str.replace('KABUPATEN ', '', regex=False)\
65
- .str.replace('KOTA ', '', regex=False)\
66
- .str.lower().str.strip()
67
- geo_df['provinsi'] = geo_df['provinsi'].astype(str).str.upper().str.strip()
68
- kamus_wilayah = pd.Series(geo_df.provinsi.values, index=geo_df.kota_clean).to_dict()
69
- print(f"✅ Berhasil memuat {len(kamus_wilayah)} wilayah administrasi Indonesia.")
70
- except FileNotFoundError:
71
- print("❌ WARNING: File 'dataset kabupaten indonesia.csv' tidak ditemukan. Fitur deteksi lokasi manual masih bisa digunakan.")
72
- kamus_wilayah = {}
73
-
74
- def get_pulau_from_provinsi(provinsi):
75
- p = provinsi.upper()
76
- if any(x in p for x in ['JAWA', 'DKI', 'BANTEN', 'YOGYAKARTA']): return "PULAU JAWA"
77
- if any(x in p for x in ['SUMATERA', 'ACEH', 'RIAU', 'JAMBI', 'BENGKULU', 'LAMPUNG', 'BANGKA']): return "PULAU SUMATERA"
78
- if any(x in p for x in ['KALIMANTAN']): return "PULAU KALIMANTAN"
79
- if any(x in p for x in ['SULAWESI', 'GORONTALO']): return "PULAU SULAWESI"
80
- if any(x in p for x in ['BALI', 'NUSA TENGGARA']): return "BALI & NUSA TENGGARA"
81
- if any(x in p for x in ['PAPUA', 'MALUKU']): return "PAPUA & MALUKU"
82
- return "INDONESIA (LAINNYA)"
83
-
84
- def deteksi_info_lokasi(input_user):
85
- text = str(input_user).lower().strip()
86
- provinsi_terdeteksi = "INDONESIA"
87
- for kota_db, prov_db in kamus_wilayah.items():
88
- if kota_db in text:
89
- provinsi_terdeteksi = prov_db
90
- break
91
- pulau_terdeteksi = get_pulau_from_provinsi(provinsi_terdeteksi)
92
- return provinsi_terdeteksi, pulau_terdeteksi
93
-
94
- # ==============================
95
- # FUNGSI UTAMA: analisis_gaji_final
96
- # ==============================
97
- def analisis_gaji_final(judul_input, lokasi_input, model_choice):
 
98
  try:
99
- # Safety for empty inputs
100
- if not judul_input or not lokasi_input:
101
- return ("<div style='color:#9a1f1f; padding:12px;'><b>Masukkan posisi dan lokasi terlebih dahulu.</b></div>", None)
102
-
103
- model_pipeline = loaded_models.get(model_choice)
104
- if model_pipeline is None:
105
- return (f"<div style='color:#9a1f1f; padding:12px;'><b>Model '{model_choice}' tidak tersedia.</b></div>", None)
106
-
107
- input_df = pd.DataFrame({
108
- 'judul_clean': [str(judul_input).lower()],
109
- 'lokasi_clean': [str(lokasi_input).lower()],
110
- 'perusahaan': ['unknown_company_for_prediction']
111
  })
112
 
113
- # If your preprocessor expects different feature names, ensure alignment here.
114
  try:
115
- prediksi_user = model_pipeline.predict(input_df)[0]
116
- prediksi_user = max(0, float(prediksi_user))
117
  except Exception as e:
118
- tb = traceback.format_exc()
119
- print("Prediction error:", tb)
120
- return (f"<div style='color:#9a1f1f; padding:12px;'><b>Gagal memprediksi:</b> {str(e)}</div>", None)
121
-
122
- # Benchmark logic
123
- judul_lower = str(judul_input).lower()
124
- filtered_jobs = pd_df_clean[pd_df_clean['judul_clean'].str.contains(judul_lower, na=False)]
125
- if not filtered_jobs.empty:
126
- max_gaji_job = float(filtered_jobs['gaji'].max())
127
- else:
128
- max_gaji_job = prediksi_user * 1.2
129
-
130
- provinsi_found, pulau_found = deteksi_info_lokasi(lokasi_input)
131
- keyword_pencarian = pulau_found.replace("PULAU ", "").lower()
132
- filtered_locations = pd_df_clean[pd_df_clean['lokasi_clean'].str.contains(keyword_pencarian, na=False)]
133
- if not filtered_locations.empty:
134
- max_gaji_region = float(filtered_locations['gaji'].max())
135
- else:
136
- max_gaji_region = prediksi_user * 1.5
137
-
138
- # Visualisasi (matplotlib)
139
- fig, ax = plt.subplots(figsize=(9,4.6))
140
- labels = [f"Estimasi Anda\n({lokasi_input})", f"Max Posisi\n(Nasional)", f"Max Regional\n({pulau_found})"]
141
- values = [prediksi_user, max_gaji_job, max_gaji_region]
142
- # subtle colors
143
- colors = ['#60a5fa', '#94a3b8', '#fbbf24']
144
-
145
- bars = ax.bar(labels, values, color=colors, edgecolor='none', alpha=0.95)
146
- ax.axhline(y=prediksi_user, color='#2563eb', linestyle='--', linewidth=1)
147
- ax.set_ylabel("Gaji (Rupiah)")
148
- ax.set_title(f"Analisis Gaji: {judul_input} — {provinsi_found} | Model: {model_choice}", fontsize=12)
149
- ax.grid(axis='y', linestyle='--', alpha=0.4)
150
- for bar in bars:
151
- height = bar.get_height()
152
- ax.text(bar.get_x() + bar.get_width()/2., height + (max(values)*0.015),
153
- f'Rp {int(height):,}', ha='center', va='bottom', fontsize=9)
154
-
155
- # HTML card hasil
156
- html_output = f"""
157
- <div style="font-family: Inter, system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial;
158
- padding:18px; border-radius:12px; background: linear-gradient(180deg, #ffffff 0%, #fbfbfc 100%);
159
- box-shadow: 0px 6px 20px rgba(16,24,40,0.04); color:#0f172a;">
160
- <h2 style="margin:0 0 6px 0; font-size:18px; color:#0f172a;">💰 Estimasi Gaji: <span style="color:#0b6fb7;">Rp {int(prediksi_user):,}</span></h2>
161
- <div style="font-size:13px; color:#475569; margin-bottom:10px;">
162
- 📍 <b>{provinsi_found}</b> / {pulau_found} &nbsp; • &nbsp; Model: <b>{model_choice}</b>
163
- </div>
164
- <div style="padding:10px; border-radius:8px; background:#f8fafc; color:#0f172a; font-size:13px;">
165
- Berdasarkan data historis, batas atas untuk posisi <b>{judul_input}</b> (nasional) mencapai <b>Rp {int(max_gaji_job):,}</b>.
166
- Untuk regional ({pulau_found}) tertinggi tercatat Rp <b>{int(max_gaji_region):,}</b>.
167
- </div>
168
  </div>
169
  """
170
- plt.tight_layout()
171
- return html_output, fig
172
 
173
  except Exception as e:
174
- tb = traceback.format_exc()
175
- print("Unhandled error in analisis_gaji_final:", tb)
176
- return (f"<div style='color:#9a1f1f; padding:12px;'><b>Terjadi kesalahan:</b> {str(e)}</div>", None)
177
-
178
- # ==============================
179
- # GRADIO UI - with custom CSS for subtle/elegant look
180
- # ==============================
181
- custom_css = """
182
- :root{
183
- --primary:#0b6fb7;
184
- --muted:#94a3b8;
185
- --card-bg: #ffffff;
186
- --accent: #f8fafc;
187
- }
188
- body { font-family: Inter, system-ui, -apple-system, 'Segoe UI', Roboto, 'Helvetica Neue', Arial; }
189
- .gradio-container { max-width: 1100px; margin: 18px auto; }
190
- .header { display:flex; align-items:center; gap:12px; margin-bottom:8px; }
191
- .small-brand { font-weight:700; color:var(--primary); font-size:20px; }
192
- .description { color:var(--muted); margin-bottom:14px; }
193
- .input-box .gr-textbox { border-radius:10px; }
194
- .gr-button { border-radius:10px; padding:10px 14px; font-weight:600; }
195
- .result-card { border-radius:12px; padding:6px; }
196
  """
197
 
198
- with gr.Blocks(title="Salary AI — Elegant", css=custom_css) as demo:
199
- with gr.Column():
200
- with gr.Row(elem_id="top-row"):
201
- with gr.Column(scale=2):
202
- gr.Markdown("<div class='header'><div class='small-brand'>🇮🇩 Salary AI</div></div>")
203
- gr.Markdown("<div class='description'>Prediksi gaji berbasis machine learning + data benchmark wilayah Indonesia. Masukkan posisi pekerjaan dan kabupaten/kota untuk analisis.</div>")
204
- with gr.Row():
205
- t1 = gr.Textbox(label="Posisi Pekerjaan", placeholder="Contoh: Guru, Driver, Manager", elem_id="t1")
206
- t2 = gr.Textbox(label="Kabupaten / Kota", placeholder="Contoh: Simeulue, Surakarta, Malang", elem_id="t2")
207
- model_selector = gr.Dropdown(label="Pilih Model Prediksi",
208
- choices=list(loaded_models.keys()),
209
- value='Random Forest',
210
- interactive=True)
211
- btn = gr.Button("🔍 Analisis Sekarang", variant="primary")
212
- with gr.Column(scale=1):
213
- gr.Markdown("### Hasil")
214
- out_html = gr.HTML()
215
- out_plot = gr.Plot()
216
- # Connect
217
- btn.click(analisis_gaji_final, inputs=[t1, t2, model_selector], outputs=[out_html, out_plot])
 
218
 
219
  if __name__ == "__main__":
220
- print("Menjalankan Aplikasi Final...")
221
  demo.launch(share=True, debug=True)
 
6
  import traceback
7
 
8
  # ------------------------------
9
+ # Helper: safe load joblib
10
  # ------------------------------
11
def safe_load(path, name):
    """Load a serialized object with joblib and report the outcome on stdout.

    Args:
        path: Location of the file handed to ``joblib.load``.
        name: Human-readable label used in the success/error messages.

    Returns:
        Whatever ``joblib.load(path)`` returns.

    Raises:
        Exception: Any error raised while loading is printed and then
            re-raised unchanged.
    """
    try:
        artifact = joblib.load(path)
        print(f"✅ {name} loaded from {path}")
        return artifact
    except Exception as err:
        # Report which artifact failed, then let the caller see the error.
        print(f"❌ Error loading {name}: {err}")
        raise
19
 
20
+ # ------------------------------
21
+ # LOAD MODELS & PREPROCESSOR
22
+ # ------------------------------
23
# Load every persisted artifact up front. safe_load re-raises on failure, so a
# missing or unreadable file stops the script at this point.
print("Loading models...")
preprocessor = safe_load("preprocessor.pkl", "Preprocessor")
lr_model = safe_load("lr_model.pkl", "Linear Regression")
dt_model = safe_load("dt_model.pkl", "Decision Tree")
rf_model = safe_load("rf_model.pkl", "Random Forest")

# Display name -> loaded model object; the keys double as the choices of the
# model dropdown in the UI.
loaded_models = {
    "Linear Regression": lr_model,
    "Decision Tree": dt_model,
    "Random Forest": rf_model
}
34
 
35
+ # ------------------------------
36
+ # LOAD DATASET BENCHMARK
37
+ # ------------------------------
 
38
# Build the salary benchmark table (df_benchmark) from job_salary_mean.csv.
# Loading is best-effort: on any failure the app keeps running with an empty
# table that still has the columns the lookup code reads.
try:
    df_raw = pd.read_csv("job_salary_mean.csv")
    df_benchmark = (
        df_raw.rename(columns={
            "Judul Pekerjaan": "judul",
            "Perusahaan": "perusahaan",
            "Lokasi": "lokasi",
            "Gaji_Rata2": "gaji"
        })
        # Drop incomplete rows BEFORE casting to str: astype(str) turns NaN
        # into the literal "nan", which a later dropna can no longer detect.
        .dropna(subset=["judul", "lokasi", "gaji"])
        .assign(
            judul_clean=lambda d: d["judul"].astype(str).str.lower(),
            lokasi_clean=lambda d: d["lokasi"].astype(str).str.lower(),
        )
    )
    print(f"✅ Benchmark loaded: {len(df_benchmark)} rows")
except FileNotFoundError:
    print("❌ job_salary_mean.csv not found")
    df_benchmark = pd.DataFrame(columns=["judul_clean", "lokasi_clean", "gaji"])
except Exception as e:
    # Anything else (bad encoding, missing columns, ...) is reported as what
    # it is instead of being mislabelled as a missing file.
    print(f"❌ Failed to load job_salary_mean.csv: {e}")
    df_benchmark = pd.DataFrame(columns=["judul_clean", "lokasi_clean", "gaji"])
53
+
54
+ # ------------------------------
55
+ # LOAD WILAYAH
56
+ # ------------------------------
57
# Build MASTER_WILAYAH: normalised city/regency name -> province name.
# Loading is best-effort: without the file, location detection simply never
# finds a match.
try:
    geo = pd.read_csv("dataset kabupaten indonesia.csv")
    geo = (
        geo[["name", "Unnamed: 3"]]
        .rename(columns={
            "name": "kota",
            "Unnamed: 3": "provinsi"
        })
        # A missing name would become the key "nan" after astype(str) and then
        # substring-match unrelated user input (e.g. "tanjung pinang").
        .dropna(subset=["kota", "provinsi"])
    )
    geo = geo.assign(
        # Both prefixes must go through the .str accessor: a plain
        # Series.replace only swaps cells that EQUAL the pattern, so it would
        # leave "kabupaten ..." names untouched.
        kota_clean=geo["kota"].astype(str).str.lower()
        .str.replace("kabupaten ", "", regex=False)
        .str.replace("kota ", "", regex=False)
        .str.strip(),
        provinsi=geo["provinsi"].astype(str).str.upper().str.strip(),
    )
    MASTER_WILAYAH = dict(zip(geo["kota_clean"], geo["provinsi"]))
    # An empty key is a substring of every input; never keep it.
    MASTER_WILAYAH.pop("", None)
    print(f"✅ Loaded {len(MASTER_WILAYAH)} wilayah")
except FileNotFoundError:
    print("⚠ dataset kabupaten indonesia.csv tidak ada")
    MASTER_WILAYAH = {}
except Exception as e:
    # Report the real cause (missing column, parse error, ...) rather than
    # claiming the file is absent.
    print(f"⚠ Gagal memuat dataset kabupaten indonesia.csv: {e}")
    MASTER_WILAYAH = {}
70
+
71
+ # ------------------------------
72
+ # WILAYAH FUNCTIONS
73
+ # ------------------------------
74
def get_pulau_from_provinsi(p):
    """Map a province name to the island-group label used by the app.

    Matching is case-insensitive and keyword-based: the first group with a
    keyword contained in the province name wins.

    Args:
        p: Province name. Non-string values are converted with ``str()``.

    Returns:
        One of "PULAU JAWA", "PULAU SUMATERA", "PULAU KALIMANTAN",
        "PULAU SULAWESI", "BALI & NUSA TENGGARA", "PAPUA & MALUKU", or
        "INDONESIA" when no keyword matches.
    """
    # Provinces whose names do not contain their island's name (Yogyakarta,
    # Jambi, Bengkulu, Lampung, Bangka Belitung, Gorontalo, ...) need their
    # own keyword, otherwise they fall through to "INDONESIA".
    island_keywords = (
        ("PULAU JAWA", ("JAWA", "DKI", "JAKARTA", "BANTEN", "YOGYAKARTA")),
        ("PULAU SUMATERA", ("SUMATERA", "SUMATRA", "ACEH", "RIAU", "JAMBI",
                            "BENGKULU", "LAMPUNG", "BANGKA")),
        ("PULAU KALIMANTAN", ("KALIMANTAN",)),
        ("PULAU SULAWESI", ("SULAWESI", "GORONTALO")),
        ("BALI & NUSA TENGGARA", ("BALI", "NUSA")),
        ("PAPUA & MALUKU", ("PAPUA", "MALUKU")),
    )
    name = str(p).upper()
    for island, keywords in island_keywords:
        if any(keyword in name for keyword in keywords):
            return island
    return "INDONESIA"
83
+
84
def deteksi_wilayah(text):
    """Detect the province and island group mentioned in free-text input.

    An exact match on the normalised input is preferred. Otherwise the longest
    known city/regency name contained in the input wins, so a short name that
    happens to be part of a longer one (e.g. "malang" inside "pemalang")
    cannot shadow the intended entry.

    Args:
        text: User-supplied location; converted with ``str()``.

    Returns:
        A ``(provinsi, pulau)`` tuple, or ``("INDONESIA", "INDONESIA")`` when
        nothing in ``MASTER_WILAYAH`` matches.
    """
    txt = str(text).lower().strip()
    if not txt:
        return "INDONESIA", "INDONESIA"

    prov = MASTER_WILAYAH.get(txt)
    if prov is None:
        # Skip empty keys: "" is a substring of every string.
        candidates = [kota for kota in MASTER_WILAYAH if kota and kota in txt]
        if candidates:
            prov = MASTER_WILAYAH[max(candidates, key=len)]

    if prov is None:
        return "INDONESIA", "INDONESIA"
    return prov, get_pulau_from_provinsi(prov)
90
+
91
+ # ------------------------------
92
+ # PREDIKSI + BENCHMARK
93
+ # ------------------------------
94
def analisis_gaji_final(judul, lokasi, model_choice):
    """Predict a salary and compare it with national/regional benchmarks.

    Args:
        judul: Job title entered by the user.
        lokasi: City/regency entered by the user.
        model_choice: Key into ``loaded_models`` selecting the predictor.

    Returns:
        A ``(html, figure)`` tuple for the HTML and Plot outputs. On invalid
        input or any failure the figure is ``None`` and the HTML carries the
        message; the function never raises.
    """
    # Local import keeps this block self-contained; used to neutralise markup
    # in text that originates from exceptions or the datasets.
    from html import escape

    fig = None
    try:
        judul_txt = "" if judul is None else str(judul).strip()
        lokasi_txt = "" if lokasi is None else str(lokasi).strip()
        if not judul_txt or not lokasi_txt:
            return "<b style='color:red;'>Mohon masukkan posisi dan lokasi.</b>", None

        model = loaded_models.get(model_choice)
        if model is None:
            # Fail with a clear message instead of an AttributeError on None.
            return (
                f"<b style='color:red;'>Model '{escape(str(model_choice))}' tidak tersedia.</b>",
                None,
            )

        judul_key = judul_txt.lower()
        df_input = pd.DataFrame({
            "judul_clean": [judul_key],
            "lokasi_clean": [lokasi_txt.lower()],
            "perusahaan": ["unknown"]
        })

        try:
            # A salary cannot be negative; clamp whatever the model returns.
            pred = max(0.0, float(model.predict(df_input)[0]))
        except Exception as e:
            return f"<b>Gagal memprediksi:</b> {escape(str(e))}", None

        # Benchmark job: literal substring match (regex=False) so titles such
        # as "c++" or "(senior)" are not parsed as regular expressions.
        job_match = df_benchmark[
            df_benchmark["judul_clean"].str.contains(judul_key, na=False, regex=False)
        ]
        max_job = float(job_match["gaji"].max()) if not job_match.empty else pred * 1.3

        # Benchmark location: search every part of the island-group label, so
        # "BALI & NUSA TENGGARA" looks for both "bali" and "nusa tenggara".
        provinsi, pulau = deteksi_wilayah(lokasi_txt)
        region_keys = [
            part.replace("PULAU", "").strip().lower() for part in pulau.split("&")
        ]
        region_keys = [key for key in region_keys if key] or ["indonesia"]
        lokasi_col = df_benchmark["lokasi_clean"]
        region_mask = lokasi_col.str.contains(region_keys[0], na=False, regex=False)
        for key in region_keys[1:]:
            region_mask = region_mask | lokasi_col.str.contains(key, na=False, regex=False)
        region_match = df_benchmark[region_mask]
        max_reg = float(region_match["gaji"].max()) if not region_match.empty else pred * 1.6

        # Graph
        fig, ax = plt.subplots(figsize=(8, 4))
        labels = ["Prediksi Anda", "Max Nasional", "Max Regional"]
        values = [pred, max_job, max_reg]
        ax.bar(labels, values)
        ax.set_title(f"Analisis Gaji: {judul_txt} ({provinsi})")
        ax.set_ylabel("Rp")

        # HTML summary card
        html_card = f"""
        <div style='padding:14px; border-radius:10px; background:#f8fafc'>
            <h3>💰 Estimasi Gaji: Rp {pred:,.0f}</h3>
            <p>📍 Lokasi terdeteksi: <b>{escape(str(provinsi))}</b> — {escape(str(pulau))}</p>
            <p>Max Nasional posisi ini: <b>Rp {max_job:,.0f}</b></p>
            <p>Max Regional: <b>Rp {max_reg:,.0f}</b></p>
        </div>
        """

        return html_card, fig

    except Exception as e:
        return f"<b>Error:</b> {escape(str(e))}", None
    finally:
        # Unregister the figure from pyplot so repeated requests do not pile
        # up open figures; the Figure object itself remains usable by the
        # caller that received it.
        if fig is not None:
            plt.close(fig)
144
+
145
+ # ------------------------------
146
+ # CLEAN UI (NEW GRADIO FORMAT)
147
+ # ------------------------------
148
+
149
# Extra stylesheet handed to gr.Blocks below: limits the container width and
# centres it horizontally.
css = """
.gradio-container {max-width: 1000px !important; margin:auto;}
"""

# UI definition: a centred heading, then one row with two columns -- the
# inputs and the trigger button in the first, the HTML summary and the plot
# in the second.
with gr.Blocks(title="Salary AI", css=css) as demo:

    gr.Markdown("<h1 style='text-align:center;'>💼 Salary AI</h1>")
    gr.Markdown("<p style='text-align:center; color:gray;'>Prediksi gaji dengan Machine Learning + Benchmark Indonesia.</p>")

    with gr.Row():
        with gr.Column():
            t1 = gr.Textbox(label="Posisi Pekerjaan")
            t2 = gr.Textbox(label="Kabupaten/Kota")
            # Choices come from the keys of loaded_models, so the dropdown
            # only offers models that were loaded.
            model = gr.Dropdown(
                choices=list(loaded_models.keys()),
                value="Random Forest",
                label="Model Prediksi"
            )
            btn = gr.Button("🔍 Analisis", variant="primary")

        with gr.Column():
            out_html = gr.HTML()
            out_plot = gr.Plot()

    # analisis_gaji_final receives (t1, t2, model) and its two return values
    # fill out_html and out_plot, in that order.
    btn.click(analisis_gaji_final, inputs=[t1, t2, model], outputs=[out_html, out_plot])

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    print("App running...")
    demo.launch(share=True, debug=True)