Mandr1 committed on
Commit
d93edae
·
verified ·
1 Parent(s): b75e332

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -198
app.py CHANGED
@@ -4,11 +4,9 @@ import matplotlib.pyplot as plt
4
  import pandas as pd
5
  import joblib
6
  import traceback
7
- import numpy as np
8
- import textwrap
9
 
10
  # ------------------------------
11
- # Helper: safe load joblib with message
12
  # ------------------------------
13
  def safe_load(path, name):
14
  try:
@@ -39,75 +37,47 @@ loaded_models = {
39
  # ------------------------------
40
  try:
41
  df_raw = pd.read_csv("job_salary_mean.csv")
42
- # coba normalisasi nama kolom umum
43
  df_benchmark = df_raw.rename(columns={
44
  "Judul Pekerjaan": "judul",
45
  "Perusahaan": "perusahaan",
46
  "Lokasi": "lokasi",
47
- "Gaji_Rata2": "gaji",
48
- "tahun": "year",
49
- "tahun_posting": "year",
50
- "year": "year"
51
  })
52
- # pastikan kolom gaji numeric
53
- if "gaji" in df_benchmark.columns:
54
- df_benchmark["gaji"] = pd.to_numeric(df_benchmark["gaji"], errors="coerce")
55
- else:
56
- df_benchmark["gaji"] = np.nan
57
-
58
- df_benchmark["judul_clean"] = df_benchmark.get("judul", "").astype(str).str.lower()
59
- df_benchmark["lokasi_clean"] = df_benchmark.get("lokasi", "").astype(str).str.lower()
60
- # drop missing gaji for analytics (still allow prediction fallback)
61
- df_benchmark = df_benchmark.dropna(subset=["judul_clean", "lokasi_clean"])
62
  print(f"✅ Benchmark loaded: {len(df_benchmark)} rows")
63
- except Exception as e:
64
- print("❌ job_salary_mean.csv not found or gagal dibaca:", e)
65
  df_benchmark = pd.DataFrame(columns=["judul_clean", "lokasi_clean", "gaji"])
66
 
67
  # ------------------------------
68
- # LOAD WILAYAH (uploaded file path)
69
  # ------------------------------
70
  try:
71
- # path file yang kamu upload (sesuai session)
72
- geo_path = "/mnt/data/dataset kabupaten indonesia.csv"
73
- geo = pd.read_csv(geo_path)
74
- # menyesuaikan nama kolom dari file upload: ambil kolom 'name' dan 'Unnamed: 3' seperti sebelumnya
75
- if "name" in geo.columns and "Unnamed: 3" in geo.columns:
76
- geo = geo[["name", "Unnamed: 3"]].rename(columns={
77
- "name": "kota",
78
- "Unnamed: 3": "provinsi"
79
- })
80
- else:
81
- # fallback: coba cari kolom serupa
82
- possible_kota = [c for c in geo.columns if "kota" in c.lower() or "name" in c.lower()]
83
- possible_prov = [c for c in geo.columns if "provinsi" in c.lower() or "prov" in c.lower()]
84
- if possible_kota and possible_prov:
85
- geo = geo[[possible_kota[0], possible_prov[0]]].rename(columns={
86
- possible_kota[0]: "kota",
87
- possible_prov[0]: "provinsi"
88
- })
89
- else:
90
- raise ValueError("Format dataset kabupaten tidak dikenali")
91
-
92
- # bersihkan string
93
- geo["kota_clean"] = geo["kota"].astype(str).str.lower().str.replace("kota ", "").str.replace("kabupaten ", "")
94
  geo["provinsi"] = geo["provinsi"].astype(str).str.upper()
95
  MASTER_WILAYAH = pd.Series(geo.provinsi.values, index=geo.kota_clean).to_dict()
96
- print(f"✅ Loaded {len(MASTER_WILAYAH)} wilayah from {geo_path}")
97
- except Exception as e:
98
- print("⚠ dataset kabupaten indonesia.csv tidak ada atau gagal dibaca:", e)
99
  MASTER_WILAYAH = {}
100
 
101
  # ------------------------------
102
  # WILAYAH FUNCTIONS
103
  # ------------------------------
104
  def get_pulau_from_provinsi(p):
105
- p = str(p).upper()
106
  if any(x in p for x in ["JAWA", "DKI", "BANTEN"]): return "PULAU JAWA"
107
- if any(x in p for x in ["SUMATERA", "ACEH", "RIAU", "LAMPUNG", "BENGKULU", "KEPULAUAN RIAU", "SUMATERA UTARA", "SUMATERA SELATAN", "SUMATERA BARAT"]): return "PULAU SUMATERA"
108
  if "KALIMANTAN" in p: return "PULAU KALIMANTAN"
109
  if "SULAWESI" in p: return "PULAU SULAWESI"
110
- if any(x in p for x in ["BALI", "NUSA", "NTB", "NTT"]): return "BALI & NUSA TENGGARA"
111
  if any(x in p for x in ["PAPUA", "MALUKU"]): return "PAPUA & MALUKU"
112
  return "INDONESIA"
113
 
@@ -119,159 +89,61 @@ def deteksi_wilayah(text):
119
  return "INDONESIA", "INDONESIA"
120
 
121
  # ------------------------------
122
- # PLOTTING HELPERS
123
- # ------------------------------
124
- def plot_trend_nasional(df):
125
- fig, ax = plt.subplots(figsize=(8,3.5))
126
- # jika ada kolom year gunakan line chart median per year
127
- if "year" in df.columns:
128
- try:
129
- trend = df.dropna(subset=["year", "gaji"]).groupby("year")["gaji"].median().sort_index()
130
- if len(trend) > 1:
131
- ax.plot(trend.index.astype(str), trend.values, marker="o")
132
- ax.set_title("Tren Gaji Nasional (median per tahun)")
133
- ax.set_xlabel("Tahun")
134
- ax.set_ylabel("Rp (median)")
135
- ax.grid(alpha=0.25)
136
- else:
137
- # fallback: histogram
138
- ax.hist(df["gaji"].dropna(), bins=40)
139
- ax.set_title("Distribusi Gaji Nasional (tahun tidak cukup)")
140
- ax.set_xlabel("Rp")
141
- except Exception:
142
- ax.hist(df["gaji"].dropna(), bins=40)
143
- ax.set_title("Distribusi Gaji Nasional")
144
- ax.set_xlabel("Rp")
145
- else:
146
- # jika tidak ada kolom year, tampilkan histogram distribusi gaji
147
- ax.hist(df["gaji"].dropna(), bins=40)
148
- ax.set_title("Distribusi Gaji Nasional (tidak ada data tahun)")
149
- ax.set_xlabel("Rp")
150
- plt.tight_layout()
151
- return fig
152
-
153
- def plot_boxplot_per_pulau(df):
154
- fig, ax = plt.subplots(figsize=(8,3.5))
155
- # gabungkan provinsi via MASTER_WILAYAH mapping
156
- df_local = df.copy()
157
- def map_prov_from_lokasi(x):
158
- prov, pulau = deteksi_wilayah(x)
159
- return pulau
160
- df_local["pulau"] = df_local["lokasi_clean"].apply(map_prov_from_lokasi)
161
- # only keep entries with numeric gaji
162
- df_local = df_local.dropna(subset=["gaji"])
163
- groups = df_local.groupby("pulau")["gaji"].apply(list)
164
- # siapkan data dan label
165
- data = []
166
- labels = []
167
- for pulau, arr in groups.items():
168
- data.append(arr)
169
- labels.append(pulau)
170
- if len(data) == 0:
171
- ax.text(0.5, 0.5, "Tidak ada data gaji untuk boxplot", ha="center")
172
- else:
173
- ax.boxplot(data, labels=labels, vert=False, patch_artist=True)
174
- ax.set_title("Boxplot Gaji per Pulau")
175
- ax.set_xlabel("Rp")
176
- plt.tight_layout()
177
- return fig
178
-
179
- def plot_heatmap_provinsi(df):
180
- # rata-rata gaji per provinsi
181
- fig, ax = plt.subplots(figsize=(6,8))
182
- df_local = df.copy()
183
- # coba mapping provinsi dari lokasi
184
- def map_prov(x):
185
- prov, pulau = deteksi_wilayah(x)
186
- return prov
187
- df_local["provinsi_mapped"] = df_local["lokasi_clean"].apply(map_prov)
188
- agg = df_local.dropna(subset=["gaji"]).groupby("provinsi_mapped")["gaji"].mean().sort_values(ascending=False)
189
- if agg.empty:
190
- ax.text(0.5, 0.5, "Tidak ada data untuk heatmap provinsi", ha="center")
191
- ax.axis("off")
192
- return fig
193
- # untuk tampilkan heatmap simpel: gunakan bar horizontal dengan colormap, disusun ke 2 kolom agar mirip heatmap
194
- provs = agg.index.tolist()
195
- values = agg.values
196
- # normalisasi untuk colormap
197
- norm = plt.Normalize(vmin=min(values), vmax=max(values))
198
- cmap = plt.cm.viridis
199
- colors = cmap(norm(values))
200
- y = np.arange(len(provs))
201
- ax.barh(y, values, color=colors)
202
- ax.set_yticks(y)
203
- ax.set_yticklabels(provs, fontsize=8)
204
- ax.invert_yaxis()
205
- ax.set_title("Rata-rata Gaji per Provinsi (Rp)")
206
- plt.tight_layout()
207
- return fig
208
-
209
- # ------------------------------
210
- # PREDIKSI + BENCHMARK + GRAFIK
211
  # ------------------------------
212
  def analisis_gaji_final(judul, lokasi, model_choice):
213
  try:
214
  if not judul or not lokasi:
215
- return "<b style='color:red;'>Mohon masukkan posisi dan lokasi.</b>", None, None, None
216
 
217
  model = loaded_models.get(model_choice)
218
 
219
  df_input = pd.DataFrame({
220
- "judul_clean": [str(judul).lower()],
221
- "lokasi_clean": [str(lokasi).lower()],
222
  "perusahaan": ["unknown"]
223
  })
224
 
225
  try:
226
- # jika model membutuhkan preprocessor, apply jika tersedia
227
- try:
228
- X_in = preprocessor.transform(df_input)
229
- except Exception:
230
- X_in = df_input
231
- pred = float(model.predict(df_input if X_in is None else df_input)[0])
232
  pred = max(0, pred)
233
  except Exception as e:
234
- # fallback: jika model gagal, beri pesan
235
- return f"<b>Gagal memprediksi:</b> {e}", None, None, None
236
 
237
  # Benchmark job
238
- job_match = df_benchmark[df_benchmark["judul_clean"].str.contains(str(judul).lower(), na=False)]
239
- max_job = float(job_match["gaji"].max()) if not job_match.empty and job_match["gaji"].notna().any() else pred * 1.3
240
 
241
  # Benchmark location
242
  provinsi, pulau = deteksi_wilayah(lokasi)
243
- # cari regional dengan mencocokkan nama provinsi atau nama pulau (simple contains)
244
- region_match = df_benchmark[df_benchmark["lokasi_clean"].str.contains(provinsi.lower(), na=False)]
245
- if region_match.empty:
246
- # coba cari berdasarkan pulau (ambil kata terakhir dari string pulau => mis. 'PULAU JAWA' -> 'JAWA')
247
- region_term = pulau.split()[-1].lower() if pulau != "INDONESIA" else ""
248
- if region_term:
249
- region_match = df_benchmark[df_benchmark["lokasi_clean"].str.contains(region_term, na=False)]
250
- max_reg = float(region_match["gaji"].max()) if not region_match.empty and region_match["gaji"].notna().any() else pred * 1.6
251
-
252
- # Build HTML result
 
 
253
  html = f"""
254
  <div style='padding:14px; border-radius:10px; background:#f8fafc'>
255
- <h3>💰 Estimasi Gaji: <b>Rp {pred:,.0f}</b></h3>
256
  <p>📍 Lokasi terdeteksi: <b>{provinsi}</b> — {pulau}</p>
257
  <p>Max Nasional posisi ini: <b>Rp {max_job:,.0f}</b></p>
258
  <p>Max Regional: <b>Rp {max_reg:,.0f}</b></p>
259
  </div>
260
  """
261
 
262
- # Build charts (gunakan seluruh df_benchmark untuk analytics)
263
- fig_trend = plot_trend_nasional(df_benchmark)
264
- fig_box = plot_boxplot_per_pulau(df_benchmark)
265
- fig_heat = plot_heatmap_provinsi(df_benchmark)
266
-
267
- return html, fig_trend, fig_box, fig_heat
268
 
269
  except Exception as e:
270
- tb = traceback.format_exc()
271
- return f"<b>Error:</b> {e}<pre>{tb}</pre>", None, None, None
272
 
273
  # ------------------------------
274
- # CLEAN UI (DASHBOARD STYLE) - FIXED + ANALYTICS
275
  # ------------------------------
276
 
277
  custom_css = """
@@ -282,25 +154,27 @@ custom_css = """
282
  border-radius: 12px;
283
  box-shadow: 0 2px 10px rgba(0,0,0,0.06);
284
  }
285
- .gradio-container {max-width: 1200px !important; margin:auto;}
286
- .small-muted {color: #6b7280; font-size: 0.95rem;}
287
- .card-title {font-weight:600; margin-bottom:8px;}
288
  </style>
289
  """
290
 
291
- with gr.Blocks(title="Salary AI Dashboard") as demo:
292
 
293
  # Inject CSS
294
  gr.HTML(custom_css)
295
 
296
- gr.Markdown("<h1 style='text-align:center;'>💼 Salary AI Dashboard</h1>")
297
- gr.Markdown("<p style='text-align:center; color:gray;'>Prediksi gaji + Benchmark Nasional & Regional Indonesia.</p>")
 
 
 
 
298
 
299
  with gr.Row():
 
300
  # LEFT PANEL — INPUT FORM
301
  with gr.Column(scale=1):
302
- gr.HTML("<div class='dashboard-box'>")
303
- gr.Markdown("### 📥 Input Data")
304
  t1 = gr.Textbox(label="Posisi Pekerjaan", placeholder="cth: Data Analyst")
305
  t2 = gr.Textbox(label="Kabupaten/Kota", placeholder="cth: Bandung")
306
  model = gr.Dropdown(
@@ -311,32 +185,19 @@ with gr.Blocks(title="Salary AI Dashboard") as demo:
311
  btn = gr.Button("🔍 Analisis Gaji", variant="primary")
312
  gr.HTML("</div>")
313
 
314
- # quick info card
315
- with gr.Row():
316
- with gr.Column():
317
- gr.HTML("<div class='dashboard-box'>")
318
- gr.Markdown("#### ℹ️ Info Dataset")
319
- gr.Markdown(f"- Rows benchmark: **{len(df_benchmark)}**")
320
- gr.Markdown(f"- Wilayah terdaftar: **{len(MASTER_WILAYAH)}**")
321
- gr.HTML("</div>")
322
-
323
- # RIGHT PANEL — OUTPUT & ANALYTICS
324
  with gr.Column(scale=2):
325
- gr.HTML("<div class='dashboard-box'>")
326
- gr.Markdown("### 📊 Hasil Analisis")
327
  out_html = gr.HTML()
328
- gr.Markdown("#### Grafik Analitik")
329
- out_trend = gr.Plot()
330
- out_box = gr.Plot()
331
- out_heat = gr.Plot()
332
  gr.HTML("</div>")
333
 
334
  btn.click(
335
  analisis_gaji_final,
336
  inputs=[t1, t2, model],
337
- outputs=[out_html, out_trend, out_box, out_heat]
338
  )
339
 
340
  if __name__ == "__main__":
341
  print("App running...")
342
- demo.launch(share=True, debug=True)
 
4
  import pandas as pd
5
  import joblib
6
  import traceback
 
 
7
 
8
  # ------------------------------
9
+ # Helper: safe load joblib
10
  # ------------------------------
11
  def safe_load(path, name):
12
  try:
 
37
  # ------------------------------
38
  try:
39
  df_raw = pd.read_csv("job_salary_mean.csv")
 
40
  df_benchmark = df_raw.rename(columns={
41
  "Judul Pekerjaan": "judul",
42
  "Perusahaan": "perusahaan",
43
  "Lokasi": "lokasi",
44
+ "Gaji_Rata2": "gaji"
 
 
 
45
  })
46
+ df_benchmark["judul_clean"] = df_benchmark["judul"].astype(str).str.lower()
47
+ df_benchmark["lokasi_clean"] = df_benchmark["lokasi"].astype(str).str.lower()
48
+ df_benchmark = df_benchmark.dropna(subset=["judul_clean", "lokasi_clean", "gaji"])
 
 
 
 
 
 
 
49
  print(f"✅ Benchmark loaded: {len(df_benchmark)} rows")
50
+ except:
51
+ print("❌ job_salary_mean.csv not found")
52
  df_benchmark = pd.DataFrame(columns=["judul_clean", "lokasi_clean", "gaji"])
53
 
54
  # ------------------------------
55
+ # LOAD WILAYAH
56
  # ------------------------------
57
  try:
58
+ geo = pd.read_csv("dataset kabupaten indonesia.csv")
59
+ geo = geo[["name", "Unnamed: 3"]].rename(columns={
60
+ "name": "kota",
61
+ "Unnamed: 3": "provinsi"
62
+ })
63
+ geo["kota_clean"] = geo["kota"].astype(str).str.lower().str.replace("kota ", "").replace("kabupaten ", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  geo["provinsi"] = geo["provinsi"].astype(str).str.upper()
65
  MASTER_WILAYAH = pd.Series(geo.provinsi.values, index=geo.kota_clean).to_dict()
66
+ print(f"✅ Loaded {len(MASTER_WILAYAH)} wilayah")
67
+ except:
68
+ print("⚠ dataset kabupaten indonesia.csv tidak ada")
69
  MASTER_WILAYAH = {}
70
 
71
  # ------------------------------
72
  # WILAYAH FUNCTIONS
73
  # ------------------------------
74
  def get_pulau_from_provinsi(p):
75
+ p = p.upper()
76
  if any(x in p for x in ["JAWA", "DKI", "BANTEN"]): return "PULAU JAWA"
77
+ if any(x in p for x in ["SUMATERA", "ACEH", "RIAU"]): return "PULAU SUMATERA"
78
  if "KALIMANTAN" in p: return "PULAU KALIMANTAN"
79
  if "SULAWESI" in p: return "PULAU SULAWESI"
80
+ if any(x in p for x in ["BALI", "NUSA"]): return "BALI & NUSA TENGGARA"
81
  if any(x in p for x in ["PAPUA", "MALUKU"]): return "PAPUA & MALUKU"
82
  return "INDONESIA"
83
 
 
89
  return "INDONESIA", "INDONESIA"
90
 
91
  # ------------------------------
92
+ # PREDIKSI + BENCHMARK
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  # ------------------------------
94
  def analisis_gaji_final(judul, lokasi, model_choice):
95
  try:
96
  if not judul or not lokasi:
97
+ return "<b style='color:red;'>Mohon masukkan posisi dan lokasi.</b>", None
98
 
99
  model = loaded_models.get(model_choice)
100
 
101
  df_input = pd.DataFrame({
102
+ "judul_clean": [judul.lower()],
103
+ "lokasi_clean": [lokasi.lower()],
104
  "perusahaan": ["unknown"]
105
  })
106
 
107
  try:
108
+ pred = float(model.predict(df_input)[0])
 
 
 
 
 
109
  pred = max(0, pred)
110
  except Exception as e:
111
+ return f"<b>Gagal memprediksi:</b> {e}", None
 
112
 
113
  # Benchmark job
114
+ job_match = df_benchmark[df_benchmark["judul_clean"].str.contains(judul.lower(), na=False)]
115
+ max_job = float(job_match["gaji"].max()) if not job_match.empty else pred * 1.3
116
 
117
  # Benchmark location
118
  provinsi, pulau = deteksi_wilayah(lokasi)
119
+ region_match = df_benchmark[df_benchmark["lokasi_clean"].str.contains(pulau.split()[-1].lower(), na=False)]
120
+ max_reg = float(region_match["gaji"].max()) if not region_match.empty else pred * 1.6
121
+
122
+ # Graph
123
+ fig, ax = plt.subplots(figsize=(8,4))
124
+ labels = ["Prediksi Anda", "Max Nasional", "Max Regional"]
125
+ values = [pred, max_job, max_reg]
126
+ ax.bar(labels, values)
127
+ ax.set_title(f"Analisis Gaji: {judul} ({provinsi})")
128
+ ax.set_ylabel("Rp")
129
+
130
+ # HTML clean
131
  html = f"""
132
  <div style='padding:14px; border-radius:10px; background:#f8fafc'>
133
+ <h3>💰 Estimasi Gaji: Rp {pred:,.0f}</h3>
134
  <p>📍 Lokasi terdeteksi: <b>{provinsi}</b> — {pulau}</p>
135
  <p>Max Nasional posisi ini: <b>Rp {max_job:,.0f}</b></p>
136
  <p>Max Regional: <b>Rp {max_reg:,.0f}</b></p>
137
  </div>
138
  """
139
 
140
+ return html, fig
 
 
 
 
 
141
 
142
  except Exception as e:
143
+ return f"<b>Error:</b> {e}", None
 
144
 
145
  # ------------------------------
146
+ # CLEAN UI (DASHBOARD STYLE) - FIXED
147
  # ------------------------------
148
 
149
  custom_css = """
 
154
  border-radius: 12px;
155
  box-shadow: 0 2px 10px rgba(0,0,0,0.06);
156
  }
157
+ .gradio-container {max-width: 1100px !important; margin:auto;}
 
 
158
  </style>
159
  """
160
 
161
+ with gr.Blocks(title="Salary AI") as demo:
162
 
163
  # Inject CSS
164
  gr.HTML(custom_css)
165
 
166
+ gr.Markdown("""
167
+ <h1 style='text-align:center;'>💼 Salary AI Dashboard</h1>
168
+ <p style='text-align:center; color:gray;'>
169
+ Prediksi gaji + Benchmark Nasional & Regional Indonesia.
170
+ </p>
171
+ """)
172
 
173
  with gr.Row():
174
+
175
  # LEFT PANEL — INPUT FORM
176
  with gr.Column(scale=1):
177
+ gr.HTML("<div class='dashboard-box'><h3>📥 Input Data</h3>")
 
178
  t1 = gr.Textbox(label="Posisi Pekerjaan", placeholder="cth: Data Analyst")
179
  t2 = gr.Textbox(label="Kabupaten/Kota", placeholder="cth: Bandung")
180
  model = gr.Dropdown(
 
185
  btn = gr.Button("🔍 Analisis Gaji", variant="primary")
186
  gr.HTML("</div>")
187
 
188
+ # RIGHT PANEL — OUTPUT
 
 
 
 
 
 
 
 
 
189
  with gr.Column(scale=2):
190
+ gr.HTML("<div class='dashboard-box'><h3>📊 Hasil Analisis</h3>")
 
191
  out_html = gr.HTML()
192
+ out_plot = gr.Plot()
 
 
 
193
  gr.HTML("</div>")
194
 
195
  btn.click(
196
  analisis_gaji_final,
197
  inputs=[t1, t2, model],
198
+ outputs=[out_html, out_plot]
199
  )
200
 
201
  if __name__ == "__main__":
202
  print("App running...")
203
+ demo.launch(share=True, debug=True)