# app.py import gradio as gr import matplotlib.pyplot as plt import pandas as pd import joblib import traceback # ------------------------------ # Helper: safe load joblib # ------------------------------ def safe_load(path, name): try: obj = joblib.load(path) print(f"✅ {name} loaded from {path}") return obj except Exception as e: print(f"❌ Error loading {name}: {e}") raise # ------------------------------ # LOAD MODELS & PREPROCESSOR # ------------------------------ print("Loading models...") preprocessor = safe_load("preprocessor.pkl", "Preprocessor") lr_model = safe_load("lr_model.pkl", "Linear Regression") dt_model = safe_load("dt_model.pkl", "Decision Tree") rf_model = safe_load("rf_model.pkl", "Random Forest") loaded_models = { "Linear Regression": lr_model, "Decision Tree": dt_model, "Random Forest": rf_model } # ------------------------------ # LOAD DATASET BENCHMARK # ------------------------------ try: df_raw = pd.read_csv("job_salary_mean.csv") df_benchmark = df_raw.rename(columns={ "Judul Pekerjaan": "judul", "Perusahaan": "perusahaan", "Lokasi": "lokasi", "Gaji_Rata2": "gaji" }) df_benchmark["judul_clean"] = df_benchmark["judul"].astype(str).str.lower() df_benchmark["lokasi_clean"] = df_benchmark["lokasi"].astype(str).str.lower() df_benchmark = df_benchmark.dropna(subset=["judul_clean", "lokasi_clean", "gaji"]) print(f"✅ Benchmark loaded: {len(df_benchmark)} rows") except: print("❌ job_salary_mean.csv not found") df_benchmark = pd.DataFrame(columns=["judul_clean", "lokasi_clean", "gaji"]) # ------------------------------ # LOAD WILAYAH # ------------------------------ try: geo = pd.read_csv("dataset kabupaten indonesia.csv") geo = geo[["name", "Unnamed: 3"]].rename(columns={ "name": "kota", "Unnamed: 3": "provinsi" }) geo["kota_clean"] = geo["kota"].astype(str).str.lower().str.replace("kota ", "").replace("kabupaten ", "") geo["provinsi"] = geo["provinsi"].astype(str).str.upper() MASTER_WILAYAH = pd.Series(geo.provinsi.values, index=geo.kota_clean).to_dict() print(f"✅ Loaded {len(MASTER_WILAYAH)} wilayah") except: print("⚠ dataset kabupaten indonesia.csv tidak ada") MASTER_WILAYAH = {} # ------------------------------ # WILAYAH FUNCTIONS # ------------------------------ def get_pulau_from_provinsi(p): p = p.upper() if any(x in p for x in ["JAWA", "DKI", "BANTEN"]): return "PULAU JAWA" if any(x in p for x in ["SUMATERA", "ACEH", "RIAU"]): return "PULAU SUMATERA" if "KALIMANTAN" in p: return "PULAU KALIMANTAN" if "SULAWESI" in p: return "PULAU SULAWESI" if any(x in p for x in ["BALI", "NUSA"]): return "BALI & NUSA TENGGARA" if any(x in p for x in ["PAPUA", "MALUKU"]): return "PAPUA & MALUKU" return "INDONESIA" def deteksi_wilayah(text): txt = str(text).lower() for kota, prov in MASTER_WILAYAH.items(): if kota in txt: return prov, get_pulau_from_provinsi(prov) return "INDONESIA", "INDONESIA" # ------------------------------ # PREDIKSI + BENCHMARK # ------------------------------ def analisis_gaji_final(judul, lokasi, model_choice): try: if not judul or not lokasi: return "Mohon masukkan posisi dan lokasi.", None model = loaded_models.get(model_choice) df_input = pd.DataFrame({ "judul_clean": [judul.lower()], "lokasi_clean": [lokasi.lower()], "perusahaan": ["unknown"] }) try: pred = float(model.predict(df_input)[0]) pred = max(0, pred) except Exception as e: return f"Gagal memprediksi: {e}", None # Benchmark job job_match = df_benchmark[df_benchmark["judul_clean"].str.contains(judul.lower(), na=False)] max_job = float(job_match["gaji"].max()) if not job_match.empty else pred * 1.3 # Benchmark location provinsi, pulau = deteksi_wilayah(lokasi) region_match = df_benchmark[df_benchmark["lokasi_clean"].str.contains(pulau.split()[-1].lower(), na=False)] max_reg = float(region_match["gaji"].max()) if not region_match.empty else pred * 1.6 # Graph fig, ax = plt.subplots(figsize=(8,4)) labels = ["Prediksi Anda", "Max Nasional", "Max Regional"] values = [pred, max_job, max_reg] ax.bar(labels, values) ax.set_title(f"Analisis Gaji: {judul} ({provinsi})") ax.set_ylabel("Rp") # HTML clean html = f"""
📍 Lokasi terdeteksi: {provinsi} — {pulau}
Max Nasional posisi ini: Rp {max_job:,.0f}
Max Regional: Rp {max_reg:,.0f}
Prediksi gaji dengan tampilan simple & elegan ala Gradio.
""") with gr.Row(): # LEFT — Input Form with gr.Column(scale=1, min_width=360): gr.HTML("