File size: 7,918 Bytes
bea9034
ffeba1e
 
 
 
bea9034
ffeba1e
bea9034
d93edae
bea9034
 
 
 
 
 
 
c15ef70
bea9034
ffeba1e
c15ef70
 
 
 
 
 
 
 
bea9034
 
c15ef70
 
 
bea9034
 
c15ef70
 
 
ffeba1e
c15ef70
 
ffeba1e
 
 
d93edae
ffeba1e
d93edae
 
 
c15ef70
d93edae
 
c15ef70
 
 
d93edae
c15ef70
ffeba1e
d93edae
 
 
 
 
 
c15ef70
 
d93edae
 
 
c15ef70
 
 
 
 
 
d93edae
c15ef70
d93edae
c15ef70
 
d93edae
c15ef70
 
 
 
 
 
 
 
 
 
 
d93edae
c15ef70
 
ffeba1e
c15ef70
d93edae
c15ef70
 
 
 
d93edae
 
c15ef70
bea9034
 
 
d93edae
c15ef70
bea9034
d93edae
c15ef70
 
d93edae
 
c15ef70
 
 
d93edae
 
 
 
 
 
 
 
 
 
 
 
c15ef70
 
d93edae
c15ef70
 
 
ffeba1e
bea9034
c15ef70
d93edae
ffeba1e
bea9034
d93edae
c15ef70
0e5bf1d
 
 
499d403
 
 
c15ef70
3afd8ea
 
499d403
 
a747952
499d403
a747952
499d403
a747952
 
 
499d403
3afd8ea
499d403
 
 
 
 
a747952
 
499d403
a747952
 
499d403
 
a747952
 
 
499d403
a747952
 
499d403
 
 
 
 
 
a747952
 
 
499d403
 
a747952
 
 
499d403
 
 
 
 
 
 
 
 
3afd8ea
 
bea9034
 
499d403
3afd8ea
499d403
49721a8
499d403
49721a8
 
499d403
49721a8
499d403
 
 
49721a8
 
c15ef70
 
d93edae
499d403
 
 
49721a8
 
 
0c20dcd
49721a8
 
 
 
 
c15ef70
499d403
3afd8ea
499d403
49721a8
 
 
 
499d403
0c20dcd
 
 
 
 
ffeba1e
bea9034
499d403
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# app.py
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
import joblib
import traceback

# ------------------------------
# Helper: safe load joblib
# ------------------------------
def safe_load(path, name):
    """Deserialize an object with joblib and report the outcome on stdout.

    Args:
        path: Location of the serialized file, handed to ``joblib.load``.
        name: Human-readable label used only in the printed messages.

    Returns:
        The object produced by ``joblib.load(path)``.

    Raises:
        Exception: Any failure is printed and then re-raised unchanged.
    """
    try:
        loaded = joblib.load(path)
        print(f"βœ… {name} loaded from {path}")
    except Exception as err:
        print(f"❌ Error loading {name}: {err}")
        raise
    return loaded

# ------------------------------
# LOAD MODELS & PREPROCESSOR
# ------------------------------
# Load every serialized artifact at import time. safe_load re-raises on
# failure, so a missing or corrupt file aborts start-up.
print("Loading models...")
preprocessor = safe_load("preprocessor.pkl", "Preprocessor")

# The three regressors, loaded in this order.
lr_model, dt_model, rf_model = (
    safe_load(filename, label)
    for filename, label in (
        ("lr_model.pkl", "Linear Regression"),
        ("dt_model.pkl", "Decision Tree"),
        ("rf_model.pkl", "Random Forest"),
    )
)

# Display name -> model object; looked up by the name chosen in the UI.
loaded_models = dict(
    zip(
        ("Linear Regression", "Decision Tree", "Random Forest"),
        (lr_model, dt_model, rf_model),
    )
)

# ------------------------------
# LOAD DATASET BENCHMARK
# ------------------------------
# Benchmark salaries used to put a prediction in context. Loading is
# best-effort: on any failure the app keeps running with an empty table.
try:
    df_raw = pd.read_csv("job_salary_mean.csv")
    df_benchmark = (
        df_raw.rename(columns={
            "Judul Pekerjaan": "judul",
            "Perusahaan": "perusahaan",
            "Lokasi": "lokasi",
            "Gaji_Rata2": "gaji"
        })
        # Drop incomplete rows BEFORE the string conversion: astype(str)
        # turns NaN into the literal text "nan", which dropna() cannot see.
        .dropna(subset=["judul", "lokasi", "gaji"])
        .assign(
            judul_clean=lambda d: d["judul"].astype(str).str.lower(),
            lokasi_clean=lambda d: d["lokasi"].astype(str).str.lower(),
        )
    )
    print(f"βœ… Benchmark loaded: {len(df_benchmark)} rows")
except FileNotFoundError:
    print("❌ job_salary_mean.csv not found")
    df_benchmark = pd.DataFrame(columns=["judul_clean", "lokasi_clean", "gaji"])
except Exception as e:
    # Schema or parse problems are reported with their real cause instead of
    # being mislabelled as a missing file; the app still starts.
    print(f"❌ Failed to load job_salary_mean.csv: {e}")
    df_benchmark = pd.DataFrame(columns=["judul_clean", "lokasi_clean", "gaji"])

# ------------------------------
# LOAD WILAYAH
# ------------------------------
# Build MASTER_WILAYAH: normalized city/regency name -> upper-cased province.
# Loading is best-effort; on failure the mapping is left empty.
try:
    geo = pd.read_csv("dataset kabupaten indonesia.csv")
    geo = (
        geo[["name", "Unnamed: 3"]]
        .rename(columns={"name": "kota", "Unnamed: 3": "provinsi"})
        # Rows without a name or province would otherwise become the keys
        # or values "nan" after the string conversion below.
        .dropna(subset=["kota", "provinsi"])
    )
    geo = geo.assign(
        kota_clean=(
            geo["kota"].astype(str).str.lower()
            .str.replace("kota ", "", regex=False)
            # Must be .str.replace: plain Series.replace only swaps values
            # that equal "kabupaten " exactly, so the prefix was never removed.
            .str.replace("kabupaten ", "", regex=False)
            .str.strip()
        ),
        provinsi=geo["provinsi"].astype(str).str.upper(),
    )
    # An empty key would be a substring of every input text.
    geo = geo[geo["kota_clean"] != ""]
    MASTER_WILAYAH = dict(zip(geo["kota_clean"], geo["provinsi"]))
    print(f"βœ… Loaded {len(MASTER_WILAYAH)} wilayah")
except FileNotFoundError:
    print("⚠ dataset kabupaten indonesia.csv tidak ada")
    MASTER_WILAYAH = {}
except Exception as e:
    # Report the real cause (e.g. a missing column) instead of claiming the
    # file does not exist; the app still starts with an empty mapping.
    print(f"⚠ dataset kabupaten indonesia.csv gagal dimuat: {e}")
    MASTER_WILAYAH = {}

# ------------------------------
# WILAYAH FUNCTIONS
# ------------------------------
# Island-group label -> keywords that identify its provinces. Checked in
# order; the first group with a keyword contained in the name wins.
_PULAU_KEYWORDS = (
    ("PULAU JAWA", ("JAWA", "DKI", "JAKARTA", "BANTEN", "YOGYAKARTA")),
    ("PULAU SUMATERA", ("SUMATERA", "SUMATRA", "ACEH", "RIAU", "JAMBI",
                        "BENGKULU", "LAMPUNG", "BANGKA")),
    ("PULAU KALIMANTAN", ("KALIMANTAN",)),
    ("PULAU SULAWESI", ("SULAWESI", "GORONTALO")),
    ("BALI & NUSA TENGGARA", ("BALI", "NUSA")),
    ("PAPUA & MALUKU", ("PAPUA", "MALUKU")),
)


def get_pulau_from_provinsi(p):
    """Map a province name to the island-group label used by the app.

    Matching is a case-insensitive substring test against a keyword table.
    Provinces whose names carry no island word (e.g. Yogyakarta, Lampung,
    Gorontalo) are listed explicitly so they no longer fall through.

    Args:
        p: Province name; any value is accepted and converted with ``str``.

    Returns:
        The island-group label, or ``"INDONESIA"`` when nothing matches.
    """
    province = str(p).upper()
    for pulau, keywords in _PULAU_KEYWORDS:
        if any(keyword in province for keyword in keywords):
            return pulau
    return "INDONESIA"

def deteksi_wilayah(text):
    """Detect the province and island group mentioned in a location text.

    An exact match on the normalized text is tried first. Otherwise the
    longest known name contained in the text wins, so a short name cannot
    shadow a longer one that contains it (e.g. "malang" inside "pemalang").

    Args:
        text: Free-form location text; converted with ``str`` and lower-cased.

    Returns:
        A ``(provinsi, pulau)`` tuple, or ``("INDONESIA", "INDONESIA")`` when
        no known name is found.
    """
    txt = str(text).lower().strip()

    prov = MASTER_WILAYAH.get(txt)
    if prov is None:
        # Skip empty keys: "" is a substring of every string.
        candidates = [kota for kota in MASTER_WILAYAH if kota and kota in txt]
        if not candidates:
            return "INDONESIA", "INDONESIA"
        prov = MASTER_WILAYAH[max(candidates, key=len)]

    return prov, get_pulau_from_provinsi(prov)

# ------------------------------
# PREDIKSI + BENCHMARK
# ------------------------------
def analisis_gaji_final(judul, lokasi, model_choice):
    """Predict a salary and compare it with benchmark maxima.

    Args:
        judul: Job title entered by the user.
        lokasi: City/regency entered by the user.
        model_choice: Key of ``loaded_models`` selecting the model to use.

    Returns:
        A ``(html, figure)`` tuple. ``figure`` is ``None`` when the input is
        incomplete, the model is unknown, or anything fails; ``html`` then
        carries the message to show.
    """
    from html import escape  # local import so the block stays self-contained

    try:
        # Treat None and whitespace-only input as missing.
        judul = str(judul or "").strip()
        lokasi = str(lokasi or "").strip()
        if not judul or not lokasi:
            return "<b style='color:red;'>Mohon masukkan posisi dan lokasi.</b>", None

        model = loaded_models.get(model_choice)
        if model is None:
            # Explicit message instead of a "'NoneType' has no attribute
            # 'predict'" error surfacing from the prediction step.
            return f"<b>Error:</b> Model '{escape(str(model_choice))}' tidak tersedia.", None

        judul_key = judul.lower()
        df_input = pd.DataFrame({
            "judul_clean": [judul_key],
            "lokasi_clean": [lokasi.lower()],
            "perusahaan": ["unknown"]
        })

        try:
            # Negative predictions are clamped to zero.
            pred = max(0.0, float(model.predict(df_input)[0]))
        except Exception as e:
            return f"<b>Gagal memprediksi:</b> {escape(str(e))}", None

        # Benchmark job: literal substring match. regex=False keeps titles
        # such as "c++" from being parsed as a regular expression.
        job_match = df_benchmark[
            df_benchmark["judul_clean"].str.contains(judul_key, na=False, regex=False)
        ]
        max_job = float(job_match["gaji"].max()) if not job_match.empty else pred * 1.3

        # Benchmark location: rows whose location text contains the last word
        # of the island-group label.
        # NOTE(review): this only matches when the benchmark location text
        # itself mentions the island name — confirm against the dataset.
        provinsi, pulau = deteksi_wilayah(lokasi)
        region_key = pulau.split()[-1].lower()
        region_match = df_benchmark[
            df_benchmark["lokasi_clean"].str.contains(region_key, na=False, regex=False)
        ]
        max_reg = float(region_match["gaji"].max()) if not region_match.empty else pred * 1.6

        # Graph
        fig, ax = plt.subplots(figsize=(8, 4))
        labels = ["Prediksi Anda", "Max Nasional", "Max Regional"]
        values = [pred, max_job, max_reg]
        ax.bar(labels, values)
        ax.set_title(f"Analisis Gaji: {judul} ({provinsi})")
        ax.set_ylabel("Rp")
        # pyplot keeps every figure it creates until it is closed; release it
        # so figures do not pile up across requests. The Figure object itself
        # stays valid for the caller to render.
        plt.close(fig)

        # HTML summary; values taken from data files are escaped before
        # being embedded in markup.
        result_html = f"""
        <div style='padding:14px; border-radius:10px; background:#f8fafc'>
            <h3>πŸ’° Estimasi Gaji: Rp {pred:,.0f}</h3>
            <p>πŸ“ Lokasi terdeteksi: <b>{escape(str(provinsi))}</b> β€” {escape(str(pulau))}</p>
            <p>Max Nasional posisi ini: <b>Rp {max_job:,.0f}</b></p>
            <p>Max Regional: <b>Rp {max_reg:,.0f}</b></p>
        </div>
        """

        return result_html, fig

    except Exception as e:
        # Last-resort boundary: show the failure in the UI rather than crash.
        return f"<b>Error:</b> {escape(str(e))}", None

import gradio as gr
# Pastikan Anda memiliki variabel loaded_models dan fungsi analisis_gaji_final yang terdefinisi

# -------------------------------------------
# ENHANCED DEFAULT GRADIO UI (CLEAN & MODERN)
# -------------------------------------------

# Stylesheet for the dashboard, kept as one literal <style> element so it can
# be emitted as raw HTML. It sets the container width and font, defines the
# .dashboard-box card class, and restyles headings, the primary button,
# inputs and row spacing, with a narrow-screen override at 768px.
custom_css = """
<style>

/* Maintain original Gradio feel */
.gradio-container { 
    max-width: 1150px !important; 
    margin: auto; 
    padding-top: 20px;
    font-family: 'Inter', sans-serif;
}

/* Subtle card styling */
.dashboard-box {
    background: white;
    padding: 18px 20px;
    border-radius: 12px;
    border: 1px solid #e5e7eb;
    box-shadow: 0px 1px 4px rgba(0,0,0,0.05);
}

/* Header */
h1 {
    font-weight: 700;
    margin-bottom: 6px;
    font-size: 32px;
}
h3 {
    font-weight: 600;
    margin-bottom: 12px;
}

/* Buttons β€” keep default look but nicer */
button.primary {
    background: #3b82f6 !important; /* Gradio blue, enhanced */
    color: white !important;
    border-radius: 8px !important;
    padding: 10px 14px !important;
    font-size: 15px !important;
}

/* Inputs enhance */
input, textarea, select, .gr-text-input {
    border-radius: 8px !important;
}

/* Row spacing */
.gr-row {
    gap: 16px !important;
}

/* Responsiveness */
@media (max-width: 768px) {
    .gradio-container {max-width: 95% !important;}
    h1 {font-size: 26px;}
}
</style>
"""

# Build the Gradio UI: a header, a two-column row (inputs on the left,
# results on the right) and the click handler that connects them.
with gr.Blocks(title="Salary AI") as demo:

    # -------------------------------------------
    # Inject the <style> block as a raw HTML component
    # -------------------------------------------
    gr.HTML(custom_css)

    # Page header (title and tagline)
    gr.Markdown("""
    <h1 style='text-align:center;'>πŸ’Ό Salary AI Dashboard</h1>
    <p style='text-align:center; color:#555; font-size:15px;'>
        Prediksi gaji dengan tampilan simple & elegan ala Gradio.
    </p>
    """)

    with gr.Row():

        # LEFT — input form
        with gr.Column(scale=1, min_width=360):
            # NOTE(review): the opening <div> here and the closing </div>
            # below are emitted by separate gr.HTML components, so they
            # presumably do not wrap the widgets in between — verify in the
            # rendered page.
            gr.HTML("<div class='dashboard-box'><h3>πŸ“₯ Input</h3>")
            t1 = gr.Textbox(label="Posisi Pekerjaan", placeholder="cth: Data Analyst")
            t2 = gr.Textbox(label="Kabupaten/Kota", placeholder="cth: Bandung")
            # Choices are the keys of loaded_models, so the selected value can
            # be used directly as the lookup key in analisis_gaji_final.
            model = gr.Dropdown(
                choices=list(loaded_models.keys()),
                value="Random Forest",
                label="Model Prediksi"
            )
            btn = gr.Button("πŸ” Analisis Gaji", variant="primary")
            gr.HTML("</div>")

        # RIGHT — output (HTML summary and bar chart)
        with gr.Column(scale=2):
            gr.HTML("<div class='dashboard-box'><h3>πŸ“Š Hasil</h3>")
            out_html = gr.HTML()
            out_plot = gr.Plot()
            gr.HTML("</div>")

    # Wire the button: the three inputs map positionally to
    # (judul, lokasi, model_choice) and the returned (html, figure) tuple
    # fills the two output components.
    btn.click(
        analisis_gaji_final,
        inputs=[t1, t2, model],
        outputs=[out_html, out_plot]
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    # share=True asks Gradio to also create a public share link.
    demo.launch(share=True)