Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
# ============================================================
|
| 2 |
-
# IPLM Audit β HF Spaces (Gradio) β + LLM Analysis (Gemini
|
| 3 |
-
# -
|
| 4 |
-
# -
|
| 5 |
# ============================================================
|
| 6 |
|
| 7 |
import os
|
|
@@ -21,6 +21,7 @@ import pandas as pd
|
|
| 21 |
import gradio as gr
|
| 22 |
from PIL import Image
|
| 23 |
|
|
|
|
| 24 |
from scipy.stats import chisquare
|
| 25 |
from sklearn.preprocessing import StandardScaler
|
| 26 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
@@ -29,18 +30,20 @@ import matplotlib
|
|
| 29 |
matplotlib.use("Agg")
|
| 30 |
import matplotlib.pyplot as plt
|
| 31 |
|
| 32 |
-
import google.generativeai as genai
|
| 33 |
-
|
| 34 |
|
| 35 |
# ============================================================
|
| 36 |
# CONFIG
|
| 37 |
# ============================================================
|
| 38 |
DATA_PATH = os.getenv("IPLM_DATA_PATH", "IPLM_clean_manual_131225.xlsx")
|
| 39 |
|
| 40 |
-
# Gemini only
|
| 41 |
GEMINI_API_KEY = os.getenv("HF_TOKEN_DQ", "").strip()
|
| 42 |
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
EXCLUDE_COLS_EXACT = {"kontak_wa", "npp", "tanggal_kirim", "updated_at", "created_at"}
|
| 45 |
|
| 46 |
BENFORD_P = np.array([math.log10(1 + 1/d) for d in range(1, 10)])
|
|
@@ -204,6 +207,51 @@ def scatter_plot(peer_agg, x_col, y_col):
|
|
| 204 |
return fig_to_pil(fig)
|
| 205 |
|
| 206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
# ============================================================
|
| 208 |
# LOAD DATA (GLOBAL)
|
| 209 |
# ============================================================
|
|
@@ -237,7 +285,7 @@ PROVS = clean_str_list(df["_prov_str"].unique().tolist())
|
|
| 237 |
if not PROVS:
|
| 238 |
raise ValueError("Tidak ada nilai provinsi yang valid setelah cleaning.")
|
| 239 |
|
| 240 |
-
prov_cache_peer = {}
|
| 241 |
|
| 242 |
|
| 243 |
def kabs_for_prov(pv):
|
|
@@ -373,20 +421,12 @@ def audit(pv, kv, kw):
|
|
| 373 |
|
| 374 |
|
| 375 |
# ============================================================
|
| 376 |
-
# LLM (GEMINI
|
| 377 |
# ============================================================
|
| 378 |
-
def
|
| 379 |
if scorecard_df is None or len(scorecard_df) == 0:
|
| 380 |
return "β Scorecard kosong. Jalankan audit dulu."
|
| 381 |
|
| 382 |
-
if not GEMINI_API_KEY:
|
| 383 |
-
return (
|
| 384 |
-
"β οΈ **LLM belum aktif** karena `HF_TOKEN_DQ` belum diset.\n\n"
|
| 385 |
-
"Set di HuggingFace Space β Settings β Variables and secrets:\n"
|
| 386 |
-
"- `HF_TOKEN_DQ` = API key Gemini kamu\n"
|
| 387 |
-
"- opsional: `GEMINI_MODEL` (default: gemini-1.5-flash)\n"
|
| 388 |
-
)
|
| 389 |
-
|
| 390 |
payload = {
|
| 391 |
"provinsi": pv,
|
| 392 |
"kab_kota": kv,
|
|
@@ -411,14 +451,7 @@ def gemini_analyze_scorecard(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
|
|
| 411 |
f"DATA (JSON):\n{json.dumps(payload, ensure_ascii=False)}"
|
| 412 |
)
|
| 413 |
|
| 414 |
-
|
| 415 |
-
genai.configure(api_key=GEMINI_API_KEY)
|
| 416 |
-
model = genai.GenerativeModel(GEMINI_MODEL)
|
| 417 |
-
resp = model.generate_content(f"{system}\n\n{user}")
|
| 418 |
-
text = getattr(resp, "text", "")
|
| 419 |
-
return (text or "").strip() or "β Gemini tidak mengembalikan teks."
|
| 420 |
-
except Exception as e:
|
| 421 |
-
return f"β LLM error (Gemini): {e}"
|
| 422 |
|
| 423 |
|
| 424 |
# ============================================================
|
|
@@ -449,7 +482,7 @@ def run_audit(pv, kv, kw):
|
|
| 449 |
return narasi, scorecard, ben_tbl, ben_img, scat_img, sim_tbl, scorecard, ben_tbl, sim_tbl
|
| 450 |
|
| 451 |
def run_llm(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
|
| 452 |
-
return
|
| 453 |
|
| 454 |
|
| 455 |
pv0, kv0, kw0, kabs0, kews0 = ui_init()
|
|
@@ -461,7 +494,7 @@ with gr.Blocks(title="IPLM Audit β Kualitas Data & Indikasi Tidak Wajar", them
|
|
| 461 |
f"- EXCLUDE (no analysis): `{', '.join(sorted(EXCLUDE_COLS_EXACT))}`\n"
|
| 462 |
f"- prov_col = `{prov_col}` · kab_col = `{kab_col}` · kewenangan_col = `{kew_col if kew_col else 'TIDAK ADA'}`\n"
|
| 463 |
"---\n"
|
| 464 |
-
"**LLM Analysis (Gemini
|
| 465 |
)
|
| 466 |
|
| 467 |
with gr.Row():
|
|
@@ -489,7 +522,6 @@ with gr.Blocks(title="IPLM Audit β Kualitas Data & Indikasi Tidak Wajar", them
|
|
| 489 |
gr.Markdown("## Analisis Naratif (LLM — Gemini)")
|
| 490 |
out_llm = gr.Markdown()
|
| 491 |
|
| 492 |
-
# hidden states: store last tables for llm button
|
| 493 |
st_score = gr.State(pd.DataFrame())
|
| 494 |
st_ben = gr.State(pd.DataFrame())
|
| 495 |
st_sim = gr.State(pd.DataFrame())
|
|
|
|
| 1 |
# ============================================================
|
| 2 |
+
# IPLM Audit β HF Spaces (Gradio) β + LLM Analysis (Gemini REST)
|
| 3 |
+
# - NO google SDK (avoid grpc segfault / exit 139)
|
| 4 |
+
# - Gemini API key: HF_TOKEN_DQ
|
| 5 |
# ============================================================
|
| 6 |
|
| 7 |
import os
|
|
|
|
| 21 |
import gradio as gr
|
| 22 |
from PIL import Image
|
| 23 |
|
| 24 |
+
import requests
|
| 25 |
from scipy.stats import chisquare
|
| 26 |
from sklearn.preprocessing import StandardScaler
|
| 27 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
| 30 |
matplotlib.use("Agg")
|
| 31 |
import matplotlib.pyplot as plt
|
| 32 |
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# ============================================================
|
| 35 |
# CONFIG
|
| 36 |
# ============================================================
|
| 37 |
DATA_PATH = os.getenv("IPLM_DATA_PATH", "IPLM_clean_manual_131225.xlsx")
|
| 38 |
|
| 39 |
+
# Gemini only (REST)
|
| 40 |
GEMINI_API_KEY = os.getenv("HF_TOKEN_DQ", "").strip()
|
| 41 |
GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
|
| 42 |
|
| 43 |
+
# Gemini REST endpoint (GenerateContent)
|
| 44 |
+
# v1beta works broadly; if you use "v1" in your org, adjust accordingly.
|
| 45 |
+
GEMINI_BASE = os.getenv("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1beta")
|
| 46 |
+
|
| 47 |
EXCLUDE_COLS_EXACT = {"kontak_wa", "npp", "tanggal_kirim", "updated_at", "created_at"}
|
| 48 |
|
| 49 |
BENFORD_P = np.array([math.log10(1 + 1/d) for d in range(1, 10)])
|
|
|
|
| 207 |
return fig_to_pil(fig)
|
| 208 |
|
| 209 |
|
| 210 |
+
# ============================================================
|
| 211 |
+
# GEMINI REST CALL
|
| 212 |
+
# ============================================================
|
| 213 |
+
def gemini_generate(system_text: str, user_text: str, temperature=0.2, max_output_tokens=700) -> str:
    """Call the Gemini ``generateContent`` REST endpoint and return the reply text.

    The system and user texts are joined with a blank line and sent as a
    single ``user`` turn. The function never raises for network, HTTP or
    response-shape problems: every failure is turned into a short
    human-readable message, so the return value can always be rendered
    directly in the UI.

    Args:
        system_text: Instruction text placed before the user text.
        user_text: The actual request / data for the model.
        temperature: Sampling temperature, coerced to ``float``.
        max_output_tokens: Upper bound on generated tokens, coerced to ``int``.

    Returns:
        The concatenated text of the first candidate's parts, or a message
        describing why no text is available (missing API key, HTTP error,
        transport error, blocked or empty response).
    """
    if not GEMINI_API_KEY:
        return (
            "⚠️ **LLM belum aktif** karena `HF_TOKEN_DQ` belum diset.\n\n"
            "Set di HuggingFace Space → Settings → Variables and secrets:\n"
            "- `HF_TOKEN_DQ` = API key Gemini kamu\n"
            "- opsional: `GEMINI_MODEL` (default: gemini-1.5-flash)\n"
        )

    url = f"{GEMINI_BASE}/models/{GEMINI_MODEL}:generateContent"
    # Send the key as a header instead of a ``?key=`` query parameter:
    # requests puts the full URL (query string included) into its
    # connection-error messages, and those messages are returned to the UI
    # below, which would expose the secret.
    headers = {"x-goog-api-key": GEMINI_API_KEY}

    # Gemini REST payload
    body = {
        "contents": [{
            "role": "user",
            "parts": [{"text": f"{system_text}\n\n{user_text}"}],
        }],
        "generationConfig": {
            "temperature": float(temperature),
            "maxOutputTokens": int(max_output_tokens),
        },
    }

    try:
        r = requests.post(url, headers=headers, json=body, timeout=60)
    except requests.RequestException as e:
        return f"❌ Gemini REST exception: {e}"

    if r.status_code != 200:
        return f"❌ Gemini REST error {r.status_code}: {r.text[:2000]}"

    try:
        data = r.json()
    except ValueError as e:
        # Body was not valid JSON (every requests version raises a
        # ValueError subclass here).
        return f"❌ Gemini REST exception: {e}"

    # Extract text defensively: any of these keys may be absent or null,
    # e.g. when the prompt or the answer was blocked.
    if not isinstance(data, dict):
        return "❌ Gemini tidak mengembalikan candidates."

    candidates = data.get("candidates") or []
    if not candidates:
        feedback = data.get("promptFeedback")
        block_reason = feedback.get("blockReason") if isinstance(feedback, dict) else None
        if block_reason:
            return f"❌ Gemini tidak mengembalikan candidates. (blockReason: {block_reason})"
        return "❌ Gemini tidak mengembalikan candidates."

    first = candidates[0] if isinstance(candidates[0], dict) else {}
    content = first.get("content")
    parts = content.get("parts") if isinstance(content, dict) else None
    text = "".join(
        p.get("text") or "" for p in (parts or []) if isinstance(p, dict)
    ).strip()
    if text:
        return text

    finish_reason = first.get("finishReason")
    if finish_reason:
        return f"❌ Gemini tidak mengembalikan teks. (finishReason: {finish_reason})"
    return "❌ Gemini tidak mengembalikan teks."
|
| 253 |
+
|
| 254 |
+
|
| 255 |
# ============================================================
|
| 256 |
# LOAD DATA (GLOBAL)
|
| 257 |
# ============================================================
|
|
|
|
| 285 |
if not PROVS:
|
| 286 |
raise ValueError("Tidak ada nilai provinsi yang valid setelah cleaning.")
|
| 287 |
|
| 288 |
+
prov_cache_peer = {}
|
| 289 |
|
| 290 |
|
| 291 |
def kabs_for_prov(pv):
|
|
|
|
| 421 |
|
| 422 |
|
| 423 |
# ============================================================
|
| 424 |
+
# LLM ANALYSIS (GEMINI REST)
|
| 425 |
# ============================================================
|
| 426 |
+
def llm_analyze(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
|
| 427 |
if scorecard_df is None or len(scorecard_df) == 0:
|
| 428 |
return "β Scorecard kosong. Jalankan audit dulu."
|
| 429 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
payload = {
|
| 431 |
"provinsi": pv,
|
| 432 |
"kab_kota": kv,
|
|
|
|
| 451 |
f"DATA (JSON):\n{json.dumps(payload, ensure_ascii=False)}"
|
| 452 |
)
|
| 453 |
|
| 454 |
+
return gemini_generate(system, user, temperature=0.2, max_output_tokens=800)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
|
| 456 |
|
| 457 |
# ============================================================
|
|
|
|
| 482 |
return narasi, scorecard, ben_tbl, ben_img, scat_img, sim_tbl, scorecard, ben_tbl, sim_tbl
|
| 483 |
|
| 484 |
def run_llm(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
    """Forward all six arguments unchanged to ``llm_analyze`` and return its result."""
    narrative = llm_analyze(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl)
    return narrative
|
| 486 |
|
| 487 |
|
| 488 |
pv0, kv0, kw0, kabs0, kews0 = ui_init()
|
|
|
|
| 494 |
f"- EXCLUDE (no analysis): `{', '.join(sorted(EXCLUDE_COLS_EXACT))}`\n"
|
| 495 |
f"- prov_col = `{prov_col}` · kab_col = `{kab_col}` · kewenangan_col = `{kew_col if kew_col else 'TIDAK ADA'}`\n"
|
| 496 |
"---\n"
|
| 497 |
+
"**LLM Analysis (Gemini REST):** set `HF_TOKEN_DQ` di Space Variables."
|
| 498 |
)
|
| 499 |
|
| 500 |
with gr.Row():
|
|
|
|
| 522 |
gr.Markdown("## Analisis Naratif (LLM — Gemini)")
|
| 523 |
out_llm = gr.Markdown()
|
| 524 |
|
|
|
|
| 525 |
st_score = gr.State(pd.DataFrame())
|
| 526 |
st_ben = gr.State(pd.DataFrame())
|
| 527 |
st_sim = gr.State(pd.DataFrame())
|