irhamni committed on
Commit
ee60c68
·
verified ·
1 Parent(s): ddfd629

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -28
app.py CHANGED
@@ -1,7 +1,7 @@
1
  # ============================================================
2
- # IPLM Audit — HF Spaces (Gradio) — + LLM Analysis (Gemini Only)
3
- # - Scorecard + Benford + Similarity + Scatter
4
- # - LLM narasi teknokratis untuk Scorecard (opsional)
5
  # ============================================================
6
 
7
  import os
@@ -21,6 +21,7 @@ import pandas as pd
21
  import gradio as gr
22
  from PIL import Image
23
 
 
24
  from scipy.stats import chisquare
25
  from sklearn.preprocessing import StandardScaler
26
  from sklearn.metrics.pairwise import cosine_similarity
@@ -29,18 +30,20 @@ import matplotlib
29
  matplotlib.use("Agg")
30
  import matplotlib.pyplot as plt
31
 
32
- import google.generativeai as genai
33
-
34
 
35
  # ============================================================
36
  # CONFIG
37
  # ============================================================
38
  DATA_PATH = os.getenv("IPLM_DATA_PATH", "IPLM_clean_manual_131225.xlsx")
39
 
40
- # Gemini only: API key kamu
41
  GEMINI_API_KEY = os.getenv("HF_TOKEN_DQ", "").strip()
42
  GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
43
 
 
 
 
 
44
  EXCLUDE_COLS_EXACT = {"kontak_wa", "npp", "tanggal_kirim", "updated_at", "created_at"}
45
 
46
  BENFORD_P = np.array([math.log10(1 + 1/d) for d in range(1, 10)])
@@ -204,6 +207,51 @@ def scatter_plot(peer_agg, x_col, y_col):
204
  return fig_to_pil(fig)
205
 
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  # ============================================================
208
  # LOAD DATA (GLOBAL)
209
  # ============================================================
@@ -237,7 +285,7 @@ PROVS = clean_str_list(df["_prov_str"].unique().tolist())
237
  if not PROVS:
238
  raise ValueError("Tidak ada nilai provinsi yang valid setelah cleaning.")
239
 
240
- prov_cache_peer = {} # cache peer per prov
241
 
242
 
243
  def kabs_for_prov(pv):
@@ -373,20 +421,12 @@ def audit(pv, kv, kw):
373
 
374
 
375
  # ============================================================
376
- # LLM (GEMINI ONLY)
377
  # ============================================================
378
- def gemini_analyze_scorecard(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
379
  if scorecard_df is None or len(scorecard_df) == 0:
380
  return "❌ Scorecard kosong. Jalankan audit dulu."
381
 
382
- if not GEMINI_API_KEY:
383
- return (
384
- "⚠️ **LLM belum aktif** karena `HF_TOKEN_DQ` belum diset.\n\n"
385
- "Set di HuggingFace Space β†’ Settings β†’ Variables and secrets:\n"
386
- "- `HF_TOKEN_DQ` = API key Gemini kamu\n"
387
- "- opsional: `GEMINI_MODEL` (default: gemini-1.5-flash)\n"
388
- )
389
-
390
  payload = {
391
  "provinsi": pv,
392
  "kab_kota": kv,
@@ -411,14 +451,7 @@ def gemini_analyze_scorecard(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
411
  f"DATA (JSON):\n{json.dumps(payload, ensure_ascii=False)}"
412
  )
413
 
414
- try:
415
- genai.configure(api_key=GEMINI_API_KEY)
416
- model = genai.GenerativeModel(GEMINI_MODEL)
417
- resp = model.generate_content(f"{system}\n\n{user}")
418
- text = getattr(resp, "text", "")
419
- return (text or "").strip() or "❌ Gemini tidak mengembalikan teks."
420
- except Exception as e:
421
- return f"❌ LLM error (Gemini): {e}"
422
 
423
 
424
  # ============================================================
@@ -449,7 +482,7 @@ def run_audit(pv, kv, kw):
449
  return narasi, scorecard, ben_tbl, ben_img, scat_img, sim_tbl, scorecard, ben_tbl, sim_tbl
450
 
451
  def run_llm(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
452
- return gemini_analyze_scorecard(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl)
453
 
454
 
455
  pv0, kv0, kw0, kabs0, kews0 = ui_init()
@@ -461,7 +494,7 @@ with gr.Blocks(title="IPLM Audit β€” Kualitas Data & Indikasi Tidak Wajar", them
461
  f"- EXCLUDE (no analysis): `{', '.join(sorted(EXCLUDE_COLS_EXACT))}`\n"
462
  f"- prov_col = `{prov_col}` Β· kab_col = `{kab_col}` Β· kewenangan_col = `{kew_col if kew_col else 'TIDAK ADA'}`\n"
463
  "---\n"
464
- "**LLM Analysis (Gemini, opsional):** set `HF_TOKEN_DQ` di Space Variables."
465
  )
466
 
467
  with gr.Row():
@@ -489,7 +522,6 @@ with gr.Blocks(title="IPLM Audit β€” Kualitas Data & Indikasi Tidak Wajar", them
489
  gr.Markdown("## Analisis Naratif (LLM β€” Gemini)")
490
  out_llm = gr.Markdown()
491
 
492
- # hidden states: store last tables for llm button
493
  st_score = gr.State(pd.DataFrame())
494
  st_ben = gr.State(pd.DataFrame())
495
  st_sim = gr.State(pd.DataFrame())
 
1
  # ============================================================
2
+ # IPLM Audit — HF Spaces (Gradio) — + LLM Analysis (Gemini REST)
3
+ # - NO google SDK (avoid grpc segfault / exit 139)
4
+ # - Gemini API key: HF_TOKEN_DQ
5
  # ============================================================
6
 
7
  import os
 
21
  import gradio as gr
22
  from PIL import Image
23
 
24
+ import requests
25
  from scipy.stats import chisquare
26
  from sklearn.preprocessing import StandardScaler
27
  from sklearn.metrics.pairwise import cosine_similarity
 
30
  matplotlib.use("Agg")
31
  import matplotlib.pyplot as plt
32
 
 
 
33
 
34
  # ============================================================
35
  # CONFIG
36
  # ============================================================
37
  DATA_PATH = os.getenv("IPLM_DATA_PATH", "IPLM_clean_manual_131225.xlsx")
38
 
39
+ # Gemini only (REST)
40
  GEMINI_API_KEY = os.getenv("HF_TOKEN_DQ", "").strip()
41
  GEMINI_MODEL = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
42
 
43
+ # Gemini REST endpoint (GenerateContent)
44
+ # v1beta works broadly; if you use "v1" in your org, adjust accordingly.
45
+ GEMINI_BASE = os.getenv("GEMINI_BASE_URL", "https://generativelanguage.googleapis.com/v1beta")
46
+
47
  EXCLUDE_COLS_EXACT = {"kontak_wa", "npp", "tanggal_kirim", "updated_at", "created_at"}
48
 
49
  BENFORD_P = np.array([math.log10(1 + 1/d) for d in range(1, 10)])
 
207
  return fig_to_pil(fig)
208
 
209
 
210
+ # ============================================================
211
+ # GEMINI REST CALL
212
+ # ============================================================
213
def gemini_generate(system_text: str, user_text: str, temperature=0.2, max_output_tokens=700) -> str:
    """Send one prompt to the Gemini REST ``generateContent`` endpoint.

    The system and user texts are joined with a blank line and sent as a
    single ``user`` turn. HTTP, network and response-parsing failures are
    not raised; they are returned as a readable message so the caller can
    display the result directly.

    Args:
        system_text: Instruction text placed before the user text.
        user_text: The request/data text.
        temperature: Sampling temperature; coerced with ``float()``.
        max_output_tokens: Cap on generated tokens; coerced with ``int()``.

    Returns:
        The concatenated text parts of the first candidate, or a message
        describing why no text is available (missing API key, non-200
        status, no candidates, empty text, or an exception during the call).
    """
    if not GEMINI_API_KEY:
        return (
            "⚠️ **LLM belum aktif** karena `HF_TOKEN_DQ` belum diset.\n\n"
            "Set di HuggingFace Space → Settings → Variables and secrets:\n"
            "- `HF_TOKEN_DQ` = API key Gemini kamu\n"
            "- opsional: `GEMINI_MODEL` (default: gemini-1.5-flash)\n"
        )

    url = f"{GEMINI_BASE}/models/{GEMINI_MODEL}:generateContent"

    # Authenticate with a header rather than a `?key=` query parameter: the
    # query string is part of the URL, and request exceptions embed the URL
    # in their message, which is echoed back to the user in the except branch
    # below. Keeping the key out of the URL keeps it out of that message.
    headers = {"x-goog-api-key": GEMINI_API_KEY}

    # Gemini REST payload
    body = {
        "contents": [{
            "role": "user",
            "parts": [{"text": f"{system_text}\n\n{user_text}"}],
        }],
        "generationConfig": {
            "temperature": float(temperature),
            "maxOutputTokens": int(max_output_tokens),
        },
    }

    try:
        r = requests.post(url, headers=headers, json=body, timeout=60)
        if r.status_code != 200:
            # Truncate so a large error body cannot flood the UI.
            return f"❌ Gemini REST error {r.status_code}: {r.text[:2000]}"
        data = r.json()

        # Extract text safely
        candidates = data.get("candidates", [])
        if not candidates:
            return "❌ Gemini tidak mengembalikan candidates."
        # `or` guards against keys that are present but explicitly null.
        content = candidates[0].get("content") or {}
        parts = content.get("parts") or []
        text = "".join(p.get("text", "") for p in parts).strip()
        return text or "❌ Gemini tidak mengembalikan teks."
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and any failure
        # (network, JSON decoding, unexpected response shape) should be
        # reported as text instead of crashing the callback.
        return f"❌ Gemini REST exception: {e}"
253
+
254
+
255
  # ============================================================
256
  # LOAD DATA (GLOBAL)
257
  # ============================================================
 
285
  if not PROVS:
286
  raise ValueError("Tidak ada nilai provinsi yang valid setelah cleaning.")
287
 
288
+ prov_cache_peer = {}
289
 
290
 
291
  def kabs_for_prov(pv):
 
421
 
422
 
423
  # ============================================================
424
+ # LLM ANALYSIS (GEMINI REST)
425
  # ============================================================
426
+ def llm_analyze(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
427
  if scorecard_df is None or len(scorecard_df) == 0:
428
  return "❌ Scorecard kosong. Jalankan audit dulu."
429
 
 
 
 
 
 
 
 
 
430
  payload = {
431
  "provinsi": pv,
432
  "kab_kota": kv,
 
451
  f"DATA (JSON):\n{json.dumps(payload, ensure_ascii=False)}"
452
  )
453
 
454
+ return gemini_generate(system, user, temperature=0.2, max_output_tokens=800)
 
 
 
 
 
 
 
455
 
456
 
457
  # ============================================================
 
482
  return narasi, scorecard, ben_tbl, ben_img, scat_img, sim_tbl, scorecard, ben_tbl, sim_tbl
483
 
484
  def run_llm(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl):
485
+ return llm_analyze(pv, kv, kw, scorecard_df, ben_tbl, sim_tbl)
486
 
487
 
488
  pv0, kv0, kw0, kabs0, kews0 = ui_init()
 
494
  f"- EXCLUDE (no analysis): `{', '.join(sorted(EXCLUDE_COLS_EXACT))}`\n"
495
  f"- prov_col = `{prov_col}` Β· kab_col = `{kab_col}` Β· kewenangan_col = `{kew_col if kew_col else 'TIDAK ADA'}`\n"
496
  "---\n"
497
+ "**LLM Analysis (Gemini REST):** set `HF_TOKEN_DQ` di Space Variables."
498
  )
499
 
500
  with gr.Row():
 
522
  gr.Markdown("## Analisis Naratif (LLM β€” Gemini)")
523
  out_llm = gr.Markdown()
524
 
 
525
  st_score = gr.State(pd.DataFrame())
526
  st_ben = gr.State(pd.DataFrame())
527
  st_sim = gr.State(pd.DataFrame())