allbibek committed
Commit c926b76 · verified · 1 Parent(s): 12aaebc

Update app.py

Files changed (1): app.py (+110, -0)
app.py CHANGED
@@ -5,6 +5,10 @@ import os
 from dotenv import load_dotenv
 from google import genai

+import pandas as pd
+import time
+import math
+
 load_dotenv()

 GOOGLE_API_KEY = os.getenv("GEMINI_API")

@@ -300,6 +304,86 @@ def search_kbli(text: str):
     html += "</div>"
     return html

+def calculate_mrr(retrieved_kodes, relevant_kodes):
+    for i, kode in enumerate(retrieved_kodes):
+        if kode in relevant_kodes: return 1.0 / (i + 1)
+    return 0.0
+
+def calculate_recall(retrieved_kodes, relevant_kodes, k=10):
+    retrieved_k_set = set(retrieved_kodes[:k])
+    relevant_set = set(relevant_kodes)
+    if not relevant_set: return 0.0
+    return len(retrieved_k_set.intersection(relevant_set)) / len(relevant_set)
+
+def calculate_ndcg(retrieved_kodes, relevance_dict, k=10):
+    dcg = sum(relevance_dict.get(kode, 0) / math.log2(i + 2) for i, kode in enumerate(retrieved_kodes[:k]))
+    ideal_rels = sorted(relevance_dict.values(), reverse=True)[:k]
+    idcg = sum(rel / math.log2(i + 2) for i, rel in enumerate(ideal_rels))
+    return dcg / idcg if idcg > 0 else 0.0
+
+def run_evaluation(file_obj, scenario):
+    if file_obj is None:
+        return "Warning: Please upload the ground_truth.csv file first.", None, None
+
+    df = pd.read_csv(file_obj.name)
+    queries = df.groupby('query_id').first()['query'].to_dict()
+
+    ground_truth = {}
+    for q_id, group in df.groupby('query_id'):
+        ground_truth[q_id] = dict(zip(group['kode_kbli'].astype(str), group['relevance']))
+
+    results_list = []
+
+    for q_id, query_text in queries.items():
+        start_time = time.perf_counter()  # start the latency timer
+
+        # Run the retrieval function for the selected scenario
+        if scenario == "Semantic Only (Baseline)":
+            response = fn_semantic(query_text, match_count=50)
+        else:  # "Hybrid + Reranker (Final)"
+            response = hybrid_search(query_text, match_count=50)
+
+        latency = time.perf_counter() - start_time  # elapsed wall-clock time
+
+        candidates = response.get("results", [])
+        retrieved_kodes = [str(r.get('kode')) for r in candidates]
+
+        rel_dict = ground_truth.get(q_id, {})
+        relevant_kodes = list(rel_dict.keys())
+
+        mrr = calculate_mrr(retrieved_kodes, relevant_kodes)
+        recall = calculate_recall(retrieved_kodes, relevant_kodes, k=10)
+        ndcg = calculate_ndcg(retrieved_kodes, rel_dict, k=10)
+
+        results_list.append({
+            "Query ID": q_id,
+            "Query Text": query_text,
+            "MRR@10": round(mrr, 4),
+            "Recall@10": round(recall, 4),
+            "nDCG@10": round(ndcg, 4),
+            "Latency (sec)": round(latency, 4)  # per-query latency
+        })
+
+        time.sleep(1)  # avoid hitting the Gemini API rate limit
+
+    results_df = pd.DataFrame(results_list)
+
+    # Compute averages across queries
+    summary = {
+        "Scenario": scenario,
+        "Total Queries": len(queries),
+        "Avg MRR@10": round(results_df["MRR@10"].mean(), 4),
+        "Avg Recall@10": round(results_df["Recall@10"].mean(), 4),
+        "Avg nDCG@10": round(results_df["nDCG@10"].mean(), 4),
+        "Avg Latency (sec)": round(results_df["Latency (sec)"].mean(), 4)
+    }
+
+    # Export the per-query results to Excel
+    output_filename = f"Evaluasi_{scenario.split()[0]}.xlsx"
+    results_df.to_excel(output_filename, index=False)
+
+    return summary, results_df, output_filename
+
 with gr.Blocks(css="""
 .title {font-size: 22px; font-weight: 700; color: #111827; margin-bottom: 4px;}
 .desc {font-size: 14px; color: #6b7280; margin-bottom: 16px;}

@@ -390,5 +474,31 @@ with gr.Blocks(css="""
         btn_clear4.click(lambda: ("", None), None, [inp4, out4])
         btn_submit4.click(hybrid_search, inp4, out4, api_name="hybrid_search")

+    with gr.Tab("Ablation Study"):
+        gr.Markdown("### Metrics & Latency")
+        gr.Markdown("Upload your `ground_truth.csv` file to run batch testing and compare scenarios.")
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                eval_file = gr.File(label="Upload ground_truth.csv", file_types=[".csv"])
+                eval_scenario = gr.Dropdown(
+                    choices=["Semantic Only (Baseline)", "Hybrid + Reranker (Final)"],
+                    value="Hybrid + Reranker (Final)",
+                    label="Select Research Scenario"
+                )
+                btn_run_eval = gr.Button("Run Automatic Evaluation", variant="primary")
+
+            with gr.Column(scale=1):
+                eval_summary = gr.JSON(label="Average Scores & Latency Summary")
+                eval_download = gr.File(label="Download Report (Excel)")
+
+        eval_table = gr.Dataframe(label="Per-Query Details")
+
+        btn_run_eval.click(
+            run_evaluation,
+            inputs=[eval_file, eval_scenario],
+            outputs=[eval_summary, eval_table, eval_download]
+        )
+
 if __name__ == "__main__":
     demo.queue().launch(show_error=True)
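
Note: run_evaluation expects ground_truth.csv to carry one row per (query, relevant code) pair, using exactly the columns the code reads: query_id, query, kode_kbli, and a graded relevance score (used as the gain in nDCG). A minimal illustrative file might look like the sketch below; the queries and KBLI codes are made-up placeholders, not part of this commit.

query_id,query,kode_kbli,relevance
1,usaha toko roti,10710,2
1,usaha toko roti,47242,1
2,jasa konsultan pajak,69202,2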
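A quick way to sanity-check the metric helpers is a toy ranking with hand-computed expected values. This is a throwaway sketch, assuming the three functions can be imported from app.py (importing app.py also executes its module-level Gradio/Gemini setup, so moving the helpers into their own module would make this cleaner):

# Toy ranking: the first relevant code appears at rank 2.
from app import calculate_mrr, calculate_recall, calculate_ndcg

retrieved = ["46339", "47111", "10710"]   # ranked system output (placeholder codes)
relevance = {"47111": 2, "10710": 1}      # graded ground truth (placeholder)

assert calculate_mrr(retrieved, list(relevance)) == 0.5           # 1 / rank 2
assert calculate_recall(retrieved, list(relevance), k=10) == 1.0  # 2 of 2 relevant codes found

# DCG = 2/log2(3) + 1/log2(4); IDCG = 2/log2(2) + 1/log2(3)
print(round(calculate_ndcg(retrieved, relevance, k=10), 4))       # ~0.6697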