Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,10 @@ import os
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from google import genai
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
load_dotenv()
|
| 9 |
|
| 10 |
GOOGLE_API_KEY = os.getenv("GEMINI_API")
|
|
@@ -300,6 +304,86 @@ def search_kbli(text: str):
|
|
| 300 |
html += "</div>"
|
| 301 |
return html
|
| 302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
with gr.Blocks(css="""
|
| 304 |
.title {font-size: 22px; font-weight: 700; color: #111827; margin-bottom: 4px;}
|
| 305 |
.desc {font-size: 14px; color: #6b7280; margin-bottom: 16px;}
|
|
@@ -390,5 +474,31 @@ with gr.Blocks(css="""
|
|
| 390 |
btn_clear4.click(lambda: ("", None), None, [inp4, out4])
|
| 391 |
btn_submit4.click(hybrid_search, inp4, out4, api_name="hybrid_search")
|
| 392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
if __name__ == "__main__":
|
| 394 |
demo.queue().launch(show_error=True)
|
|
|
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
from google import genai
|
| 7 |
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import time
|
| 10 |
+
import math
|
| 11 |
+
|
| 12 |
load_dotenv()
|
| 13 |
|
| 14 |
GOOGLE_API_KEY = os.getenv("GEMINI_API")
|
|
|
|
| 304 |
html += "</div>"
|
| 305 |
return html
|
| 306 |
|
| 307 |
+
def calculate_mrr(retrieved_kodes, relevant_kodes):
    """Return the reciprocal rank of the first relevant code.

    Scans ``retrieved_kodes`` in ranked order; on the first hit in
    ``relevant_kodes`` returns 1/rank (1-based). Returns 0.0 when no
    retrieved code is relevant.
    """
    rank = 0
    for kode in retrieved_kodes:
        rank += 1
        if kode in relevant_kodes:
            return 1.0 / rank
    return 0.0
|
| 311 |
+
|
| 312 |
+
def calculate_recall(retrieved_kodes, relevant_kodes, k=10):
    """Return Recall@k: the fraction of relevant codes found in the top-k.

    Returns 0.0 when ``relevant_kodes`` is empty (avoids division by zero).
    """
    relevant = set(relevant_kodes)
    if not relevant:
        return 0.0
    hits = relevant.intersection(retrieved_kodes[:k])
    return len(hits) / len(relevant)
|
| 317 |
+
|
| 318 |
+
def calculate_ndcg(retrieved_kodes, relevance_dict, k=10):
    """Return nDCG@k for a ranked list against graded relevance judgments.

    Args:
        retrieved_kodes: codes in ranked order.
        relevance_dict: mapping kode -> graded relevance (missing codes
            count as 0).
        k: rank cutoff (default 10).

    Returns:
        DCG@k divided by the ideal DCG@k, or 0.0 when IDCG is 0
        (no positive relevance judgments).

    Fix: the original generator reused ``k`` as its loop variable
    (``for i, k in ...``), shadowing the cutoff parameter inside the
    expression; it only worked because the slice ``[:k]`` is evaluated
    eagerly with the outer ``k``. Loop variables renamed — behavior
    is unchanged.
    """
    dcg = sum(
        relevance_dict.get(kode, 0) / math.log2(rank + 2)
        for rank, kode in enumerate(retrieved_kodes[:k])
    )
    ideal_rels = sorted(relevance_dict.values(), reverse=True)[:k]
    idcg = sum(rel / math.log2(rank + 2) for rank, rel in enumerate(ideal_rels))
    return dcg / idcg if idcg > 0 else 0.0
|
| 323 |
+
|
| 324 |
+
def run_evaluation(file_obj, scenario):
    """Batch-evaluate a retrieval scenario against an uploaded ground-truth CSV.

    Reads the CSV (expected columns: ``query_id``, ``query``,
    ``kode_kbli``, ``relevance``), runs every query through the pipeline
    selected by ``scenario``, computes MRR@10 / Recall@10 / nDCG@10 and
    per-query latency, and writes the per-query detail to an Excel report.

    Args:
        file_obj: uploaded file object from the Gradio File component,
            or None when nothing was uploaded.
        scenario: either "Semantic Only (Baseline)" or
            "Hybrid + Reranker (Final)".

    Returns:
        (summary dict, per-query DataFrame, report filename), or a
        warning string plus two Nones when no file was supplied.
    """
    if file_obj is None:
        return "Peringatan: Silakan unggah file ground_truth.csv terlebih dahulu.", None, None

    # NOTE(review): assumes the Gradio File component yields an object with
    # a ``.name`` path attribute — confirm against the installed version.
    df = pd.read_csv(file_obj.name)
    queries = df.groupby('query_id').first()['query'].to_dict()

    # Map each query id to {kode: graded relevance}.
    ground_truth = {
        q_id: dict(zip(group['kode_kbli'].astype(str), group['relevance']))
        for q_id, group in df.groupby('query_id')
    }

    per_query_rows = []

    for q_id, query_text in queries.items():
        started = time.perf_counter()  # latency measurement starts here

        # Dispatch to the pipeline selected in the UI.
        if scenario == "Semantic Only (Baseline)":
            response = fn_semantic(query_text, match_count=50)
        else:  # "Hybrid + Reranker (Final)"
            response = hybrid_search(query_text, match_count=50)

        elapsed = time.perf_counter() - started

        candidates = response.get("results", [])
        retrieved_kodes = [str(r.get('kode')) for r in candidates]

        rel_dict = ground_truth.get(q_id, {})
        relevant_kodes = list(rel_dict.keys())

        per_query_rows.append({
            "Query ID": q_id,
            "Query Text": query_text,
            "MRR@10": round(calculate_mrr(retrieved_kodes, relevant_kodes), 4),
            "Recall@10": round(calculate_recall(retrieved_kodes, relevant_kodes, k=10), 4),
            "nDCG@10": round(calculate_ndcg(retrieved_kodes, rel_dict, k=10), 4),
            "Latency (sec)": round(elapsed, 4)  # per-query latency
        })

        time.sleep(1)  # throttle to stay under the Gemini API rate limit

    results_df = pd.DataFrame(per_query_rows)

    # Scenario-level averages across all queries.
    summary = {
        "Skenario": scenario,
        "Total Query": len(queries),
        "Avg MRR@10": round(results_df["MRR@10"].mean(), 4),
        "Avg Recall@10": round(results_df["Recall@10"].mean(), 4),
        "Avg nDCG@10": round(results_df["nDCG@10"].mean(), 4),
        "Avg Latency (sec)": round(results_df["Latency (sec)"].mean(), 4)
    }

    # Export the per-query detail to Excel (needs openpyxl at runtime).
    output_filename = f"Evaluasi_{scenario.split()[0]}.xlsx"
    results_df.to_excel(output_filename, index=False)

    return summary, results_df, output_filename
|
| 386 |
+
|
| 387 |
with gr.Blocks(css="""
|
| 388 |
.title {font-size: 22px; font-weight: 700; color: #111827; margin-bottom: 4px;}
|
| 389 |
.desc {font-size: 14px; color: #6b7280; margin-bottom: 16px;}
|
|
|
|
| 474 |
btn_clear4.click(lambda: ("", None), None, [inp4, out4])
|
| 475 |
btn_submit4.click(hybrid_search, inp4, out4, api_name="hybrid_search")
|
| 476 |
|
| 477 |
+
with gr.Tab("Ablation Study"):
|
| 478 |
+
gr.Markdown("### Metrics & Latency")
|
| 479 |
+
gr.Markdown("Unggah file `ground_truth.csv` Anda untuk menjalankan *batch testing* dan membandingkan skenario.")
|
| 480 |
+
|
| 481 |
+
with gr.Row():
|
| 482 |
+
with gr.Column(scale=1):
|
| 483 |
+
eval_file = gr.File(label="Upload ground_truth.csv", file_types=[".csv"])
|
| 484 |
+
eval_scenario = gr.Dropdown(
|
| 485 |
+
choices=["Semantic Only (Baseline)", "Hybrid + Reranker (Final)"],
|
| 486 |
+
value="Hybrid + Reranker (Final)",
|
| 487 |
+
label="Pilih Skenario Riset"
|
| 488 |
+
)
|
| 489 |
+
btn_run_eval = gr.Button("Jalankan Evaluasi Otomatis", variant="primary")
|
| 490 |
+
|
| 491 |
+
with gr.Column(scale=1):
|
| 492 |
+
eval_summary = gr.JSON(label="Ringkasan Skor Rata-rata & Latensi")
|
| 493 |
+
eval_download = gr.File(label="Download Laporan (Excel)")
|
| 494 |
+
|
| 495 |
+
eval_table = gr.Dataframe(label="Detail Per-Kueri")
|
| 496 |
+
|
| 497 |
+
btn_run_eval.click(
|
| 498 |
+
run_evaluation,
|
| 499 |
+
inputs=[eval_file, eval_scenario],
|
| 500 |
+
outputs=[eval_summary, eval_table, eval_download]
|
| 501 |
+
)
|
| 502 |
+
|
| 503 |
if __name__ == "__main__":
|
| 504 |
demo.queue().launch(show_error=True)
|