allbibek committed on
Commit
cea444d
·
verified ·
1 Parent(s): c809919

normalisasi rerank

Browse files
Files changed (1) hide show
  1. app.py +34 -17
app.py CHANGED
@@ -59,7 +59,8 @@ def expand_query(query: str, num_variations: int = 3) -> str:
59
  try:
60
  # Panggil Gemini
61
  response = gemini_client.models.generate_content(
62
- model="gemini-2.5-flash",
 
63
  contents=prompt,
64
  )
65
 
@@ -190,22 +191,38 @@ def hybrid_search_no_gemini(query: str, match_count: int = 50):
190
  pairs = [(query, c["judul"] + " " + c["deskripsi"]) for c in candidates]
191
 
192
  try:
193
- scores = reranker.predict(pairs)
194
- except Exception as e:
195
- print("Reranker error:", e)
196
- return {"results": sorted(candidates, key=lambda x: x.get("similarity", 0), reverse=True)[:10]}
197
-
198
- for c, s in zip(candidates, scores):
199
- c["rerank_score"] = float(s)
200
- rerank_prob = apply_sigmoid(float(s))
201
- c["rerank_prob"] = rerank_prob
202
-
203
- db_sim = c.get("similarity", 0.0)
204
-
205
- c["final_score"] = (0.6 * rerank_prob) + (0.4 * db_sim)
206
-
207
- candidates = sorted(candidates, key=lambda x: x["final_score"], reverse=True)
208
- return {"results": candidates[:10]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
  # ==========================================
211
  # CORE APPS
 
59
  try:
60
  # Panggil Gemini
61
  response = gemini_client.models.generate_content(
62
+ # model="gemini-2.5-flash",
63
+ model="gemini-robotics-er-1.5-preview",
64
  contents=prompt,
65
  )
66
 
 
191
  pairs = [(query, c["judul"] + " " + c["deskripsi"]) for c in candidates]
192
 
193
  try:
194
+ scores = reranker.predict(pairs)
195
+ except Exception as e:
196
+ print("Reranker error:", e)
197
+ return {"results": sorted(candidates, key=lambda x: x.get("similarity", 0), reverse=True)[:10]}
198
+
199
+ # Ambil nilai min dan max dari skor RRF bawaan database
200
+ db_scores = [c.get("similarity", 0.0) for c in candidates]
201
+ db_min = min(db_scores) if db_scores else 0.0
202
+ db_max = max(db_scores) if db_scores else 1.0
203
+
204
+ for c, s in zip(candidates, scores):
205
+ # 1. Probabilitas Reranker (0 sampai 1)
206
+ c["rerank_score"] = float(s)
207
+ rerank_prob = apply_sigmoid(float(s))
208
+ c["rerank_prob"] = rerank_prob
209
+
210
+ # 2. Normalisasi Skor Database (RRF) ke 0 sampai 1
211
+ raw_db_sim = c.get("similarity", 0.0)
212
+ if db_max > db_min:
213
+ norm_db_sim = (raw_db_sim - db_min) / (db_max - db_min)
214
+ else:
215
+ norm_db_sim = 0.0
216
+
217
+ c["norm_db_sim"] = norm_db_sim # Opsional: simpan untuk debugging
218
+
219
+ # 3. Interpolasi yang SEIMBANG
220
+ c["final_score"] = (0.6 * rerank_prob) + (0.4 * norm_db_sim)
221
+
222
+ # Urutkan berdasarkan final_score
223
+ candidates = sorted(candidates, key=lambda x: x["final_score"], reverse=True)
224
+
225
+ return {"results": candidates[:10]}
226
 
227
  # ==========================================
228
  # CORE APPS