Spaces:

allbibek
/

semanticsphrase

Running

App Files Community

allbibek committed on Sep 26, 2025

Commit

2489a92

verified ·

1 Parent(s): 84eb043

cosine cross reranker

Browse files

Files changed (1) hide show

app.py +32 -6

app.py CHANGED Viewed

@@ -30,28 +30,54 @@ def get_embedding(text: str):
         return []
     expanded_text = expand_query(text)
-    embedding = embedder.encode(expanded_text).tolist()
     return embedding
 def fn_semantic(query: str, match_count: int = 100):
-    embedding = embedder.encode(query).tolist()
     response = supabase.rpc(
         "search_kbli",
-        {"query_embedding": embedding, "match_count": match_count}
     ).execute()
     candidates = response.data or []
     if not candidates:
         return {"results": []}
-    pairs = [(query, c["judul"] + " " + c["deskripsi"]) for c in candidates]
-    scores = reranker.predict(pairs)
     for c, s in zip(candidates, scores):
         c["rerank_score"] = float(s)
-    candidates = sorted(candidates, key=lambda x: x["rerank_score"], reverse=True)
     return {"results": candidates[:10]}

         return []
     expanded_text = expand_query(text)
+    embedding = embedder.encode(expanded_text, normalize_embeddings=True).tolist()
     return embedding
 def fn_semantic(query: str, match_count: int = 100):
+    expanded = expand_query(query)
+    embedding_q = embedder.encode(expanded, normalize_embeddings=True).tolist()
     response = supabase.rpc(
         "search_kbli",
+        {"query_embedding": embedding_q, "match_count": match_count}
     ).execute()
     candidates = response.data or []
     if not candidates:
         return {"results": []}
+    print("=== Candidates BEFORE rerank (top 10) ===")
+    for c in candidates[:10]:
+        print(c.get("kode"), c.get("judul")[:80], "sim=", c.get("similarity"))
+    pairs = [(expanded, c["judul"] + " " + c["deskripsi"]) for c in candidates]
+    try:
+        scores = reranker.predict(pairs)
+    except Exception as e:
+        print("Reranker error:", e)
+        return {"results": sorted(candidates, key=lambda x: x.get("similarity", 0), reverse=True)[:10]}
     for c, s in zip(candidates, scores):
         c["rerank_score"] = float(s)
+    print("=== Candidates AFTER rerank (top 10) ===")
+    for c in candidates[:10]:
+        print(c.get("kode"), c.get("judul")[:80], "sim=", c.get("similarity"), "rerank=", c.get("rerank_score"))
+    rerank_vals = [c["rerank_score"] for c in candidates]
+    rmin, rmax = min(rerank_vals), max(rerank_vals)
+    for c in candidates:
+        if rmax - rmin > 1e-9:
+            c["rerank_norm"] = (c["rerank_score"] - rmin) / (rmax - rmin)
+        else:
+            c["rerank_norm"] = 0.0
+    for c in candidates:
+        sim = c.get("similarity", 0.0)
+        c["hybrid_score"] = 0.6 * sim + 0.4 * c["rerank_norm"]
+    candidates = sorted(candidates, key=lambda x: x["hybrid_score"], reverse=True)
     return {"results": candidates[:10]}