senatus123 committed on
Commit
4f192d9
·
verified ·
1 Parent(s): 945cefe

Upload doc_searcher_v2.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. doc_searcher_v2.py +36 -4
doc_searcher_v2.py CHANGED
@@ -38,14 +38,46 @@ class DocSearcherV2:
38
  limit = 100,
39
  ).points
40
 
41
- data = []
 
 
 
42
 
43
  for hit in search_result:
44
- data.append(hit.payload["tekst"])
 
 
 
 
 
 
45
 
46
- scores = self.reranker.compute_logits(queries,data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- return scores
 
49
 
50
  async def search_keyword(self, text: str):
51
  sparse_query = next(self.sparse_model.query_embed(text))
 
38
  limit = 100,
39
  ).points
40
 
41
+ # Sačuvaj kompletan hit objekat i tekst za reranking
42
+ # Kreiraj mapu tekst -> hit za brzo mapiranje
43
+ text_to_hit = {}
44
+ texts_for_reranking = []
45
 
46
  for hit in search_result:
47
+ text = hit.payload.get("tekst", "")
48
+ if text: # Samo ako postoji tekst
49
+ text_to_hit[text] = hit
50
+ texts_for_reranking.append(text)
51
+
52
+ if not texts_for_reranking:
53
+ return []
54
 
55
+ # Reranking - vraća top_10 sa (score, query, document) tuple-ovima
56
+ # queries * len(...) znači da svaki dokument dobija isti query
57
+ reranked_results = self.reranker.compute_logits(queries * len(texts_for_reranking), texts_for_reranking)
58
+
59
+ # Kombinuj rezultate: mapiraj rerank skorove sa originalnim hit-ovima
60
+ # reranked_results je lista tuple-ova: [(score, query, document_text), ...]
61
+ # gde je document_text originalni tekst koji je poslat reranker-u
62
+ results_with_scores = []
63
+ for score, query, document_text in reranked_results:
64
+ # Pronađi originalni hit po tekstu
65
+ if document_text in text_to_hit:
66
+ hit = text_to_hit[document_text]
67
+ # Vrati kompletan payload sa skorom
68
+ result = {
69
+ "score": float(score),
70
+ "id": str(hit.id),
71
+ "text": document_text,
72
+ "payload": hit.payload # Kompletan payload sa svim podacima
73
+ }
74
+ results_with_scores.append(result)
75
+
76
+ # Reranker već vraća sortirano, ali osigurajmo da je sortirano
77
+ results_with_scores.sort(key=lambda x: x["score"], reverse=True)
78
 
79
+ # Vrati top rezultate (reranker već vraća top 10)
80
+ return results_with_scores
81
 
82
  async def search_keyword(self, text: str):
83
  sparse_query = next(self.sparse_model.query_embed(text))