ParshvPatel's picture
feat: HuggingFace Spaces deployment
d992912
import numpy as np
from backend.app.engine.bm25 import SimpleBM25
class TestSimpleBM25:
    """Checks for ``SimpleBM25`` fitting and candidate scoring."""

    def test_fit_and_score(self):
        """Fitting records the corpus size and the matching document wins."""
        corpus = [
            "black leather jacket mens",
            "red floral dress womens",
            "blue denim jeans casual",
        ]
        ranker = SimpleBM25()
        ranker.fit(corpus)
        assert ranker.n_docs == len(corpus)

        result = ranker.score_candidates("black leather", [0, 1, 2])
        # Document 0 is the only one containing either query term.
        for other in (1, 2):
            assert result[0] > result[other]

    def test_empty_query(self):
        """An empty query string is expected to score every candidate 0."""
        ranker = SimpleBM25()
        ranker.fit(["black dress", "red shoes"])

        result = ranker.score_candidates("", [0, 1])
        is_zero = result == 0.0
        assert np.all(is_zero)

    def test_unknown_terms(self):
        """A query term absent from the corpus is expected to score 0."""
        ranker = SimpleBM25()
        ranker.fit(["black dress"])

        result = ranker.score_candidates("xyznotaword", [0])
        only_score = result[0]
        assert only_score == 0.0

    def test_out_of_range_index(self):
        """A candidate index past the corpus is expected to score 0."""
        ranker = SimpleBM25()
        ranker.fit(["black dress"])

        result = ranker.score_candidates("black", [0, 999])
        # Index 0 is a real document that matches; 999 was never fitted.
        assert result[0] > 0
        assert result[1] == 0.0

    def test_exact_match_scores_higher(self):
        """The document equal to the query outranks unrelated documents."""
        ranker = SimpleBM25()
        ranker.fit(
            ["black leather jacket", "red silk dress", "blue cotton shirt"]
        )

        result = ranker.score_candidates("black leather jacket", [0, 1, 2])
        for other in (1, 2):
            assert result[0] > result[other]

    def test_doc_frequency_computed(self):
        """``df`` holds, per term, the number of documents containing it."""
        ranker = SimpleBM25()
        ranker.fit(["black dress", "black shoes", "red dress"])

        expected_df = {"black": 2, "dress": 2, "red": 1}
        for term, doc_count in expected_df.items():
            assert ranker.df[term] == doc_count