import numpy as np

from backend.app.engine.bm25 import SimpleBM25


class TestSimpleBM25:
    """Behavioural checks for ``SimpleBM25`` fitting and candidate scoring."""

    @staticmethod
    def _fitted(corpus):
        """Return a fresh ``SimpleBM25`` that has been fitted on *corpus*."""
        index = SimpleBM25()
        index.fit(corpus)
        return index

    def test_fit_and_score(self):
        """Fitting records the corpus size; the matching doc outranks the rest."""
        corpus = [
            "black leather jacket mens",
            "red floral dress womens",
            "blue denim jeans casual",
        ]
        index = self._fitted(corpus)
        assert index.n_docs == 3

        ranked = index.score_candidates("black leather", [0, 1, 2])
        # Only document 0 shares terms with the query.
        assert ranked[0] > ranked[1]
        assert ranked[0] > ranked[2]

    def test_empty_query(self):
        """An empty query string scores zero for every candidate."""
        index = self._fitted(["black dress", "red shoes"])
        ranked = index.score_candidates("", [0, 1])
        all_zero = np.all(ranked == 0.0)
        assert all_zero

    def test_unknown_terms(self):
        """A query term absent from the corpus scores zero."""
        index = self._fitted(["black dress"])
        ranked = index.score_candidates("xyznotaword", [0])
        assert ranked[0] == 0.0

    def test_out_of_range_index(self):
        """A candidate index beyond the corpus scores zero; a valid one still scores."""
        index = self._fitted(["black dress"])
        ranked = index.score_candidates("black", [0, 999])
        assert ranked[0] > 0
        assert ranked[1] == 0.0

    def test_exact_match_scores_higher(self):
        """A document equal to the query outranks documents with no shared terms."""
        corpus = ["black leather jacket", "red silk dress", "blue cotton shirt"]
        index = self._fitted(corpus)
        ranked = index.score_candidates("black leather jacket", [0, 1, 2])
        assert ranked[0] > ranked[1]
        assert ranked[0] > ranked[2]

    def test_doc_frequency_computed(self):
        """``df`` holds, per term, the number of documents containing it."""
        index = self._fitted(["black dress", "black shoes", "red dress"])
        expected_df = {"black": 2, "dress": 2, "red": 1}
        for term, doc_count in expected_df.items():
            assert index.df[term] == doc_count