Spaces:

SouravNath
/

repomind-api

Running

App Files Files Community

repomind-api / tests /test_phase3_localisation.py

SouravNath

Initial commit

dc71cad 4 days ago

raw

history blame contribute delete

18.9 kB

	"""
	tests/test_phase3_localisation.py
	──────────────────────────────────
	Unit tests for Phase 3: BM25, RRF fusion, DeBERTa ranker, and pipeline.

	All tests run without an OpenAI API key or a GPU — components degrade gracefully.

	Run with: pytest tests/test_phase3_localisation.py -v
	"""
	from __future__ import annotations

	import textwrap
	from pathlib import Path
	from unittest.mock import MagicMock, patch

	import pytest


	# ── Helpers ────────────────────────────────────────────────────────────────────

	def make_file_symbols(file_path: str, summary: str = ""):
	    """Build a bare-bones ``FileSymbols`` fixture for ``file_path``.

	    ``summary`` is stored as the module docstring. When it is non-empty, a
	    single synthetic ``FunctionInfo`` is attached as well: it is named after
	    the first whitespace-separated word of ``summary`` ("placeholder" when
	    the summary is whitespace only) and carries ``summary`` as its docstring,
	    so the summary words also show up at function level.
	    """
	    from ast_parser.python_parser import FileSymbols, FunctionInfo

	    symbols = FileSymbols(file_path=file_path, file_hash="aaa111")
	    symbols.module_docstring = summary
	    if not summary:
	        return symbols

	    words = summary.split()
	    func_name = words[0] if words else "placeholder"
	    symbols.functions = [
	        FunctionInfo(
	            name=func_name,
	            qualified_name=func_name,
	            args=[],
	            decorators=[],
	            docstring=summary,
	            start_line=1,
	            end_line=5,
	        )
	    ]
	    return symbols


	# ── BM25 tokeniser ────────────────────────────────────────────────────────────

	class TestTokeniser:
	    """Checks for the ``_tokenise`` helper of ``localisation.bm25_retriever``."""

	    def test_lowercase(self):
	        """Every token produced from mixed-case input is already lower-case."""
	        from localisation.bm25_retriever import _tokenise

	        for token in _tokenise("Hello World"):
	            assert token == token.lower()

	    def test_camel_case_split(self):
	        """A CamelCase identifier yields each of its parts as a token."""
	        from localisation.bm25_retriever import _tokenise

	        tokens = _tokenise("QuerySet")
	        for expected in ("query", "set"):
	            assert expected in tokens

	    def test_snake_case_split(self):
	        """A snake_case identifier is split on the underscore."""
	        from localisation.bm25_retriever import _tokenise

	        tokens = _tokenise("get_queryset")
	        for expected in ("get", "queryset"):
	            assert expected in tokens

	    def test_short_tokens_filtered(self):
	        """Single-character tokens are dropped; a three-letter token survives."""
	        from localisation.bm25_retriever import _tokenise

	        tokens = _tokenise("a b c def")
	        for dropped in ("a", "b"):
	            assert dropped not in tokens
	        assert "def" in tokens

	    def test_path_tokenisation(self):
	        """Path components of a file path become individual tokens."""
	        from localisation.bm25_retriever import _tokenise

	        tokens = _tokenise("django/db/models/query.py")
	        for expected in ("django", "models", "query"):
	            assert expected in tokens


	# ── BM25 Retriever ────────────────────────────────────────────────────────────

	class TestBM25Retriever:
	    """Indexing and querying behaviour of ``BM25Retriever``."""

	    def test_index_and_query_basic(self):
	        """The file whose summary matches the query is ranked first."""
	        from localisation.bm25_retriever import BM25Retriever

	        bm25 = BM25Retriever()
	        corpus = [
	            make_file_symbols(path, text)
	            for path, text in (
	                ("models/user.py", "User authentication login password"),
	                ("views/dashboard.py", "Dashboard render template"),
	                ("utils/email.py", "Email sending SMTP"),
	            )
	        ]
	        bm25.index(corpus)
	        ranked = bm25.query("user authentication login", top_k=3)
	        assert len(ranked) > 0
	        assert ranked[0].file_path == "models/user.py"

	    def test_query_returns_positive_scores_only(self):
	        """Every returned hit carries a strictly positive score."""
	        from localisation.bm25_retriever import BM25Retriever

	        bm25 = BM25Retriever()
	        corpus = [
	            make_file_symbols(f"file{idx}.py", f"content {idx}")
	            for idx in range(5)
	        ]
	        bm25.index(corpus)
	        for hit in bm25.query("content 3", top_k=5):
	            assert hit.score > 0

	    def test_ranks_are_sequential(self):
	        """Hit ranks count up from 1 without gaps, in result order."""
	        from localisation.bm25_retriever import BM25Retriever

	        bm25 = BM25Retriever()
	        corpus = [
	            make_file_symbols(f"f{idx}.py", f"word{idx} text")
	            for idx in range(3)
	        ]
	        bm25.index(corpus)
	        ranked = bm25.query("word0 text", top_k=3)
	        observed = [hit.rank for hit in ranked]
	        expected = list(range(1, len(ranked) + 1))
	        assert observed == expected

	    def test_empty_query_returns_empty(self):
	        """An empty query string produces an empty hit list."""
	        from localisation.bm25_retriever import BM25Retriever

	        bm25 = BM25Retriever()
	        bm25.index([make_file_symbols("a.py", "content")])
	        assert bm25.query("", top_k=5) == []

	    def test_corpus_size(self):
	        """``corpus_size`` reports the number of indexed files."""
	        from localisation.bm25_retriever import BM25Retriever

	        bm25 = BM25Retriever()
	        corpus = [make_file_symbols(f"f{idx}.py", "text") for idx in range(7)]
	        bm25.index(corpus)
	        assert bm25.corpus_size == 7

	    def test_file_path_tokens_boost(self):
	        """A query term found only in a file's path still retrieves that file."""
	        from localisation.bm25_retriever import BM25Retriever

	        # None of the summaries below contains the word 'models'; it occurs
	        # only in the path django/db/models.py. A hit on that file therefore
	        # shows that path tokens take part in the index. Five files are used
	        # rather than one or two — presumably so the matching document gets
	        # a positive BM25 score (verify against the retriever's scoring).
	        bm25 = BM25Retriever()
	        corpus = [
	            make_file_symbols(path, text)
	            for path, text in (
	                ("django/db/models.py", "handles database records"),
	                ("utils/helper.py", "general utilities helper"),
	                ("views/base.py", "base view rendering"),
	                ("core/app.py", "application entry point"),
	                ("api/serializers.py", "rest framework serializers"),
	            )
	        ]
	        bm25.index(corpus)
	        returned_paths = [hit.file_path for hit in bm25.query("models", top_k=5)]
	        assert "django/db/models.py" in returned_paths

	    def test_not_indexed_raises(self):
	        """Querying before ``index()`` raises RuntimeError mentioning 'not indexed'."""
	        from localisation.bm25_retriever import BM25Retriever

	        bm25 = BM25Retriever()
	        with pytest.raises(RuntimeError, match="not indexed"):
	            bm25.query("test", top_k=5)

	    def test_skips_parse_error_files(self):
	        """Files carrying a ``parse_error`` are left out of the index."""
	        from localisation.bm25_retriever import BM25Retriever
	        from ast_parser.python_parser import FileSymbols

	        bm25 = BM25Retriever()
	        parsed_ok = make_file_symbols("good.py", "good content")
	        unparsable = FileSymbols(
	            file_path="bad.py",
	            file_hash="bbb",
	            parse_error="SyntaxError",
	        )
	        bm25.index([parsed_ok, unparsable])
	        assert bm25.corpus_size == 1


	# ── RRF Fusion ────────────────────────────────────────────────────────────────

	class TestRRFFusion:
	    """Behaviour of ``reciprocal_rank_fusion`` and ``ablate``.

	    BM25 and embedding inputs are lists of ``(path, score, rank)`` tuples;
	    the PPR input is a ``{path: score}`` mapping.
	    """

	    def test_basic_fusion(self):
	        """A file present in all three sources lands in the top two."""
	        from localisation.rrf_fusion import reciprocal_rank_fusion

	        bm25_hits = [("a.py", 1.0, 1), ("b.py", 0.8, 2), ("c.py", 0.5, 3)]
	        embed_hits = [("b.py", 0.9, 1), ("a.py", 0.7, 2), ("d.py", 0.6, 3)]
	        ppr_scores = {"a.py": 0.5, "b.py": 0.3}

	        fused = reciprocal_rank_fusion(bm25_hits, embed_hits, ppr_scores, top_k=4)
	        assert len(fused) <= 4
	        # a.py is listed by BM25, embeddings and PPR, so it should rank high.
	        ordered_paths = [hit.file_path for hit in fused]
	        assert "a.py" in ordered_paths[:2]

	    def test_top_k_respected(self):
	        """Ten candidates with ``top_k=3`` yield exactly three results."""
	        from localisation.rrf_fusion import reciprocal_rank_fusion

	        bm25_hits = [(f"f{rank - 1}.py", 1.0, rank) for rank in range(1, 11)]
	        fused = reciprocal_rank_fusion(bm25_hits, [], {}, top_k=3)
	        assert len(fused) == 3

	    def test_empty_inputs(self):
	        """With no input from any source the result is an empty list."""
	        from localisation.rrf_fusion import reciprocal_rank_fusion

	        assert reciprocal_rank_fusion([], [], {}, top_k=5) == []

	    def test_ranks_sequential(self):
	        """Fused hits are ranked 1, 2, ... in result order."""
	        from localisation.rrf_fusion import reciprocal_rank_fusion

	        bm25_hits = [("a.py", 1.0, 1), ("b.py", 0.5, 2)]
	        fused = reciprocal_rank_fusion(bm25_hits, [], {}, top_k=5)
	        observed = [hit.rank for hit in fused]
	        expected = list(range(1, len(fused) + 1))
	        assert observed == expected

	    def test_all_sources_tracked(self):
	        """The per-source rank of a hit is recorded for every contributing source."""
	        from localisation.rrf_fusion import reciprocal_rank_fusion

	        fused = reciprocal_rank_fusion(
	            [("a.py", 1.0, 1)],
	            [("a.py", 0.9, 1)],
	            {"a.py": 0.5},
	            top_k=1,
	        )
	        top = fused[0]
	        assert top.bm25_rank == 1
	        assert top.embed_rank == 1
	        assert top.ppr_rank == 1

	    def test_ablation_no_ppr(self):
	        """With ``use_ppr=False`` a file known only to PPR is not returned."""
	        from localisation.rrf_fusion import ablate

	        bm25_hits = [("a.py", 1.0, 1)]
	        # b.py has a very large PPR score but appears in no other source.
	        ppr_scores = {"b.py": 99.0}
	        fused = ablate(bm25_hits, [], ppr_scores, use_ppr=False, top_k=5)
	        returned_paths = [hit.file_path for hit in fused]
	        assert "b.py" not in returned_paths

	    def test_scores_descending(self):
	        """Fused scores never increase along the result list."""
	        from localisation.rrf_fusion import reciprocal_rank_fusion

	        bm25_hits = [("a.py", 1.0, 1), ("b.py", 0.5, 2), ("c.py", 0.1, 3)]
	        fused = reciprocal_rank_fusion(bm25_hits, [], {}, top_k=3)
	        fused_scores = [hit.fused_score for hit in fused]
	        expected_order = sorted(fused_scores, reverse=True)
	        assert fused_scores == expected_order

	    def test_union_of_all_lists(self):
	        """File appearing only in PPR should still be in results."""
	        from localisation.rrf_fusion import reciprocal_rank_fusion

	        bm25_hits = [("a.py", 1.0, 1)]
	        ppr_only = {"z.py": 1.0}  # z.py is absent from BM25 and embeddings
	        fused = reciprocal_rank_fusion(bm25_hits, [], ppr_only, top_k=10)
	        returned_paths = [hit.file_path for hit in fused]
	        assert "z.py" in returned_paths


	# ── DeBERTa Ranker — without GPU ──────────────────────────────────────────────

	class TestDeBERTaRankerFallback:
	    """Tests for graceful fallback when model is not loaded."""

	    @staticmethod
	    def _unloaded_ranker():
	        """Return a ``DeBERTaRanker`` built without ``__init__`` and flagged unavailable."""
	        from localisation.deberta_ranker import DeBERTaRanker

	        # __new__ bypasses __init__, so no model is actually loaded.
	        ranker = DeBERTaRanker.__new__(DeBERTaRanker)
	        ranker._available = False
	        return ranker

	    def test_rerank_fallback_returns_stage1_order(self):
	        """Without a model, candidates come back in their incoming order."""
	        ranker = self._unloaded_ranker()
	        ranker._model = None
	        ranker._tokenizer = None

	        stage1 = [("a.py", "summary a"), ("b.py", "summary b"), ("c.py", "summary c")]
	        reranked = ranker.rerank("fix the bug", stage1, top_k=3)
	        assert len(reranked) == 3
	        first = reranked[0]
	        assert first.file_path == "a.py"
	        assert first.rank == 1

	    def test_rerank_empty_candidates(self):
	        """An empty candidate list is reranked to an empty list."""
	        ranker = self._unloaded_ranker()
	        assert ranker.rerank("query", [], top_k=5) == []

	    def test_ranked_file_scores_are_positive(self):
	        """Fallback results all carry a strictly positive relevance score."""
	        ranker = self._unloaded_ranker()
	        stage1 = [(f"f{idx}.py", f"text {idx}") for idx in range(5)]
	        for ranked_file in ranker.rerank("test query", stage1, top_k=5):
	            assert ranked_file.relevance_score > 0


	# ── Recall metric ─────────────────────────────────────────────────────────────

	class TestRecallMetric:
	    """Expected values of ``recall_at_k(predictions, gold, k=...)``."""

	    def test_perfect_recall(self):
	        """All gold files appear among the predictions -> 1.0."""
	        from localisation.deberta_ranker import recall_at_k

	        predicted = ["a.py", "b.py", "c.py"]
	        relevant = ["a.py", "b.py"]
	        assert recall_at_k(predicted, relevant, k=5) == 1.0

	    def test_zero_recall(self):
	        """No gold file appears among the predictions -> 0.0."""
	        from localisation.deberta_ranker import recall_at_k

	        predicted = ["x.py", "y.py"]
	        relevant = ["a.py"]
	        assert recall_at_k(predicted, relevant, k=5) == 0.0

	    def test_partial_recall(self):
	        """One of two gold files is predicted -> 0.5."""
	        from localisation.deberta_ranker import recall_at_k

	        predicted = ["a.py", "b.py", "c.py"]
	        relevant = ["a.py", "z.py"]
	        assert recall_at_k(predicted, relevant, k=5) == 0.5

	    def test_recall_at_k_respects_k(self):
	        """Only the first ``k`` predictions count towards recall."""
	        from localisation.deberta_ranker import recall_at_k

	        predicted = ["x.py", "a.py"]  # the gold file sits at position 2
	        relevant = ["a.py"]
	        top1 = recall_at_k(predicted, relevant, k=1)
	        assert top1 == 0.0  # the gold file is outside the top-1 window
	        top2 = recall_at_k(predicted, relevant, k=2)
	        assert top2 == 1.0

	    def test_empty_gold(self):
	        """An empty gold list gives 0.0."""
	        from localisation.deberta_ranker import recall_at_k

	        assert recall_at_k(["a.py"], [], k=5) == 0.0


	# ── Patch file extraction ─────────────────────────────────────────────────────

	class TestExtractFilesFromPatch:
	    """File paths recovered from unified-diff text by ``_extract_files_from_patch``."""

	    def test_basic_unified_diff(self):
	        """The path of a single-file git diff is reported without its a/ or b/ prefix."""
	        from localisation.deberta_ranker import _extract_files_from_patch

	        # Named diff_text so the module-level ``unittest.mock.patch`` import
	        # is not shadowed.
	        diff_text = textwrap.dedent("""
	            diff --git a/django/db/models/query.py b/django/db/models/query.py
	            --- a/django/db/models/query.py
	            +++ b/django/db/models/query.py
	            @@ -1 +1 @@
	            -old
	            +new
	        """)
	        touched = _extract_files_from_patch(diff_text)
	        assert "django/db/models/query.py" in touched

	    def test_multiple_files(self):
	        """Every file touched by a multi-file diff is reported."""
	        from localisation.deberta_ranker import _extract_files_from_patch

	        diff_text = textwrap.dedent("""
	            --- a/foo.py
	            +++ b/foo.py
	            @@ -1 +1 @@ fix
	            --- a/bar.py
	            +++ b/bar.py
	            @@ -1 +1 @@ fix
	        """)
	        touched = _extract_files_from_patch(diff_text)
	        for expected in ("foo.py", "bar.py"):
	            assert expected in touched

	    def test_dev_null_excluded(self):
	        """The ``/dev/null`` side of a new-file diff is not reported as a file."""
	        from localisation.deberta_ranker import _extract_files_from_patch

	        diff_text = "--- /dev/null\n+++ b/new_file.py\n"
	        touched = _extract_files_from_patch(diff_text)
	        assert "/dev/null" not in touched

	    def test_empty_patch(self):
	        """An empty patch string yields an empty list."""
	        from localisation.deberta_ranker import _extract_files_from_patch

	        touched = _extract_files_from_patch("")
	        assert touched == []


	# ── Failure categorisation ────────────────────────────────────────────────────

	class TestFailureCategorisation:
	    """Labels returned by ``categorise_localisation_failure(predicted, gold, issue_text)``."""

	    def test_success(self):
	        """Every gold file is among the predictions -> "success"."""
	        from localisation.pipeline import categorise_localisation_failure

	        label = categorise_localisation_failure(
	            ["a.py", "b.py"],
	            ["a.py"],
	            "good long detailed issue text here",
	        )
	        assert label == "success"

	    def test_wrong_file(self):
	        """No gold file predicted for a detailed issue -> "wrong_file"."""
	        from localisation.pipeline import categorise_localisation_failure

	        # The issue text is 11 words long; per the original author's note the
	        # function treats texts of more than 10 words as detailed enough
	        # (threshold not visible here — confirm in localisation.pipeline).
	        detailed_issue = "there is a null pointer exception in the query filter method"
	        label = categorise_localisation_failure(
	            ["x.py", "y.py"],
	            ["z.py"],
	            detailed_issue,
	        )
	        assert label == "wrong_file"

	    def test_partial_file(self):
	        """Only some gold files are predicted -> "partial_file"."""
	        from localisation.pipeline import categorise_localisation_failure

	        label = categorise_localisation_failure(
	            ["a.py"],
	            ["a.py", "b.py"],
	            "long enough issue text to be valid",
	        )
	        assert label == "partial_file"

	    def test_ambiguous_issue(self):
	        """No gold file predicted for a very short issue -> "ambiguous_issue"."""
	        from localisation.pipeline import categorise_localisation_failure

	        terse_issue = "fix bug"  # deliberately very short
	        label = categorise_localisation_failure(["x.py"], ["z.py"], terse_issue)
	        assert label == "ambiguous_issue"


	# ── Pipeline integration (no API required) ────────────────────────────────────

	class TestLocalisationPipeline:
	    """End-to-end checks of ``LocalisationPipeline`` with the optional stages switched off."""

	    def test_pipeline_bm25_only(self):
	        """With only BM25 active, the best-matching file is ranked first."""
	        from localisation.pipeline import LocalisationPipeline

	        pipe = LocalisationPipeline(
	            use_embeddings=False,
	            use_deberta=False,
	            use_ppr=False,
	        )
	        corpus = [
	            make_file_symbols(path, text)
	            for path, text in (
	                ("auth/models.py", "User model authentication password hash"),
	                ("views/login.py", "Login view render form"),
	                ("utils/email.py", "Email SMTP send message"),
	            )
	        ]
	        pipe.index_repo(corpus)
	        outcome = pipe.localise("user authentication login password", top_k=3)
	        assert len(outcome.hits) > 0
	        assert outcome.hits[0].file_path == "auth/models.py"

	    def test_pipeline_empty_query(self):
	        """An empty query is labelled "empty_query" and returns no hits."""
	        from localisation.pipeline import LocalisationPipeline

	        pipe = LocalisationPipeline(use_embeddings=False, use_deberta=False)
	        pipe.index_repo([make_file_symbols("a.py", "content")])
	        outcome = pipe.localise("")
	        assert outcome.failure_category == "empty_query"
	        assert outcome.hits == []

	    def test_pipeline_with_gold_files_computes_recall(self):
	        """Passing ``gold_files`` populates recall@5 and recall@10."""
	        from localisation.pipeline import LocalisationPipeline

	        pipe = LocalisationPipeline(
	            use_embeddings=False,
	            use_deberta=False,
	            use_ppr=False,
	        )
	        # Five files are indexed so that BM25 yields positive scores (per the
	        # original author's note). 'queryset' occurs in both the path and the
	        # summary of db/queryset.py, which the author expects to put that
	        # file at the top of the ranking.
	        corpus = [
	            make_file_symbols(path, text)
	            for path, text in (
	                ("db/queryset.py", "queryset filter method database orm"),
	                ("views/generic.py", "generic view rendering template"),
	                ("utils/helper.py", "utility functions general purpose"),
	                ("api/serializer.py", "rest framework serializer fields"),
	                ("forms/widget.py", "html form widget rendering input"),
	            )
	        ]
	        pipe.index_repo(corpus)
	        outcome = pipe.localise(
	            "fix null pointer exception in queryset filter",
	            top_k=5,
	            gold_files=["db/queryset.py"],
	        )
	        assert outcome.recall_at_5 is not None
	        assert outcome.recall_at_10 is not None
	        # The only gold file must be inside the top five.
	        assert outcome.recall_at_5 == 1.0

	    def test_top_k_paths_property(self):
	        """``top_k_paths`` has one entry per hit."""
	        from localisation.pipeline import LocalisationPipeline

	        pipe = LocalisationPipeline(
	            use_embeddings=False,
	            use_deberta=False,
	            use_ppr=False,
	        )
	        corpus = [
	            make_file_symbols(f"f{idx}.py", f"content {idx}")
	            for idx in range(5)
	        ]
	        pipe.index_repo(corpus)
	        outcome = pipe.localise("content 1", top_k=3)
	        assert len(outcome.top_k_paths) == len(outcome.hits)

	    def test_hit_diagnostic_flags(self):
	        """A returned hit is flagged as having come from BM25."""
	        from localisation.pipeline import LocalisationPipeline

	        pipe = LocalisationPipeline(
	            use_embeddings=False,
	            use_deberta=False,
	            use_ppr=False,
	        )
	        pipe.index_repo([make_file_symbols("a.py", "special word")])
	        outcome = pipe.localise("special word", top_k=1)
	        # The flag is only checked when something was returned — presumably
	        # because a one-document corpus may produce no positive BM25 score
	        # (verify against the retriever).
	        if not outcome.hits:
	            return
	        assert outcome.hits[0].in_bm25 is True