# backend/tests/test_enumerate_query.py
# Unit tests for the enumeration query classifier (Fix 1) and
# the portfolio-relevance helper (Fix 2 Rule 1).
#
# All tests are pure-Python; no network calls, no Qdrant, no embedder.
import pytest
from unittest.mock import AsyncMock, MagicMock, patch
from app.pipeline.nodes.enumerate_query import (
_has_enumeration_intent,
_extract_source_types,
make_enumerate_query_node,
)
from app.core.portfolio_context import is_portfolio_relevant
# Dotted path passed to unittest.mock.patch by the node tests in this module.
# It stands in for LangGraph's stream writer, which requires a runnable context
# that doesn't exist in unit tests.
_WRITER_PATCH = "app.pipeline.nodes.enumerate_query.get_stream_writer"
# ---------------------------------------------------------------------------
# _has_enumeration_intent
# ---------------------------------------------------------------------------
class TestHasEnumerationIntent:
    """Pin which query phrasings _has_enumeration_intent flags as enumeration requests."""

    # --- phrasings that must be flagged -----------------------------------

    def test_list_all_projects(self):
        query = "list all projects"
        assert _has_enumeration_intent(query) is True

    def test_list_projects_no_all(self):
        query = "list projects"
        assert _has_enumeration_intent(query) is True

    def test_show_all_blogs(self):
        query = "show all blog posts"
        assert _has_enumeration_intent(query) is True

    def test_how_many_blogs(self):
        query = "how many blog posts do you have"
        assert _has_enumeration_intent(query) is True

    def test_count_projects(self):
        query = "count projects"
        assert _has_enumeration_intent(query) is True

    def test_enumerate_skills(self):
        query = "enumerate all skills"
        assert _has_enumeration_intent(query) is True

    def test_give_me_a_list_of(self):
        query = "give me a list of your projects"
        assert _has_enumeration_intent(query) is True

    def test_what_are_all_the_projects(self):
        # Exercises the trailing-regex form "what are all the X".
        query = "what are all the projects"
        assert _has_enumeration_intent(query) is True

    def test_which_are_all_the_blogs(self):
        # The "all" keyword is required here; that regex gate is what keeps
        # the classifier from over-triggering.
        query = "which are all the blog posts"
        assert _has_enumeration_intent(query) is True

    # --- phrasings that must NOT be flagged -------------------------------

    def test_regular_how_query_no_intent(self):
        query = "how does TextOps work"
        assert _has_enumeration_intent(query) is False

    def test_explain_query_no_intent(self):
        query = "explain the architecture of PersonaBot"
        assert _has_enumeration_intent(query) is False

    def test_what_is_query_no_intent(self):
        query = "what is echo-echo"
        assert _has_enumeration_intent(query) is False

    def test_tell_me_about_no_intent(self):
        query = "tell me about your background"
        assert _has_enumeration_intent(query) is False

    def test_empty_string(self):
        query = ""
        assert _has_enumeration_intent(query) is False
# ---------------------------------------------------------------------------
# _extract_source_types
# ---------------------------------------------------------------------------
class TestExtractSourceTypes:
    """Pin the source-type tokens _extract_source_types derives from a query."""

    def test_projects(self):
        """A query naming projects yields the "project" type."""
        assert "project" in _extract_source_types("list all projects")

    def test_blogs(self):
        """A query naming blog posts yields the "blog" type."""
        assert "blog" in _extract_source_types("show all blog posts")

    def test_skills_cv(self):
        """A query about skills yields the "cv" type."""
        assert "cv" in _extract_source_types("list all your skills")

    def test_generic_returns_empty(self):
        # No type token in the query ("everything" / "all" alone): the result
        # is [] which means scroll all types.
        assert _extract_source_types("list everything") == []

    def test_github_repos(self):
        """A query naming github repos yields the "github" type."""
        assert "github" in _extract_source_types("show all github repos")

    def test_work_experience(self):
        """A query about work experience yields the "cv" type."""
        assert "cv" in _extract_source_types("list all work experience")
# ---------------------------------------------------------------------------
# make_enumerate_query_node
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_non_enumeration_query_passes_through():
    """A non-enumeration query leaves the node with is_enumeration_query=False."""
    vector_store = MagicMock(scroll_by_source_type=MagicMock(return_value=[]))
    enumerate_node = make_enumerate_query_node(vector_store)
    graph_state = {"query": "how does TextOps work", "retrieval_attempts": 0}

    # The stream writer is stubbed out because no runnable context exists here.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = enumerate_node(graph_state)

    assert outcome["is_enumeration_query"] is False
    # Zero-cost guarantee: normal queries must never reach the vector store.
    vector_store.scroll_by_source_type.assert_not_called()
@pytest.mark.asyncio
async def test_enumeration_query_sets_flag_and_populates_chunks():
    """An enumeration query scrolls the store once and sets is_enumeration_query=True."""
    scrolled_chunks = [
        {
            "text": "TextOps is a CLI toolkit.",
            "metadata": {
                "source_title": "TextOps",
                "source_type": "project",
                "doc_id": "textops-1",
            },
        },
        {
            "text": "Echo-Echo is a WebRTC demo.",
            "metadata": {
                "source_title": "Echo-Echo",
                "source_type": "project",
                "doc_id": "echo-1",
            },
        },
    ]
    vector_store = MagicMock(
        scroll_by_source_type=MagicMock(return_value=scrolled_chunks),
    )
    enumerate_node = make_enumerate_query_node(vector_store)
    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

    # The stream writer is stubbed out because no runnable context exists here.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = enumerate_node(graph_state)

    assert outcome["is_enumeration_query"] is True
    # Two distinct source titles in, two chunks out.
    assert len(outcome["reranked_chunks"]) == 2
    vector_store.scroll_by_source_type.assert_called_once()
@pytest.mark.asyncio
async def test_enumeration_deduplicates_by_source_title():
    """Chunks that share a source_title collapse to a single representative."""

    def textops_chunk(text, doc_id):
        # Both fixtures carry the same source_title and differ only in
        # text and doc_id.
        return {
            "text": text,
            "metadata": {
                "source_title": "TextOps",
                "source_type": "project",
                "doc_id": doc_id,
            },
        }

    same_title_chunks = [
        textops_chunk("TextOps chunk 1", "textops-1"),
        textops_chunk("TextOps chunk 2", "textops-2"),
    ]
    vector_store = MagicMock(
        scroll_by_source_type=MagicMock(return_value=same_title_chunks),
    )
    enumerate_node = make_enumerate_query_node(vector_store)
    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

    # The stream writer is stubbed out because no runnable context exists here.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = enumerate_node(graph_state)

    assert outcome["is_enumeration_query"] is True
    assert len(outcome["reranked_chunks"]) == 1
@pytest.mark.asyncio
async def test_enumeration_empty_scroll_returns_not_found():
    """An empty scroll result leaves is_enumeration_query False (nothing to list)."""
    vector_store = MagicMock(scroll_by_source_type=MagicMock(return_value=[]))
    enumerate_node = make_enumerate_query_node(vector_store)
    graph_state = {"query": "list all projects", "retrieval_attempts": 0}

    # The stream writer is stubbed out because no runnable context exists here.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = enumerate_node(graph_state)

    # Without chunks the node does not commit to the enumeration path; the
    # query falls through to RAG instead.
    assert outcome["is_enumeration_query"] is False
# ---------------------------------------------------------------------------
# is_portfolio_relevant (Fix 2 Rule 1)
# ---------------------------------------------------------------------------
class TestIsPortfolioRelevant:
    """Pin which queries is_portfolio_relevant treats as being about the portfolio."""

    # --- known entities ---------------------------------------------------

    def test_known_project_name(self):
        query = "how does textops work"
        assert is_portfolio_relevant(query) is True

    def test_known_project_variant(self):
        query = "tell me about echo echo"
        assert is_portfolio_relevant(query) is True

    def test_known_technology(self):
        query = "explain the use of langchain in your stack"
        assert is_portfolio_relevant(query) is True

    def test_known_organisation(self):
        query = "what did you do at vk live"
        assert is_portfolio_relevant(query) is True

    # --- queries that must be rejected ------------------------------------

    def test_unrelated_query(self):
        query = "what is the weather in london"
        assert is_portfolio_relevant(query) is False

    def test_generic_question(self):
        query = "tell me a joke"
        assert is_portfolio_relevant(query) is False

    def test_empty_string(self):
        query = ""
        assert is_portfolio_relevant(query) is False

    # --- resume / tech-stack intent phrasings -----------------------------

    def test_resume_intent_keywords_are_relevant(self):
        query = "tell me about his work experience"
        assert is_portfolio_relevant(query) is True

    def test_stt_typo_work_experience_is_still_relevant(self):
        # "walk" in place of "work", as named by the test: a speech-to-text typo.
        query = "tell me about his walk experience"
        assert is_portfolio_relevant(query) is True

    def test_tech_stack_intent_is_relevant(self):
        query = "Could you tell me about his tech stack?"
        assert is_portfolio_relevant(query) is True

    def test_professional_setting_work_experience_is_relevant(self):
        query = "What work experience do you have in a professional setting"
        assert is_portfolio_relevant(query) is True

    def test_tech_stack_use_phrase_is_relevant(self):
        query = "What tech stack does he use"
        assert is_portfolio_relevant(query) is True
|