# Spaces: 1337XCode / personabot-api — Running — File size: 8,687 Bytes

# backend/tests/test_enumerate_query.py
# Unit tests for the enumeration query classifier (Fix 1) and
# the portfolio-relevance helper (Fix 2 Rule 1).
#
# All tests are pure-Python; no network calls, no Qdrant, no embedder.

import pytest
from unittest.mock import AsyncMock, MagicMock, patch

from app.pipeline.nodes.enumerate_query import (
    _has_enumeration_intent,
    _extract_source_types,
    make_enumerate_query_node,
)
from app.core.portfolio_context import is_portfolio_relevant

# Patch target for LangGraph's stream writer, which requires a runnable context
# that doesn't exist in unit tests.
# The dotted path names get_stream_writer as it is looked up inside the
# enumerate_query module; the node tests in this file pass it to
# unittest.mock.patch and substitute a MagicMock for the duration of each call.
_WRITER_PATCH: str = "app.pipeline.nodes.enumerate_query.get_stream_writer"


# ---------------------------------------------------------------------------
# _has_enumeration_intent
# ---------------------------------------------------------------------------


class TestHasEnumerationIntent:
    """Pin which phrasings ``_has_enumeration_intent`` flags and which it ignores.

    The first group of tests feeds listing/counting requests and expects
    ``True``; the second group feeds ordinary questions (and the empty
    string) and expects ``False``.
    """

    # --- phrasings that must be flagged -----------------------------------

    def test_list_all_projects(self):
        flagged = _has_enumeration_intent("list all projects")
        assert flagged is True

    def test_list_projects_no_all(self):
        flagged = _has_enumeration_intent("list projects")
        assert flagged is True

    def test_show_all_blogs(self):
        flagged = _has_enumeration_intent("show all blog posts")
        assert flagged is True

    def test_how_many_blogs(self):
        flagged = _has_enumeration_intent("how many blog posts do you have")
        assert flagged is True

    def test_count_projects(self):
        flagged = _has_enumeration_intent("count projects")
        assert flagged is True

    def test_enumerate_skills(self):
        flagged = _has_enumeration_intent("enumerate all skills")
        assert flagged is True

    def test_give_me_a_list_of(self):
        flagged = _has_enumeration_intent("give me a list of your projects")
        assert flagged is True

    def test_what_are_all_the_projects(self):
        # trailing-regex pattern: "what are all the X"
        flagged = _has_enumeration_intent("what are all the projects")
        assert flagged is True

    def test_which_are_all_the_blogs(self):
        # Requires "all" keyword — the trailing regex gate prevents over-triggering.
        flagged = _has_enumeration_intent("which are all the blog posts")
        assert flagged is True

    # --- phrasings that must NOT be flagged -------------------------------

    def test_regular_how_query_no_intent(self):
        flagged = _has_enumeration_intent("how does TextOps work")
        assert flagged is False

    def test_explain_query_no_intent(self):
        flagged = _has_enumeration_intent("explain the architecture of PersonaBot")
        assert flagged is False

    def test_what_is_query_no_intent(self):
        flagged = _has_enumeration_intent("what is echo-echo")
        assert flagged is False

    def test_tell_me_about_no_intent(self):
        flagged = _has_enumeration_intent("tell me about your background")
        assert flagged is False

    def test_empty_string(self):
        flagged = _has_enumeration_intent("")
        assert flagged is False


# ---------------------------------------------------------------------------
# _extract_source_types
# ---------------------------------------------------------------------------


class TestExtractSourceTypes:
    """Pin the source-type tokens ``_extract_source_types`` yields per query.

    Each test checks membership of one expected type string in the result,
    except the generic case, which expects an empty list.
    """

    def test_projects(self):
        assert "project" in _extract_source_types("list all projects")

    def test_blogs(self):
        assert "blog" in _extract_source_types("show all blog posts")

    def test_skills_cv(self):
        assert "cv" in _extract_source_types("list all your skills")

    def test_generic_returns_empty(self):
        # "everything" or "all" without a type token → [] meaning scroll all types
        assert _extract_source_types("list everything") == []

    def test_github_repos(self):
        assert "github" in _extract_source_types("show all github repos")

    def test_work_experience(self):
        assert "cv" in _extract_source_types("list all work experience")


# ---------------------------------------------------------------------------
# make_enumerate_query_node
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_non_enumeration_query_passes_through():
    """An ordinary question leaves the node with is_enumeration_query=False.

    The vector store is a mock whose scroll returns nothing; the test also
    checks that the scroll method is never invoked for such a query.
    """
    # NOTE(review): nothing here is awaited — confirm whether the asyncio
    # marker / ``async def`` is still required for this test.
    vector_store = MagicMock()
    vector_store.scroll_by_source_type = MagicMock(return_value=[])
    run_node = make_enumerate_query_node(vector_store)

    # get_stream_writer is swapped out only for the duration of the node call.
    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = run_node({"query": "how does TextOps work", "retrieval_attempts": 0})

    assert outcome["is_enumeration_query"] is False
    # Vector store must NOT be called for normal queries (zero cost guarantee).
    vector_store.scroll_by_source_type.assert_not_called()


@pytest.mark.asyncio
async def test_enumeration_query_sets_flag_and_populates_chunks():
    """A listing query triggers one scroll and sets is_enumeration_query=True.

    The mocked scroll returns two chunks with distinct source titles, and
    both are expected to appear in ``reranked_chunks``.
    """
    # NOTE(review): nothing here is awaited — confirm whether the asyncio
    # marker / ``async def`` is still required for this test.
    scrolled = [
        {
            "text": "TextOps is a CLI toolkit.",
            "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-1"},
        },
        {
            "text": "Echo-Echo is a WebRTC demo.",
            "metadata": {"source_title": "Echo-Echo", "source_type": "project", "doc_id": "echo-1"},
        },
    ]
    vector_store = MagicMock()
    vector_store.scroll_by_source_type = MagicMock(return_value=scrolled)
    run_node = make_enumerate_query_node(vector_store)

    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = run_node({"query": "list all projects", "retrieval_attempts": 0})

    assert outcome["is_enumeration_query"] is True
    reranked = outcome["reranked_chunks"]
    assert len(reranked) == 2
    vector_store.scroll_by_source_type.assert_called_once()


@pytest.mark.asyncio
async def test_enumeration_deduplicates_by_source_title():
    """Chunks sharing a source_title collapse to a single representative.

    The mocked scroll returns two chunks that differ in text and doc_id but
    carry the same ``source_title``; exactly one is expected to survive.
    """
    # NOTE(review): nothing here is awaited — confirm whether the asyncio
    # marker / ``async def`` is still required for this test.
    same_title_chunks = [
        {
            "text": "TextOps chunk 1",
            "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-1"},
        },
        {
            "text": "TextOps chunk 2",
            "metadata": {"source_title": "TextOps", "source_type": "project", "doc_id": "textops-2"},
        },
    ]
    vector_store = MagicMock()
    vector_store.scroll_by_source_type = MagicMock(return_value=same_title_chunks)
    run_node = make_enumerate_query_node(vector_store)

    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = run_node({"query": "list all projects", "retrieval_attempts": 0})

    assert outcome["is_enumeration_query"] is True
    reranked = outcome["reranked_chunks"]
    assert len(reranked) == 1


@pytest.mark.asyncio
async def test_enumeration_empty_scroll_returns_not_found():
    """A listing query whose scroll yields no chunks keeps is_enumeration_query False."""
    # NOTE(review): nothing here is awaited — confirm whether the asyncio
    # marker / ``async def`` is still required for this test.
    vector_store = MagicMock()
    vector_store.scroll_by_source_type = MagicMock(return_value=[])
    run_node = make_enumerate_query_node(vector_store)

    with patch(_WRITER_PATCH, return_value=MagicMock()):
        outcome = run_node({"query": "list all projects", "retrieval_attempts": 0})

    # With no chunks, the node does not commit to enumeration path; falls to RAG.
    assert outcome["is_enumeration_query"] is False


# ---------------------------------------------------------------------------
# is_portfolio_relevant (Fix 2 Rule 1)
# ---------------------------------------------------------------------------


class TestIsPortfolioRelevant:
    """Pin which queries ``is_portfolio_relevant`` accepts and which it rejects.

    Accepted examples mention a project, technology, organisation, work
    experience or tech stack; rejected examples are off-topic requests and
    the empty string.
    """

    def test_known_project_name(self):
        relevant = is_portfolio_relevant("how does textops work")
        assert relevant is True

    def test_known_project_variant(self):
        relevant = is_portfolio_relevant("tell me about echo echo")
        assert relevant is True

    def test_known_technology(self):
        relevant = is_portfolio_relevant("explain the use of langchain in your stack")
        assert relevant is True

    def test_known_organisation(self):
        relevant = is_portfolio_relevant("what did you do at vk live")
        assert relevant is True

    def test_unrelated_query(self):
        relevant = is_portfolio_relevant("what is the weather in london")
        assert relevant is False

    def test_generic_question(self):
        relevant = is_portfolio_relevant("tell me a joke")
        assert relevant is False

    def test_empty_string(self):
        relevant = is_portfolio_relevant("")
        assert relevant is False

    def test_resume_intent_keywords_are_relevant(self):
        relevant = is_portfolio_relevant("tell me about his work experience")
        assert relevant is True

    def test_stt_typo_work_experience_is_still_relevant(self):
        # "walk" is the mistyped form of "work" that this test's name refers to.
        relevant = is_portfolio_relevant("tell me about his walk experience")
        assert relevant is True

    def test_tech_stack_intent_is_relevant(self):
        relevant = is_portfolio_relevant("Could you tell me about his tech stack?")
        assert relevant is True

    def test_professional_setting_work_experience_is_relevant(self):
        relevant = is_portfolio_relevant("What work experience do you have in a professional setting")
        assert relevant is True

    def test_tech_stack_use_phrase_is_relevant(self):
        relevant = is_portfolio_relevant("What tech stack does he use")
        assert relevant is True