File size: 2,945 Bytes
e502f0d
 
 
 
 
 
 
 
 
 
 
05ffeb4
e502f0d
fa696e8
89f1173
 
fa696e8
e502f0d
05ffeb4
e502f0d
fa696e8
 
e502f0d
 
fa696e8
 
 
 
e502f0d
fa696e8
 
 
 
e502f0d
05ffeb4
e502f0d
fa696e8
 
 
e502f0d
05ffeb4
e502f0d
fa696e8
e502f0d
 
 
 
 
 
 
 
fa696e8
e502f0d
 
05ffeb4
e502f0d
fa696e8
e502f0d
 
05ffeb4
e502f0d
 
 
 
05ffeb4
e502f0d
fa696e8
e502f0d
fa696e8
 
e502f0d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""Unit tests for query preprocessing utilities."""

import pytest

from src.tools.query_utils import expand_synonyms, preprocess_query, strip_question_words


@pytest.mark.unit
class TestQueryPreprocessing:
    """Tests for query preprocessing."""

    def test_strip_question_words(self) -> None:
        """Test removal of question words."""
        assert strip_question_words("What drugs treat HSDD") == "drugs treat hsdd"
        assert strip_question_words("Which medications help low libido") == "medications low libido"
        assert strip_question_words("How can we treat ED") == "we treat ed"
        assert strip_question_words("Is sildenafil effective") == "sildenafil"

    def test_strip_preserves_medical_terms(self) -> None:
        """Test that medical terms are preserved."""
        result = strip_question_words("What is the mechanism of sildenafil")
        assert "sildenafil" in result
        assert "mechanism" in result

    def test_expand_synonyms_low_libido(self) -> None:
        """Test Low Libido synonym expansion."""
        result = expand_synonyms("low libido treatment")
        assert "HSDD" in result or "hypoactive sexual desire" in result

    def test_expand_synonyms_ed(self) -> None:
        """Test ED synonym expansion."""
        result = expand_synonyms("erectile dysfunction drug")
        assert "impotence" in result

    def test_expand_synonyms_preserves_unknown(self) -> None:
        """Test that unknown terms are preserved."""
        result = expand_synonyms("sildenafil unknowncondition")
        assert "sildenafil" in result
        assert "unknowncondition" in result

    def test_preprocess_query_full_pipeline(self) -> None:
        """Test complete preprocessing pipeline."""
        raw = "What medications show promise for Low Libido?"
        result = preprocess_query(raw)

        # Should not contain question words
        assert "what" not in result.lower()
        assert "show" not in result.lower()
        assert "promise" not in result.lower()

        # Should contain expanded terms
        assert "HSDD" in result or "hypoactive" in result or "low libido" in result.lower()
        assert "medications" in result.lower() or "drug" in result.lower()

    def test_preprocess_query_removes_punctuation(self) -> None:
        """Test that question marks are removed."""
        result = preprocess_query("Is sildenafil safe?")
        assert "?" not in result

    def test_preprocess_query_handles_empty(self) -> None:
        """Test handling of empty/whitespace queries."""
        assert preprocess_query("") == ""
        assert preprocess_query("   ") == ""

    def test_preprocess_query_already_clean(self) -> None:
        """Test that clean queries pass through."""
        clean = "sildenafil ed mechanism"
        result = preprocess_query(clean)
        assert "sildenafil" in result
        assert "ed" in result
        assert "mechanism" in result