File size: 6,615 Bytes
d545f81
 
 
 
3cdce90
d545f81
 
3cdce90
 
 
 
 
 
 
 
d545f81
 
 
 
3cdce90
d545f81
 
 
 
 
 
 
 
 
 
 
3cdce90
d545f81
 
 
 
 
 
 
 
3cdce90
 
d545f81
3cdce90
d545f81
 
3cdce90
d545f81
 
 
3cdce90
d545f81
 
 
 
3cdce90
d545f81
 
 
3cdce90
d545f81
 
 
3cdce90
 
 
 
d545f81
 
 
 
 
 
 
3cdce90
d545f81
 
3cdce90
d545f81
3cdce90
d545f81
 
 
3cdce90
d545f81
 
 
 
 
 
3cdce90
d545f81
 
 
3cdce90
d545f81
 
 
 
3cdce90
d545f81
 
 
 
3cdce90
d545f81
3cdce90
 
 
 
d545f81
3cdce90
 
 
d545f81
 
 
3cdce90
d545f81
3cdce90
d545f81
3cdce90
d545f81
 
3cdce90
 
 
 
 
 
d545f81
 
 
 
 
3cdce90
d545f81
3cdce90
d545f81
 
3cdce90
d545f81
 
 
 
3cdce90
d545f81
 
 
3cdce90
d545f81
 
 
3cdce90
d545f81
 
 
 
3cdce90
d545f81
 
 
3cdce90
d545f81
 
 
3cdce90
 
 
 
 
 
 
 
 
 
d545f81
 
 
 
3cdce90
 
 
 
 
 
 
 
 
 
 
 
d545f81
3cdce90
d545f81
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
"""
Unit tests for knowledge_base module
Tests knowledge base loading and preparation
"""

import json
import os
import tempfile
import unittest
from unittest.mock import patch

import pandas as pd

from src.knowledge_base import (get_knowledge_base_data, load_knowledge_base,
                                prepare_documents)


class TestKnowledgeBase(unittest.TestCase):
    """Test cases for the knowledge_base module.

    Exercises ``load_knowledge_base``, ``prepare_documents`` and
    ``get_knowledge_base_data``.

    Temporary JSON fixtures are written through ``_write_json_fixture``,
    which registers their removal with ``addCleanup``. Unlike ``tearDown``,
    cleanups also run when ``setUp`` itself fails, so no fixture file is
    left behind on disk.
    """

    @staticmethod
    def _remove_if_present(path):
        """Delete ``path`` if it still exists; do nothing otherwise."""
        if os.path.exists(path):
            os.unlink(path)

    def _write_json_fixture(self, records):
        """Dump ``records`` as JSON into a new temporary ``.json`` file.

        Args:
            records: JSON-serialisable object to write.

        Returns:
            The (already closed) temporary file object; its ``name``
            attribute is the path of the file on disk.
        """
        handle = tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".json"
        )
        # Register removal before writing so the file is cleaned up even
        # when json.dump raises.
        self.addCleanup(self._remove_if_present, handle.name)
        with handle:  # closes the file, flushing the JSON to disk
            json.dump(records, handle)
        return handle

    def setUp(self):
        """Create two sample records and a temporary JSON file holding them."""
        self.sample_data = [
            {
                "ID": "KB001",
                "Question": "How do I create an account?",
                "Content": "You can create an account by visiting our website.",
                "Section": "Account Management",
                "Source": "Website",
                "Owner": "Support Team",
                "Tag": "account",
            },
            {
                "ID": "KB002",
                "Question": "What are the fees?",
                "Content": "Our transaction fees are 1% per transaction.",
                "Section": "Fees",
                "Source": "Documentation",
                "Owner": "Finance Team",
                "Tag": "fees",
            },
        ]

        # Closed file object; tests read the fixture through its .name path.
        self.temp_file = self._write_json_fixture(self.sample_data)

    def test_load_knowledge_base(self):
        """Loading the JSON fixture yields a DataFrame with the expected shape."""
        df = load_knowledge_base(self.temp_file.name)

        # Check DataFrame structure
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 2)
        for column in ("ID", "Question", "Content"):
            self.assertIn(column, df.columns)

    def test_load_knowledge_base_drops_null_content(self):
        """Rows whose Content is null are dropped while loading."""
        data_with_null = self.sample_data + [
            {
                "ID": "KB003",
                "Question": "Test question?",
                "Content": None,
                "Section": "Test",
            }
        ]
        fixture = self._write_json_fixture(data_with_null)

        df = load_knowledge_base(fixture.name)

        # Only the two rows with real Content survive.
        self.assertEqual(len(df), 2)
        self.assertNotIn("KB003", df["ID"].tolist())

    def test_prepare_documents(self):
        """prepare_documents returns aligned documents, metadatas and ids."""
        documents, metadatas, ids = prepare_documents(self.sample_data)

        # Check lengths match
        self.assertEqual(len(documents), 2)
        self.assertEqual(len(metadatas), 2)
        self.assertEqual(len(ids), 2)

        # Check document format
        self.assertIn("Question:", documents[0])
        self.assertIn("Answer:", documents[0])
        self.assertIn("How do I create an account?", documents[0])

        # Check metadata structure
        self.assertEqual(metadatas[0]["id"], "KB001")
        self.assertEqual(metadatas[0]["question"], "How do I create an account?")
        self.assertEqual(metadatas[0]["section"], "Account Management")

        # Check IDs
        self.assertEqual(ids[0], "KB001")
        self.assertEqual(ids[1], "KB002")

    def test_prepare_documents_with_missing_fields(self):
        """Missing optional fields fall back to empty-string metadata."""
        data_minimal = [
            {"ID": "KB001", "Question": "Test question?", "Content": "Test answer."}
        ]

        documents, metadatas, ids = prepare_documents(data_minimal)

        # Should still work with defaults
        self.assertEqual(len(documents), 1)
        for field in ("section", "source", "owner", "tag"):
            with self.subTest(field=field):
                self.assertEqual(metadatas[0][field], "")

    @patch("src.knowledge_base.load_knowledge_base")
    def test_get_knowledge_base_data(self, mock_load):
        """get_knowledge_base_data loads once and returns one entry per row."""
        # Stand in for the real loader so no file access is needed.
        mock_load.return_value = pd.DataFrame(self.sample_data)

        documents, metadatas, ids = get_knowledge_base_data()

        # Verify load was called
        mock_load.assert_called_once()

        # Verify output
        self.assertEqual(len(documents), 2)
        self.assertEqual(len(metadatas), 2)
        self.assertEqual(len(ids), 2)

    def test_document_text_format(self):
        """Document text is exactly 'Question: ...' newline 'Answer: ...'."""
        documents, _, _ = prepare_documents(self.sample_data)

        # Check first document format
        expected_format = (
            "Question: How do I create an account?\n"
            "Answer: You can create an account by visiting our website."
        )
        self.assertEqual(documents[0], expected_format)

    def test_empty_knowledge_base(self):
        """An empty knowledge base produces three empty results."""
        documents, metadatas, ids = prepare_documents([])

        self.assertEqual(len(documents), 0)
        self.assertEqual(len(metadatas), 0)
        self.assertEqual(len(ids), 0)

    def test_metadata_completeness(self):
        """Every metadata entry contains all expected fields."""
        _, metadatas, _ = prepare_documents(self.sample_data)

        required_fields = [
            "question",
            "content",
            "section",
            "source",
            "owner",
            "tag",
            "id",
        ]
        for index, metadata in enumerate(metadatas):
            for field in required_fields:
                # subTest reports every missing field, not just the first.
                with self.subTest(record=index, field=field):
                    self.assertIn(field, metadata)

    @patch("src.knowledge_base.load_knowledge_base")
    def test_get_knowledge_base_data_with_exception(self, mock_load):
        """Errors raised by the loader propagate out of get_knowledge_base_data."""
        # Make load_knowledge_base raise an exception
        mock_load.side_effect = Exception("File not found")

        # Should raise the exception
        with self.assertRaises(Exception) as context:
            get_knowledge_base_data()

        self.assertIn("File not found", str(context.exception))


# Run the tests in this module when the file is executed directly as a
# script; nothing happens here when the module is merely imported.
if __name__ == "__main__":
    unittest.main()