Spaces:
Build error
Build error
File size: 6,615 Bytes
"""
Unit tests for the knowledge_base module.

Tests knowledge base loading and document preparation.
"""
import json
import os
import tempfile
import unittest
from unittest.mock import patch
import pandas as pd
from src.knowledge_base import (get_knowledge_base_data, load_knowledge_base,
prepare_documents)
class TestKnowledgeBase(unittest.TestCase):
    """Test cases for the knowledge_base module.

    Exercises ``load_knowledge_base``, ``prepare_documents`` and
    ``get_knowledge_base_data``. Tests that need a file on disk write a
    temporary JSON file which is removed again after the test.
    """

    def _write_temp_json(self, data):
        """Write *data* as JSON to a new temporary ``.json`` file.

        Removal of the file is registered with ``addCleanup`` before anything
        is written, so the file is deleted even if serialisation fails or if
        ``setUp`` raises (unittest skips ``tearDown`` in that case but still
        runs registered cleanups).

        Args:
            data: JSON-serialisable object to dump into the file.

        Returns:
            The closed ``NamedTemporaryFile`` object; its ``name`` attribute
            is the path of the file on disk.
        """
        handle = tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".json", encoding="utf-8"
        )
        self.addCleanup(self._remove_file, handle.name)
        # The context manager closes the handle even when json.dump raises.
        with handle:
            json.dump(data, handle)
        return handle

    @staticmethod
    def _remove_file(path):
        """Delete *path*, tolerating a file that has already been removed."""
        try:
            os.unlink(path)
        except FileNotFoundError:
            pass

    def setUp(self):
        """Build the sample records and store them in a temporary JSON file."""
        # Create sample knowledge base data
        self.sample_data = [
            {
                "ID": "KB001",
                "Question": "How do I create an account?",
                "Content": "You can create an account by visiting our website.",
                "Section": "Account Management",
                "Source": "Website",
                "Owner": "Support Team",
                "Tag": "account",
            },
            {
                "ID": "KB002",
                "Question": "What are the fees?",
                "Content": "Our transaction fees are 1% per transaction.",
                "Section": "Fees",
                "Source": "Documentation",
                "Owner": "Finance Team",
                "Tag": "fees",
            },
        ]
        # Create temporary JSON file (closed handle; use ``.name`` for the path)
        self.temp_file = self._write_temp_json(self.sample_data)

    def tearDown(self):
        """Remove the temporary JSON file created by ``setUp``."""
        if os.path.exists(self.temp_file.name):
            os.unlink(self.temp_file.name)

    def test_load_knowledge_base(self):
        """Test loading knowledge base from JSON file"""
        df = load_knowledge_base(self.temp_file.name)

        # Check DataFrame structure
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(len(df), 2)
        self.assertIn("ID", df.columns)
        self.assertIn("Question", df.columns)
        self.assertIn("Content", df.columns)

    def test_load_knowledge_base_drops_null_content(self):
        """Test that rows with null Content are dropped"""
        data_with_null = self.sample_data + [
            {
                "ID": "KB003",
                "Question": "Test question?",
                "Content": None,
                "Section": "Test",
            }
        ]
        # The helper registers its own cleanup, so no try/finally is needed.
        temp_file_null = self._write_temp_json(data_with_null)

        df = load_knowledge_base(temp_file_null.name)

        # Should only have 2 rows (null Content row dropped)
        self.assertEqual(len(df), 2)

    def test_prepare_documents(self):
        """Test preparing documents for vector store"""
        documents, metadatas, ids = prepare_documents(self.sample_data)

        # Check lengths match
        self.assertEqual(len(documents), 2)
        self.assertEqual(len(metadatas), 2)
        self.assertEqual(len(ids), 2)

        # Check document format
        self.assertIn("Question:", documents[0])
        self.assertIn("Answer:", documents[0])
        self.assertIn("How do I create an account?", documents[0])

        # Check metadata structure
        self.assertEqual(metadatas[0]["id"], "KB001")
        self.assertEqual(metadatas[0]["question"], "How do I create an account?")
        self.assertEqual(metadatas[0]["section"], "Account Management")

        # Check IDs
        self.assertEqual(ids[0], "KB001")
        self.assertEqual(ids[1], "KB002")

    def test_prepare_documents_with_missing_fields(self):
        """Test preparing documents with missing optional fields"""
        data_minimal = [
            {"ID": "KB001", "Question": "Test question?", "Content": "Test answer."}
        ]

        documents, metadatas, ids = prepare_documents(data_minimal)

        # Should still work with defaults
        self.assertEqual(len(documents), 1)
        self.assertEqual(metadatas[0]["section"], "")
        self.assertEqual(metadatas[0]["source"], "")
        self.assertEqual(metadatas[0]["owner"], "")
        self.assertEqual(metadatas[0]["tag"], "")

    @patch("src.knowledge_base.load_knowledge_base")
    def test_get_knowledge_base_data(self, mock_load):
        """Test get_knowledge_base_data function"""
        # Mock the load_knowledge_base function so no file is read
        mock_df = pd.DataFrame(self.sample_data)
        mock_load.return_value = mock_df

        documents, metadatas, ids = get_knowledge_base_data()

        # Verify load was called
        mock_load.assert_called_once()

        # Verify output
        self.assertEqual(len(documents), 2)
        self.assertEqual(len(metadatas), 2)
        self.assertEqual(len(ids), 2)

    def test_document_text_format(self):
        """Test that document text is properly formatted"""
        documents, _, _ = prepare_documents(self.sample_data)

        # Check first document format
        expected_format = (
            "Question: How do I create an account?\n"
            "Answer: You can create an account by visiting our website."
        )
        self.assertEqual(documents[0], expected_format)

    def test_empty_knowledge_base(self):
        """Test handling of empty knowledge base"""
        empty_data = []

        documents, metadatas, ids = prepare_documents(empty_data)

        self.assertEqual(len(documents), 0)
        self.assertEqual(len(metadatas), 0)
        self.assertEqual(len(ids), 0)

    def test_metadata_completeness(self):
        """Test that all metadata fields are present"""
        _, metadatas, _ = prepare_documents(self.sample_data)

        required_fields = [
            "question",
            "content",
            "section",
            "source",
            "owner",
            "tag",
            "id",
        ]
        for index, metadata in enumerate(metadatas):
            for field in required_fields:
                # subTest reports every missing field, not just the first one.
                with self.subTest(index=index, field=field):
                    self.assertIn(field, metadata)

    @patch("src.knowledge_base.load_knowledge_base")
    def test_get_knowledge_base_data_with_exception(self, mock_load):
        """Test get_knowledge_base_data handles exceptions"""
        # Make load_knowledge_base raise an exception
        mock_load.side_effect = Exception("File not found")

        # Should raise the exception
        with self.assertRaises(Exception) as context:
            get_knowledge_base_data()

        self.assertIn("File not found", str(context.exception))
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|