Spaces:
Sleeping
Sleeping
File size: 1,674 Bytes
import unittest
from toc_processor import merge_same_page_headers, clean_text, parse_raw_toc_output
class TestTOCProcessor(unittest.TestCase):
    """Unit tests for the TOC helpers imported from ``toc_processor``.

    The TOC fixtures used here are lists of ``[level, title, page]`` entries.
    """

    def test_merge_same_page_headers(self):
        """Consecutive same-level headers on one page are expected to merge."""
        # Scenario: "American Government..." (Page 31) followed by
        # "Divided World" (Page 31)
        input_toc = [
            [1, "Chapter 1 Intro", 5],
            [1, "American Government and Politics in a Racially", 31],
            [1, "Divided World", 31],
            [1, "Chapter 2", 57],
        ]
        expected_toc = [
            [1, "Chapter 1 Intro", 5],
            [1, "American Government and Politics in a Racially Divided World", 31],
            [1, "Chapter 2", 57],
        ]

        result = merge_same_page_headers(input_toc)

        # Check the whole TOC, not just the merged row, so that a regression
        # touching the neighbouring entries is caught as well.
        self.assertEqual(len(result), len(expected_toc))
        for actual, expected in zip(result, expected_toc):
            # subTest reports each mismatching entry separately; list() keeps
            # the comparison independent of the entry's sequence type.
            with self.subTest(title=expected[1]):
                self.assertEqual(list(actual), expected)

    def test_merge_same_page_headers_mixed_levels(self):
        """Headers of different levels on the same page must stay separate."""
        # Scenario: Level 1 followed by Level 2 on same page (Should NOT merge)
        input_toc = [
            [1, "Chapter 1", 10],
            [2, "Section 1.1", 10],
        ]

        result = merge_same_page_headers(input_toc)

        self.assertEqual(len(result), 2)

    def test_clean_text(self):
        """A non-breaking space becomes a space and a soft hyphen is removed."""
        # "\xa0" is a non-breaking space, "\xad" is a soft hyphen.
        dirty = "Hello\xa0World\xad"

        clean = clean_text(dirty)

        self.assertEqual(clean, "Hello World")
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|