Spaces:
Sleeping
Sleeping
| import unittest | |
| from toc_processor import merge_same_page_headers, clean_text, parse_raw_toc_output | |
class TestTOCProcessor(unittest.TestCase):
    """Unit tests for the TOC helpers imported from ``toc_processor``.

    TOC entries are written here as ``[level, title, page]`` lists.
    """

    def test_merge_same_page_headers(self):
        """Two consecutive same-level headers on one page become one entry."""
        # Scenario: "American Government..." (Page 31) followed by
        # "Divided World" (Page 31) is a single title split across two lines.
        input_toc = [
            [1, "Chapter 1 Intro", 5],
            [1, "American Government and Politics in a Racially", 31],
            [1, "Divided World", 31],
            [1, "Chapter 2", 57],
        ]
        expected_toc = [
            [1, "Chapter 1 Intro", 5],
            [1, "American Government and Politics in a Racially Divided World", 31],
            [1, "Chapter 2", 57],
        ]

        result = merge_same_page_headers(input_toc)

        # Compare the complete TOC so the untouched neighbours are verified as
        # well as the merged entry. Entries are normalised to lists so the
        # check does not depend on the sequence type the function returns.
        self.assertEqual([list(entry) for entry in result], expected_toc)

    def test_merge_same_page_headers_mixed_levels(self):
        """Headers on the same page but at different levels are left alone."""
        # Scenario: Level 1 followed by Level 2 on same page (Should NOT merge)
        input_toc = [
            [1, "Chapter 1", 10],
            [2, "Section 1.1", 10],
        ]
        # Separate literal: stays valid even if the function edits its input.
        expected_toc = [
            [1, "Chapter 1", 10],
            [2, "Section 1.1", 10],
        ]

        result = merge_same_page_headers(input_toc)

        self.assertEqual(len(result), 2)
        self.assertEqual([list(entry) for entry in result], expected_toc)

    def test_clean_text(self):
        """A non-breaking space becomes a space; a soft hyphen is removed."""
        dirty = "Hello\xa0World\xad"
        clean = clean_text(dirty)
        self.assertEqual(clean, "Hello World")
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()