# parser/preamble_extractor.py
import re
from typing import List, Tuple

from parser.article_extractor import ARTICLE_PATTERN
from parser.section_extractor import SECTION_KEYWORDS, SECTION_RE


def extract_preamble(text_blocks: List[str]) -> Tuple[List[str], List[str]]:
    """Split the lines of ``text_blocks`` into a preamble and the remainder.

    Every block is split into lines; each line is stripped and blank lines
    are dropped. Lines are collected as preamble until the first line that
    either starts with one of ``SECTION_KEYWORDS`` or matches
    ``ARTICLE_PATTERN`` at its start. That line and every later non-blank
    line (across all subsequent blocks) go to the remainder.

    Args:
        text_blocks: Text chunks in document order; each may contain
            several newline-separated lines.

    Returns:
        A ``(preamble_lines, remaining_lines)`` tuple of stripped,
        non-empty lines. If no structural line is ever found, everything
        ends up in ``preamble_lines`` and ``remaining_lines`` is empty.
    """
    # Compiled once per call instead of being rebuilt for every line.
    # The keywords are joined verbatim (not escaped), exactly as before, so
    # any regex syntax they contain keeps its meaning.
    section_start = re.compile(rf"^\s*(?:{'|'.join(SECTION_KEYWORDS)})\b")

    preamble_lines: List[str] = []
    remaining_lines: List[str] = []
    found_structure = False

    for block in text_blocks:
        for raw_line in block.splitlines():
            line = raw_line.strip()
            if not line:
                continue

            # The first section heading or article flips the switch for
            # good; afterwards no further matching is attempted.
            if not found_structure and (
                section_start.match(line) or re.match(ARTICLE_PATTERN, line)
            ):
                found_structure = True

            if found_structure:
                remaining_lines.append(line)
            else:
                preamble_lines.append(line)

    # Callers unpack exactly two values.
    return preamble_lines, remaining_lines