Spaces:
Running
Running
Update parser/preamble_extractor.py
Browse files
parser/preamble_extractor.py
CHANGED
|
@@ -15,7 +15,8 @@ def extract_preamble(text_blocks: List[str]) -> Tuple[List[str], List[str]]:
|
|
| 15 |
if not line:
|
| 16 |
continue
|
| 17 |
|
| 18 |
-
|
|
|
|
| 19 |
found_structure = True
|
| 20 |
|
| 21 |
if found_structure:
|
|
@@ -23,4 +24,4 @@ def extract_preamble(text_blocks: List[str]) -> Tuple[List[str], List[str]]:
|
|
| 23 |
else:
|
| 24 |
preamble_lines.append(line)
|
| 25 |
|
| 26 |
-
return preamble_lines, remaining_lines
|
|
|
|
| 15 |
if not line:
|
| 16 |
continue
|
| 17 |
|
| 18 |
+
# إذا وجدنا أول قسم أو مادة
|
| 19 |
+
if not found_structure and (re.match(rf"^\s*(?:{'|'.join(SECTION_KEYWORDS)})\b", line) or re.match(ARTICLE_PATTERN, line)):
|
| 20 |
found_structure = True
|
| 21 |
|
| 22 |
if found_structure:
|
|
|
|
| 24 |
else:
|
| 25 |
preamble_lines.append(line)
|
| 26 |
|
| 27 |
+
return preamble_lines, remaining_lines # يجب أن يكون tuple من قيمتين فقط
|