Mazenbs committed on
Commit
d54b4e8
·
verified ·
1 Parent(s): 4675394

Update parser/preamble_extractor.py

Browse files
Files changed (1) hide show
  1. parser/preamble_extractor.py +3 -2
parser/preamble_extractor.py CHANGED
@@ -15,7 +15,8 @@ def extract_preamble(text_blocks: List[str]) -> Tuple[List[str], List[str]]:
15
  if not line:
16
  continue
17
 
18
- if re.match(SECTION_RE, line) or ARTICLE_PATTERN.match(line):
 
19
  found_structure = True
20
 
21
  if found_structure:
@@ -23,4 +24,4 @@ def extract_preamble(text_blocks: List[str]) -> Tuple[List[str], List[str]]:
23
  else:
24
  preamble_lines.append(line)
25
 
26
- return preamble_lines, remaining_lines
 
15
  if not line:
16
  continue
17
 
18
+ # إذا وجدنا أول قسم أو مادة
19
+ if not found_structure and (re.match(rf"^\s*(?:{'|'.join(SECTION_KEYWORDS)})\b", line) or re.match(ARTICLE_PATTERN, line)):
20
  found_structure = True
21
 
22
  if found_structure:
 
24
  else:
25
  preamble_lines.append(line)
26
 
27
+ return preamble_lines, remaining_lines # يجب أن يكون tuple من قيمتين فقط