Mazenbs committed on
Commit
8734fe8
·
verified ·
1 Parent(s): 471980d

Update parser/section_extractor.py

Browse files
Files changed (1) hide show
  1. parser/section_extractor.py +9 -5
parser/section_extractor.py CHANGED
@@ -1,16 +1,20 @@
1
  def extract_sections(texts: list):
 
 
 
2
  sections = []
3
  current = {"name": "", "texts": []}
4
 
5
  for t in texts:
6
- if "الباب" in t or "الفصل" in t:
7
- if current["texts"]:
 
8
  sections.append(current)
9
- current = {"name": t, "texts": []}
10
  else:
11
- current["texts"].append(t)
12
 
13
- if current["texts"]:
14
  sections.append(current)
15
 
16
  return sections
 
1
  def extract_sections(texts: list):
2
+ """
3
+ تقسيم النصوص إلى أقسام (أبواب/فصول) مع تجميع النصوص داخل كل قسم
4
+ """
5
  sections = []
6
  current = {"name": "", "texts": []}
7
 
8
  for t in texts:
9
+ # التعرف على البواب/الفصول
10
+ if any(keyword in t for keyword in ["الباب", "الفصل"]):
11
+ if current["texts"] or current["name"]:
12
  sections.append(current)
13
+ current = {"name": t.strip(), "texts": []}
14
  else:
15
+ current["texts"].append(t.strip())
16
 
17
+ if current["texts"] or current["name"]:
18
  sections.append(current)
19
 
20
  return sections