Mazenbs committed on
Commit
79556ef
·
verified ·
1 Parent(s): 5494d71

Update parser/assembler.py

Browse files
Files changed (1) hide show
  1. parser/assembler.py +1 -30
parser/assembler.py CHANGED
@@ -1,6 +1,6 @@
1
  from typing import List, Dict
2
  from helpers.utils import normalize_digits, extract_article_number, detect_line_type
3
-
4
 
5
  def extract_title_and_preamble(texts: List[str]) -> (str, str, List[str]):
6
  """
@@ -69,36 +69,7 @@ def extract_articles_from_texts(texts: List[str]) -> List[Dict]:
69
  return articles
70
 
71
 
72
- def extract_sections(texts: List[str]) -> List[Dict]:
73
- """
74
- تقسيم النصوص إلى أقسام وفصول.
75
- """
76
- sections = []
77
- current = {"name": "", "texts": []}
78
-
79
- for t in texts:
80
- if not isinstance(t, str):
81
- continue
82
-
83
- t_norm = normalize_digits(t.strip())
84
- line_type = detect_line_type(t_norm)
85
-
86
- if line_type == "section":
87
- # حفظ آخر قسم
88
- if current["texts"] or current["name"]:
89
- sections.append(current)
90
-
91
- # قسم جديد
92
- current = {"name": t_norm, "texts": []}
93
-
94
- else:
95
- current["texts"].append(t_norm)
96
-
97
- # إضافة آخر قسم
98
- if current["texts"] or current["name"]:
99
- sections.append(current)
100
 
101
- return sections
102
 
103
 
104
  def parse_law_from_texts(text_blocks: List[Dict[str, str]]) -> Dict:
 
1
  from typing import List, Dict
2
  from helpers.utils import normalize_digits, extract_article_number, detect_line_type
3
+ from .section_extractor import extract_sections
4
 
5
  def extract_title_and_preamble(texts: List[str]) -> (str, str, List[str]):
6
  """
 
69
  return articles
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
 
73
 
74
 
75
  def parse_law_from_texts(text_blocks: List[Dict[str, str]]) -> Dict: