Spaces:

Mazenbs
/

extract_html_full

Sleeping

Mazenbs committed on Dec 5, 2025

Commit

79556ef

verified ·

1 Parent(s): 5494d71

Update parser/assembler.py

Files changed (1) hide show

parser/assembler.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from typing import List, Dict
 from helpers.utils import normalize_digits, extract_article_number, detect_line_type
 def extract_title_and_preamble(texts: List[str]) -> (str, str, List[str]):
     """
@@ -69,36 +69,7 @@ def extract_articles_from_texts(texts: List[str]) -> List[Dict]:
     return articles
-def extract_sections(texts: List[str]) -> List[Dict]:
-    """
-    Split the texts into sections and chapters.
-
-    Non-string items are skipped. Every other item is stripped, passed
-    through ``normalize_digits`` and classified with ``detect_line_type``.
-    A line classified as ``"section"`` starts a new section named after
-    that normalized line; any other line is appended to the current
-    section's texts.
-
-    Returns a list of dicts, each with a ``"name"`` entry and a
-    ``"texts"`` list, in the order the sections were encountered.
-    """
-    sections = []
-    # Accumulator for lines seen before the first section heading (empty name).
-    current = {"name": "", "texts": []}
-    for t in texts:
-        if not isinstance(t, str):
-            continue
-        t_norm = normalize_digits(t.strip())
-        line_type = detect_line_type(t_norm)
-        if line_type == "section":
-            # Save the previous section (skipped when it has neither name nor texts)
-            if current["texts"] or current["name"]:
-                sections.append(current)
-            # Start a new section
-            current = {"name": t_norm, "texts": []}
-        else:
-            current["texts"].append(t_norm)
-    # Append the last section
-    if current["texts"] or current["name"]:
-        sections.append(current)
-    return sections
 def parse_law_from_texts(text_blocks: List[Dict[str, str]]) -> Dict:

 from typing import List, Dict
 from helpers.utils import normalize_digits, extract_article_number, detect_line_type
+from .section_extractor import extract_sections
 def extract_title_and_preamble(texts: List[str]) -> (str, str, List[str]):
     """
     return articles
 def parse_law_from_texts(text_blocks: List[Dict[str, str]]) -> Dict: