Spaces:
Sleeping
Sleeping
Update parser/assembler.py
Browse files
- parser/assembler.py +1 -30
parser/assembler.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from typing import List, Dict
|
| 2 |
from helpers.utils import normalize_digits, extract_article_number, detect_line_type
|
| 3 |
-
|
| 4 |
|
| 5 |
def extract_title_and_preamble(texts: List[str]) -> (str, str, List[str]):
|
| 6 |
"""
|
|
@@ -69,36 +69,7 @@ def extract_articles_from_texts(texts: List[str]) -> List[Dict]:
|
|
| 69 |
return articles
|
| 70 |
|
| 71 |
|
| 72 |
-
def extract_sections(texts: List[str]) -> List[Dict]:
|
| 73 |
-
"""
|
| 74 |
-
تقسيم النصوص إلى أقسام وفصول.
|
| 75 |
-
"""
|
| 76 |
-
sections = []
|
| 77 |
-
current = {"name": "", "texts": []}
|
| 78 |
-
|
| 79 |
-
for t in texts:
|
| 80 |
-
if not isinstance(t, str):
|
| 81 |
-
continue
|
| 82 |
-
|
| 83 |
-
t_norm = normalize_digits(t.strip())
|
| 84 |
-
line_type = detect_line_type(t_norm)
|
| 85 |
-
|
| 86 |
-
if line_type == "section":
|
| 87 |
-
# حفظ آخر قسم
|
| 88 |
-
if current["texts"] or current["name"]:
|
| 89 |
-
sections.append(current)
|
| 90 |
-
|
| 91 |
-
# قسم جديد
|
| 92 |
-
current = {"name": t_norm, "texts": []}
|
| 93 |
-
|
| 94 |
-
else:
|
| 95 |
-
current["texts"].append(t_norm)
|
| 96 |
-
|
| 97 |
-
# إضافة آخر قسم
|
| 98 |
-
if current["texts"] or current["name"]:
|
| 99 |
-
sections.append(current)
|
| 100 |
|
| 101 |
-
return sections
|
| 102 |
|
| 103 |
|
| 104 |
def parse_law_from_texts(text_blocks: List[Dict[str, str]]) -> Dict:
|
|
|
|
| 1 |
from typing import List, Dict
|
| 2 |
from helpers.utils import normalize_digits, extract_article_number, detect_line_type
|
| 3 |
+
from .section_extractor import extract_sections
|
| 4 |
|
| 5 |
def extract_title_and_preamble(texts: List[str]) -> (str, str, List[str]):
|
| 6 |
"""
|
|
|
|
| 69 |
return articles
|
| 70 |
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
def parse_law_from_texts(text_blocks: List[Dict[str, str]]) -> Dict:
|