Spaces:
Sleeping
Sleeping
Update parser/assembler.py
Browse files- parser/assembler.py +0 -41
parser/assembler.py
CHANGED
|
@@ -140,44 +140,3 @@ def parse_law_from_texts(text_blocks: List[Dict[str, str]], url: str = None) ->
|
|
| 140 |
}
|
| 141 |
}
|
| 142 |
|
| 143 |
-
def parse_law_from_textsx(text_blocks: List[Dict[str, str]]) -> Dict:
    """Assemble a structured law document from a list of text blocks.

    The blocks are split into a title, a preamble and the remaining blocks;
    the remaining blocks are grouped into sections, and each section is
    turned into a dict holding its title, its non-article text and its
    articles.

    Args:
        text_blocks: Text blocks to parse. Each block is read through its
            ``"text"`` key once it has been grouped into a section.

    Returns:
        A dict with three keys: ``"message"`` (always ``"success"``),
        ``"blocks"`` (holding the ``"count"`` of input blocks) and ``"law"``
        (title, preamble, number, year and the list of sections).
    """

    def _article_entry(article):
        # An article whose number is None is emitted as a bare tag entry.
        if article["number"] is None:
            return {"tag": article["text"]}
        return {"number": article["number"], "text": article["text"]}

    def _section_entry(raw_section):
        blocks = raw_section["texts"]

        # Merge the section's non-article texts, then apply merge_colon_lines.
        non_article_texts = [
            block["text"] for block in blocks if not is_article(block["text"])
        ]
        body = "\n".join(non_article_texts).strip()
        body = merge_colon_lines(body)

        parsed_articles = extract_articles_from_blocks(blocks)

        return {
            "title": raw_section["name"],
            "content": body,
            "articles": [_article_entry(item) for item in parsed_articles],
        }

    title, preamble, remaining_blocks = extract_title_and_preamble(text_blocks)
    raw_sections = extract_sections(remaining_blocks)

    # Extract the law number and year from the preamble.
    law_info = extract_law_number_and_year(preamble)

    sections = []
    for raw_section in raw_sections:
        sections.append(_section_entry(raw_section))

    block_summary = {
        "count": len(text_blocks),
    }
    law = {
        "title": title,
        "preamble": preamble,
        "number": law_info.get("law_number"),
        "year": law_info.get("year"),
        "sections": sections,
    }
    return {
        "message": "success",
        "blocks": block_summary,
        "law": law,
    }
|
| 182 |
-
|
| 183 |
-
|
|
|
|
| 140 |
}
|
| 141 |
}
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|