# parser/article_extractor.py
import re
from helpers.utils import is_article, extract_article_number, normalize_digits

def extract_articles(texts: list):
    """
    Group a sequence of text lines into articles.

    A line opens a new article when ``is_article`` accepts its
    digit-normalized form (helpers imported from ``helpers.utils``). Every
    later line is attached to that article until the next heading line is
    found. Lines that appear before the first heading are discarded.

    Args:
        texts: Text lines in document order; each item must support
            ``.strip()``.

    Returns:
        A list of dicts in order of appearance. Each dict has a ``"number"``
        key (the value ``extract_article_number`` returned for the heading
        line) and a ``"text"`` key (the stripped heading line followed by the
        stripped body lines, newline-separated, with the combined string
        stripped again).
    """
    collected = []
    heading_number = None
    body_lines = None  # stays None until the first article heading is seen

    for entry in texts:
        line = entry.strip()

        # Detection works on the digit-normalized form of the line
        # (the original note says this converts Eastern Arabic digits and
        # cleans the text); the stored text keeps the original characters.
        normalized = normalize_digits(line)

        if not is_article(normalized):
            # Continuation line: attach it to the open article, if any.
            if body_lines is not None:
                body_lines.append(line)
            continue

        next_number = extract_article_number(normalized)

        # Close the previous article before opening the new one.
        if body_lines is not None:
            collected.append({
                "number": heading_number,
                "text": "\n".join(body_lines).strip(),
            })

        # The heading line itself is the first line of the article text.
        heading_number = next_number
        body_lines = [line]

    # Close the last article still open at the end of the input.
    if body_lines is not None:
        collected.append({
            "number": heading_number,
            "text": "\n".join(body_lines).strip(),
        })

    return collected