Spaces:

Mazenbs
/

extract_html_full

Running

Mazenbs committed on Dec 9, 2025

Commit

3cff8e2

verified ·

1 Parent(s): 08bf392

Update parser/extract_tables_by_article.py

Files changed (1) hide show

parser/extract_tables_by_article.py CHANGED Viewed

@@ -1,9 +1,8 @@
-# extract_tables_by_article.py
 from typing import List, Dict, Any
 from bs4 import BeautifulSoup
 import httpx
 from parser.table_extractor import tables_from_soup, table_to_struct
-from helpers.cleaner import clean_text
 from parser.article_extractor import is_article, extract_article_number  # <-- استخدم الدوال الصحيحة
 async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]:
@@ -36,7 +35,7 @@ async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]
         while prev:
             text = prev.strip()
             if is_article(text):  # <-- استخدم is_article
-                target_article_number = extract_article_number(text)  # <-- استخدم extract_article_number
                 target_article_snippet = text[:100] if len(text) > 100 else text
                 break
             prev = prev.find_previous(string=True)

+# parser/extract_tables_by_article.py
 from typing import List, Dict, Any
 from bs4 import BeautifulSoup
 import httpx
 from parser.table_extractor import tables_from_soup, table_to_struct
 from parser.article_extractor import is_article, extract_article_number  # <-- استخدم الدوال الصحيحة
 async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]:
         while prev:
             text = prev.strip()
             if is_article(text):  # <-- استخدم is_article
+                target_article_number = extract_article_number(text)
                 target_article_snippet = text[:100] if len(text) > 100 else text
                 break
             prev = prev.find_previous(string=True)