Spaces:
Running
Running
Update parser/extract_tables_by_article.py
Browse files
parser/extract_tables_by_article.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
-
# extract_tables_by_article.py
|
| 2 |
from typing import List, Dict, Any
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import httpx
|
| 5 |
from parser.table_extractor import tables_from_soup, table_to_struct
|
| 6 |
-
from helpers.cleaner import clean_text
|
| 7 |
from parser.article_extractor import is_article, extract_article_number # <-- استخدم الدوال الصحيحة
|
| 8 |
|
| 9 |
async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]:
|
|
@@ -36,7 +35,7 @@ async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]
|
|
| 36 |
while prev:
|
| 37 |
text = prev.strip()
|
| 38 |
if is_article(text): # <-- استخدم is_article
|
| 39 |
-
target_article_number = extract_article_number(text)
|
| 40 |
target_article_snippet = text[:100] if len(text) > 100 else text
|
| 41 |
break
|
| 42 |
prev = prev.find_previous(string=True)
|
|
|
|
| 1 |
+
# parser/extract_tables_by_article.py
|
| 2 |
from typing import List, Dict, Any
|
| 3 |
from bs4 import BeautifulSoup
|
| 4 |
import httpx
|
| 5 |
from parser.table_extractor import tables_from_soup, table_to_struct
|
|
|
|
| 6 |
from parser.article_extractor import is_article, extract_article_number # <-- استخدم الدوال الصحيحة
|
| 7 |
|
| 8 |
async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]:
|
|
|
|
| 35 |
while prev:
|
| 36 |
text = prev.strip()
|
| 37 |
if is_article(text): # <-- استخدم is_article
|
| 38 |
+
target_article_number = extract_article_number(text)
|
| 39 |
target_article_snippet = text[:100] if len(text) > 100 else text
|
| 40 |
break
|
| 41 |
prev = prev.find_previous(string=True)
|