Mazenbs committed on
Commit
3cff8e2
·
verified ·
1 Parent(s): 08bf392

Update parser/extract_tables_by_article.py

Browse files
parser/extract_tables_by_article.py CHANGED
@@ -1,9 +1,8 @@
1
- # extract_tables_by_article.py
2
  from typing import List, Dict, Any
3
  from bs4 import BeautifulSoup
4
  import httpx
5
  from parser.table_extractor import tables_from_soup, table_to_struct
6
- from helpers.cleaner import clean_text
7
  from parser.article_extractor import is_article, extract_article_number # <-- استخدم الدوال الصحيحة
8
 
9
  async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]:
@@ -36,7 +35,7 @@ async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]
36
  while prev:
37
  text = prev.strip()
38
  if is_article(text): # <-- استخدم is_article
39
- target_article_number = extract_article_number(text) # <-- استخدم extract_article_number
40
  target_article_snippet = text[:100] if len(text) > 100 else text
41
  break
42
  prev = prev.find_previous(string=True)
 
1
+ # parser/extract_tables_by_article.py
2
  from typing import List, Dict, Any
3
  from bs4 import BeautifulSoup
4
  import httpx
5
  from parser.table_extractor import tables_from_soup, table_to_struct
 
6
  from parser.article_extractor import is_article, extract_article_number # <-- استخدم الدوال الصحيحة
7
 
8
  async def extract_tables_from_url(url: str, timeout: int = 10) -> Dict[str, Any]:
 
35
  while prev:
36
  text = prev.strip()
37
  if is_article(text): # <-- استخدم is_article
38
+ target_article_number = extract_article_number(text)
39
  target_article_snippet = text[:100] if len(text) > 100 else text
40
  break
41
  prev = prev.find_previous(string=True)