Spaces:

Mazenbs
/

extract_html_full

Running

Mazenbs committed on Dec 9, 2025

Commit

884a60b

verified ·

1 Parent(s): 56d507d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from parser.assembler import parse_law_from_texts
 from supabase_utils import save_law_to_supabase
 from helpers.indexer import build_indexed_response
 from helpers.blocks_all import extract_from_url
-from parser.table_extractor import tables_from_soup, table_to_struct
 app = FastAPI(
@@ -78,26 +78,20 @@ async def extract_link_get(
         raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")
 @app.get("/extract_tables")
 async def extract_tables_get(
     url: HttpUrl = Query(..., description="رابط الصفحة المراد استخراج الجداول منها"),
     timeout: int = Query(10, ge=1, le=60, description="مهلة الطلب بالثواني")
 ):
     """
-    استخراج جميع الجداول من صفحة الويب وإرجاعها كهيكل JSON مرتب.
     """
     try:
-        # 1) جلب محتوى الصفحة
-        html_content = await extract_from_url(str(url), timeout)
-        # 2) تحويل HTML إلى BeautifulSoup
-        soup = BeautifulSoup(html_content, "html.parser")
-        # 3) استخراج كل الجداول
-        tables = tables_from_soup(soup)
-        structured_tables = [table_to_struct(table) for table in tables]
-        return {"count": len(structured_tables), "tables": structured_tables}
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"خطأ في معالجة الجداول: {str(e)}")

 from supabase_utils import save_law_to_supabase
 from helpers.indexer import build_indexed_response
 from helpers.blocks_all import extract_from_url
+from parser.extract_tables_by_article import extract_tables_from_url
 app = FastAPI(
         raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")
 @app.get("/extract_tables")
 async def extract_tables_get(
     url: HttpUrl = Query(..., description="رابط الصفحة المراد استخراج الجداول منها"),
     timeout: int = Query(10, ge=1, le=60, description="مهلة الطلب بالثواني")
 ):
     """
+    استخراج جميع الجداول المرتبطة بالمواد من صفحة الويب وإرجاعها كهيكل JSON مرتب.
     """
     try:
+        # استخدام الوظيفة المستقلة الجديدة
+        result = await extract_tables_from_url(str(url), timeout)
+        return result
     except Exception as e:
+        raise HTTPException(status_code=500, detail=f"خطأ في معالجة الجداول: {str(e)}")