Spaces:

Mazenbs
/

extract_html_full

Sleeping

Mazenbs committed on Dec 9, 2025

Commit

6e6e56f

verified ·

1 Parent(s): 1ad4690

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ from parser.assembler import parse_law_from_texts
 from supabase_utils import save_law_to_supabase
 from helpers.indexer import build_indexed_response
 from helpers.blocks_all import extract_from_url
 app = FastAPI(
@@ -74,4 +75,29 @@ async def extract_link_get(
         return raw_texts
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")

 from supabase_utils import save_law_to_supabase
 from helpers.indexer import build_indexed_response
 from helpers.blocks_all import extract_from_url
+from parser.table_extractor import tables_from_soup, table_to_struct
 app = FastAPI(
         return raw_texts
     except Exception as e:
+        raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")
+@app.get("/extract_tables")
+async def extract_tables_get(
+    url: HttpUrl = Query(..., description="رابط الصفحة المراد استخراج الجداول منها"),
+    timeout: int = Query(10, ge=1, le=60, description="مهلة الطلب بالثواني")
+):
+    """
+    استخراج جميع الجداول من صفحة الويب وإرجاعها كهيكل JSON مرتب.
+    """
+    try:
+        # 1) جلب محتوى الصفحة
+        html_content = await extract_from_url(str(url), timeout)
+        # 2) تحويل HTML إلى BeautifulSoup
+        soup = BeautifulSoup(html_content, "html.parser")
+        # 3) استخراج كل الجداول
+        tables = tables_from_soup(soup)
+        structured_tables = [table_to_struct(table) for table in tables]
+        return {"count": len(structured_tables), "tables": structured_tables}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"خطأ في معالجة الجداول: {str(e)}")