Spaces:

Mazenbs
/

extract_html_full

Running

Mazenbs committed on Dec 8, 2025

Commit

9e0fa9c

verified ·

1 Parent(s): e3aa294

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,6 +8,8 @@ from helpers.output_clipper import clip_by_ranges
 from parser.assembler import parse_law_from_texts
 from supabase_utils import save_law_to_supabase
 from helpers.indexer import build_indexed_response
 app = FastAPI(
     title="Text Extractor API",
     description="API لاستخراج النصوص من صفحات الويب مع إمكانية التحكم في النطاقات",
@@ -139,3 +141,21 @@ async def extract_indexed(request: IndexedURLRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")

 from parser.assembler import parse_law_from_texts
 from supabase_utils import save_law_to_supabase
 from helpers.indexer import build_indexed_response
+from helpers.indexer_simple import build_indexed
 app = FastAPI(
     title="Text Extractor API",
     description="API لاستخراج النصوص من صفحات الويب مع إمكانية التحكم في النطاقات",
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")
+# ============================
+# نقطة نهاية GET جديدة
+# ============================
+@app.get("/extract_link")
+async def extract_link_get(
+    url: HttpUrl = Query(..., description="رابط الصفحة المراد استخراج النصوص منها"),
+    timeout: int = Query(10, ge=1, le=60, description="مهلة الطلب بالثواني")
+):
+    try:
+        # 1) استخراج النصوص الخام من الرابط
+        raw_texts = await extract_text_from_url(str(url), timeout)
+        # 2) بناء القائمة المفهرسة
+        return build_indexed(texts=raw_texts)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")