Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -21,10 +21,11 @@ class IndexedURLRequest(BaseModel):
|
|
| 21 |
url: HttpUrl
|
| 22 |
timeout: int = Field(10, ge=1, le=60)
|
| 23 |
title_index: int = Field(..., ge=0)
|
| 24 |
-
preamble_start: Optional[int] = Field(None, ge=0
|
| 25 |
preamble_end: int = Field(..., ge=0)
|
| 26 |
-
body_start: Optional[int] = Field(None, ge=0
|
| 27 |
-
body_end: Optional[int] = Field(None, ge=0
|
|
|
|
| 28 |
|
| 29 |
# -----------------------------
|
| 30 |
# نماذج البيانات
|
|
@@ -111,14 +112,38 @@ async def extract_text_post(request: URLRequest):
|
|
| 111 |
@app.post("/extract/indexed")
|
| 112 |
async def extract_indexed(request: IndexedURLRequest):
|
| 113 |
try:
|
|
|
|
| 114 |
raw_texts = await extract_text_from_url(str(request.url), request.timeout)
|
| 115 |
-
|
|
|
|
|
|
|
| 116 |
texts=raw_texts,
|
| 117 |
title_index=request.title_index,
|
| 118 |
-
preamble_start=request.preamble_start,
|
| 119 |
preamble_end=request.preamble_end,
|
| 120 |
-
body_start=request.body_start,
|
| 121 |
body_end=request.body_end
|
| 122 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
except Exception as e:
|
| 124 |
raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}")
|
|
|
|
| 21 |
url: HttpUrl
|
| 22 |
timeout: int = Field(10, ge=1, le=60)
|
| 23 |
title_index: int = Field(..., ge=0)
|
| 24 |
+
preamble_start: Optional[int] = Field(None, ge=0)
|
| 25 |
preamble_end: int = Field(..., ge=0)
|
| 26 |
+
body_start: Optional[int] = Field(None, ge=0)
|
| 27 |
+
body_end: Optional[int] = Field(None, ge=0)
|
| 28 |
+
return_parsed: bool = Field(False, description="إرجاع النتيجة محلّلة (parsed) بدلاً من raw texts")
|
| 29 |
|
| 30 |
# -----------------------------
|
| 31 |
# نماذج البيانات
|
|
|
|
| 112 |
@app.post("/extract/indexed")
async def extract_indexed(request: IndexedURLRequest):
    """Fetch texts from a URL, index them, and return them raw or parsed.

    The texts fetched from ``request.url`` are passed to
    ``build_indexed_response`` together with the title / preamble / body
    indices carried by the request. When ``request.return_parsed`` is true,
    the indexed items are regrouped into text blocks and handed to
    ``parse_law_from_texts``; otherwise every indexed item is returned as a
    ``TextResponse``.

    Raises:
        HTTPException: re-raised unchanged when something inside the handler
            already raised one; any other exception is reported as a 500
            with the original error chained as its cause.
    """
    try:
        # 1) Fetch the raw texts.
        raw_texts = await extract_text_from_url(str(request.url), request.timeout)

        # 2) Build the indexed list.
        indexed = build_indexed_response(
            texts=raw_texts,
            title_index=request.title_index,
            preamble_start=request.preamble_start,
            preamble_end=request.preamble_end,
            body_start=request.body_start,
            body_end=request.body_end,
        )

        # 3) Was a parsed result requested?
        if request.return_parsed:
            # Regroup the indexed list: one text for the title, one text for
            # the preamble, then the remaining texts one block each.
            title_lines = [item["title"] for item in indexed if item.get("title")]
            preamble_lines = [item["preamble"] for item in indexed if item.get("preamble")]
            body_lines = [item["text"] for item in indexed if item.get("text")]

            # Merge them back into text blocks. The title and preamble blocks
            # are always emitted (possibly with empty text) so they stay the
            # first two entries.
            merged_blocks = (
                [{"text": "\n".join(title_lines)}]
                + [{"text": "\n".join(preamble_lines)}]
                + [{"text": txt} for txt in body_lines]
            )

            parsed = parse_law_from_texts(merged_blocks)
            return LegalDocumentResponse(parsed_document={**parsed, "saved_to_db": False})

        # 4) Otherwise return the raw texts, one TextResponse per indexed item.
        # NOTE(review): if an item has none of title/preamble/text this passes
        # text=None to TextResponse — confirm the model accepts that.
        return LegalDocumentResponse(
            raw_texts=[
                TextResponse(text=item.get("title") or item.get("preamble") or item.get("text"))
                for item in indexed
            ]
        )

    except HTTPException:
        # Keep the status code and detail of HTTP errors raised by helpers
        # instead of re-wrapping them as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"خطأ في معالجة المحتوى: {str(e)}") from e
|