Spaces:
Running
Update helpers/indexer.py
Browse files - helpers/indexer.py +9 -15
helpers/indexer.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
| 1 |
-
from typing import List, Dict,
|
| 2 |
-
|
| 3 |
|
| 4 |
def detect_extra_type(text: str) -> Optional[str]:
|
| 5 |
"""يتعرف على إن كان النص عبارة عن صورة أو رابط."""
|
| 6 |
lower = text.lower()
|
| 7 |
|
| 8 |
# رابط
|
| 9 |
-
if lower.startswith("http://"
|
| 10 |
# رابط صورة؟
|
| 11 |
if lower.endswith((".jpg", ".jpeg", ".png", ".gif", ".webp")):
|
| 12 |
return "image"
|
|
@@ -47,7 +46,7 @@ def build_indexed_response(
|
|
| 47 |
})
|
| 48 |
|
| 49 |
# ================================================================
|
| 50 |
-
# 1) إذا لم يوجد title_index ولا preamble_end → كل شيء نص عادي
|
| 51 |
# ================================================================
|
| 52 |
if title_index is None and preamble_end is None:
|
| 53 |
for t in texts:
|
|
@@ -55,22 +54,20 @@ def build_indexed_response(
|
|
| 55 |
return result
|
| 56 |
|
| 57 |
# ================================================================
|
| 58 |
-
# 2)
|
| 59 |
# ================================================================
|
| 60 |
if title_index is not None:
|
| 61 |
title_index = min(title_index, last_idx)
|
| 62 |
append_item(texts[title_index]["text"], "title")
|
| 63 |
|
| 64 |
# ================================================================
|
| 65 |
-
# 3)
|
| 66 |
-
# - إذا تم تقديم preamble_end فقط → نستنتج start تلقائياً
|
| 67 |
# ================================================================
|
| 68 |
if preamble_end is not None:
|
| 69 |
preamble_end = min(preamble_end, last_idx)
|
| 70 |
|
| 71 |
-
# استنتاج البداية تلقائياً
|
| 72 |
if preamble_start is None:
|
| 73 |
-
# أفضل تخمين منطقي: السطر بعد العنوان
|
| 74 |
preamble_start = (title_index + 1) if title_index is not None else 0
|
| 75 |
|
| 76 |
preamble_start = min(preamble_start, last_idx)
|
|
@@ -79,13 +76,10 @@ def build_indexed_response(
|
|
| 79 |
append_item(texts[i]["text"], "preamble")
|
| 80 |
|
| 81 |
# ================================================================
|
| 82 |
-
# 4) النص الأساسي (body)
|
| 83 |
-
# - إذا لم يتم إرسال body_start → نستنتجه = preamble_end + 1
|
| 84 |
-
# - إذا لم يتم إرسال body_end → يصبح آخر سطر
|
| 85 |
# ================================================================
|
| 86 |
-
if preamble_end is not None:
|
| 87 |
-
|
| 88 |
-
body_start = preamble_end + 1
|
| 89 |
|
| 90 |
if body_start is not None:
|
| 91 |
body_start = min(body_start, last_idx)
|
|
|
|
| 1 |
+
from typing import List, Dict, Optional
|
|
|
|
| 2 |
|
| 3 |
def detect_extra_type(text: str) -> Optional[str]:
|
| 4 |
"""يتعرف على إن كان النص عبارة عن صورة أو رابط."""
|
| 5 |
lower = text.lower()
|
| 6 |
|
| 7 |
# رابط
|
| 8 |
+
if lower.startswith(("http://", "https://")):
|
| 9 |
# رابط صورة؟
|
| 10 |
if lower.endswith((".jpg", ".jpeg", ".png", ".gif", ".webp")):
|
| 11 |
return "image"
|
|
|
|
| 46 |
})
|
| 47 |
|
| 48 |
# ================================================================
|
| 49 |
+
# (1) إذا لم يوجد title_index ولا preamble_end → كل شيء نص عادي
|
| 50 |
# ================================================================
|
| 51 |
if title_index is None and preamble_end is None:
|
| 52 |
for t in texts:
|
|
|
|
| 54 |
return result
|
| 55 |
|
| 56 |
# ================================================================
|
| 57 |
+
# (2) العنوان
|
| 58 |
# ================================================================
|
| 59 |
if title_index is not None:
|
| 60 |
title_index = min(title_index, last_idx)
|
| 61 |
append_item(texts[title_index]["text"], "title")
|
| 62 |
|
| 63 |
# ================================================================
|
| 64 |
+
# (3) المقدمة
|
|
|
|
| 65 |
# ================================================================
|
| 66 |
if preamble_end is not None:
|
| 67 |
preamble_end = min(preamble_end, last_idx)
|
| 68 |
|
| 69 |
+
# استنتاج البداية تلقائياً إذا لم يحددها المستخدم
|
| 70 |
if preamble_start is None:
|
|
|
|
| 71 |
preamble_start = (title_index + 1) if title_index is not None else 0
|
| 72 |
|
| 73 |
preamble_start = min(preamble_start, last_idx)
|
|
|
|
| 76 |
append_item(texts[i]["text"], "preamble")
|
| 77 |
|
| 78 |
# ================================================================
|
| 79 |
+
# (4) النص الأساسي (body)
|
|
|
|
|
|
|
| 80 |
# ================================================================
|
| 81 |
+
if preamble_end is not None and body_start is None:
|
| 82 |
+
body_start = preamble_end + 1
|
|
|
|
| 83 |
|
| 84 |
if body_start is not None:
|
| 85 |
body_start = min(body_start, last_idx)
|