Mazenbs committed on
Commit
5a85bba
·
verified ·
1 Parent(s): 3eb4b8d

Update helpers/indexer.py

Browse files
Files changed (1) hide show
  1. helpers/indexer.py +25 -48
helpers/indexer.py CHANGED
@@ -1,11 +1,12 @@
1
- from typing import List, Dict, Optional
 
2
 
3
  def detect_extra_type(text: str) -> Optional[str]:
4
  """يتعرف على إن كان النص عبارة عن صورة أو رابط."""
5
  lower = text.lower()
6
 
7
  # رابط
8
- if lower.startswith(("http://", "https://")):
9
  # رابط صورة؟
10
  if lower.endswith((".jpg", ".jpeg", ".png", ".gif", ".webp")):
11
  return "image"
@@ -24,8 +25,8 @@ def detect_extra_type(text: str) -> Optional[str]:
24
 
25
  def build_indexed_response(
26
  texts: List[Dict[str, str]],
27
- title_index: Optional[int] = None,
28
- preamble_end: Optional[int] = None,
29
  body_end: Optional[int] = None,
30
  *,
31
  preamble_start: Optional[int] = None,
@@ -36,6 +37,18 @@ def build_indexed_response(
36
  return []
37
 
38
  last_idx = len(texts) - 1
 
 
 
 
 
 
 
 
 
 
 
 
39
  result = []
40
 
41
  def append_item(text: str, fallback_type: str):
@@ -45,51 +58,15 @@ def build_indexed_response(
45
  "type": extra_type if extra_type else fallback_type
46
  })
47
 
48
- # ================================================================
49
- # (1) إذا لم يوجد title_index ولا preamble_end → كل شيء نص عادي
50
- # ================================================================
51
- if title_index is None and preamble_end is None:
52
- for t in texts:
53
- append_item(t["text"], "text")
54
- return result
55
-
56
- # ================================================================
57
- # (2) العنوان
58
- # ================================================================
59
- if title_index is not None:
60
- title_index = min(title_index, last_idx)
61
- append_item(texts[title_index]["text"], "title")
62
-
63
- # ================================================================
64
- # (3) المقدمة
65
- # ================================================================
66
- if preamble_end is not None:
67
- preamble_end = min(preamble_end, last_idx)
68
-
69
- # استنتاج البداية تلقائياً إذا لم يحددها المستخدم
70
- if preamble_start is None:
71
- preamble_start = (title_index + 1) if title_index is not None else 0
72
-
73
- preamble_start = min(preamble_start, last_idx)
74
-
75
- for i in range(preamble_start, preamble_end + 1):
76
- append_item(texts[i]["text"], "preamble")
77
-
78
- # ================================================================
79
- # (4) النص الأساسي (body)
80
- # ================================================================
81
- if preamble_end is not None and body_start is None:
82
- body_start = preamble_end + 1
83
-
84
- if body_start is not None:
85
- body_start = min(body_start, last_idx)
86
-
87
- if body_end is None:
88
- body_end = last_idx
89
 
90
- body_end = min(body_end, last_idx)
 
 
91
 
92
- for i in range(body_start, body_end + 1):
93
- append_item(texts[i]["text"], "body")
 
94
 
95
  return result
 
1
+ from typing import List, Dict, Any, Optional
2
+
3
 
4
  def detect_extra_type(text: str) -> Optional[str]:
5
  """يتعرف على إن كان النص عبارة عن صورة أو رابط."""
6
  lower = text.lower()
7
 
8
  # رابط
9
+ if lower.startswith("http://") or lower.startswith("https://"):
10
  # رابط صورة؟
11
  if lower.endswith((".jpg", ".jpeg", ".png", ".gif", ".webp")):
12
  return "image"
 
25
 
26
  def build_indexed_response(
27
  texts: List[Dict[str, str]],
28
+ title_index: int,
29
+ preamble_end: int,
30
  body_end: Optional[int] = None,
31
  *,
32
  preamble_start: Optional[int] = None,
 
37
  return []
38
 
39
  last_idx = len(texts) - 1
40
+ title_index = min(title_index, last_idx)
41
+ preamble_end = min(preamble_end, last_idx)
42
+ body_end = min(body_end if body_end is not None else last_idx, last_idx)
43
+
44
+ if preamble_start is None:
45
+ preamble_start = title_index + 1
46
+ if body_start is None:
47
+ body_start = preamble_end + 1
48
+
49
+ preamble_start = min(preamble_start, last_idx)
50
+ body_start = min(body_start, last_idx)
51
+
52
  result = []
53
 
54
  def append_item(text: str, fallback_type: str):
 
58
  "type": extra_type if extra_type else fallback_type
59
  })
60
 
61
+ # العنوان
62
+ append_item(texts[title_index]["text"], "title")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ # المقدمة
65
+ for i in range(preamble_start, preamble_end + 1):
66
+ append_item(texts[i]["text"], "preamble")
67
 
68
+ # النص الأساسي (body)
69
+ for i in range(body_start, body_end + 1):
70
+ append_item(texts[i]["text"], "body")
71
 
72
  return result