Mazenbs committed on
Commit
8bdaf1f
·
verified ·
1 Parent(s): 864f855

Update helpers/indexer.py

Browse files
Files changed (1) hide show
  1. helpers/indexer.py +46 -65
helpers/indexer.py CHANGED
@@ -1,70 +1,51 @@
1
  from typing import List, Dict, Any, Optional
2
 
3
def detect_extra_type(text: str) -> Optional[str]:
    """Detect whether *text* is an image reference or a link.

    Surrounding whitespace is ignored and matching is case-insensitive.

    Returns:
        ``"image"`` for an http(s) URL whose path ends in a known image
        extension, for a ``data:image/`` URI, or for a bare file name with an
        image extension; ``"link"`` for any other http(s) URL; ``None``
        otherwise.
    """
    image_suffixes = (".jpg", ".jpeg", ".png", ".gif", ".webp")

    # Normalize once: callers store the stripped text, so detection must look
    # at the same value or padded URLs would go unrecognized.
    lower = text.strip().lower()

    # Link
    if lower.startswith(("http://", "https://")):
        # Image link? Judge by the path only, so a query string or fragment
        # neither hides a real extension nor fakes one.
        path = lower.partition("#")[0].partition("?")[0]
        if path.endswith(image_suffixes):
            return "image"
        return "link"

    # Base64-encoded image
    if lower.startswith("data:image/"):
        return "image"

    # Image file name that is not a link
    if lower.endswith(image_suffixes):
        return "image"

    return None
23
-
24
def build_indexed_response(
    texts: List[Dict[str, str]],
    title_index: int,
    preamble_end: int,
    body_end: Optional[int] = None,
    *,
    preamble_start: Optional[int] = None,
    body_start: Optional[int] = None
) -> List[Dict[str, str]]:
    """Label entries of *texts* as title, preamble, or body.

    Args:
        texts: Items carrying a ``"text"`` key.
        title_index: Index of the title entry (clamped to the last index).
        preamble_end: Inclusive last index of the preamble (clamped).
        body_end: Inclusive last index of the body; defaults to the last
            entry (clamped).
        preamble_start: First preamble index; defaults to ``title_index + 1``.
        body_start: First body index; defaults to ``preamble_end + 1``.

    Returns:
        A list of ``{"text", "type"}`` dicts: the title first, then the
        preamble entries, then the body entries. Each text is stripped. The
        type is whatever ``detect_extra_type`` reports for the stripped text,
        or the section name when it reports nothing. An empty *texts* yields
        an empty list.
    """
    if not texts:
        return []

    last_idx = len(texts) - 1
    title_index = min(title_index, last_idx)
    preamble_end = min(preamble_end, last_idx)
    body_end = last_idx if body_end is None else min(body_end, last_idx)

    if preamble_start is None:
        preamble_start = title_index + 1
    if body_start is None:
        body_start = preamble_end + 1

    # The start indices are deliberately NOT clamped to last_idx: a start past
    # the (already clamped) end must give an empty range. Clamping it back
    # would emit the last entry a second time under another label.

    result: List[Dict[str, str]] = []

    def append_item(text: str, fallback_type: str) -> None:
        # Detect on the same stripped value that is stored.
        cleaned = text.strip()
        extra_type = detect_extra_type(cleaned)
        result.append({
            "text": cleaned,
            "type": extra_type if extra_type else fallback_type
        })

    # Title
    append_item(texts[title_index]["text"], "title")

    # Preamble
    for i in range(preamble_start, preamble_end + 1):
        append_item(texts[i]["text"], "preamble")

    # Main text (body)
    for i in range(body_start, body_end + 1):
        append_item(texts[i]["text"], "body")

    return result
 
 
 
1
  from typing import List, Dict, Any, Optional
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
def build_indexed_response(
    texts: List[Dict[str, str]],
    title_index: int,
    preamble_end: int,
    body_end: Optional[int] = None,
    *,
    preamble_start: Optional[int] = None,
    body_start: Optional[int] = None
) -> List[Dict[str, str]]:
    """Label entries of *texts* as title, preamble, or body.

    Args:
        texts: Items carrying a ``"text"`` key.
        title_index: Index of the title entry (clamped to the last index).
        preamble_end: Inclusive last index of the preamble (clamped).
        body_end: Inclusive last index of the body; defaults to the last
            entry (clamped).
        preamble_start: First preamble index; defaults to ``title_index + 1``.
        body_start: First body index; defaults to ``preamble_end + 1``.

    Returns:
        A list of ``{"text", "type"}`` dicts: the title first, then the
        preamble entries, then the body entries, each with its text stripped.
        An empty *texts* yields an empty list. A section whose start lies
        past its end contributes nothing.
    """
    if not texts:
        return []

    last_idx = len(texts) - 1
    title_index = min(title_index, last_idx)
    preamble_end = min(preamble_end, last_idx)
    body_end = last_idx if body_end is None else min(body_end, last_idx)

    if preamble_start is None:
        preamble_start = title_index + 1
    if body_start is None:
        body_start = preamble_end + 1

    # The start indices are deliberately NOT clamped to last_idx: a start past
    # the (already clamped) end must give an empty range. Clamping it back
    # would emit the last entry a second time under another label.

    def tagged(index: int, kind: str) -> Dict[str, str]:
        return {"text": texts[index]["text"].strip(), "type": kind}

    # Title
    result = [tagged(title_index, "title")]

    # Preamble
    result.extend(
        tagged(i, "preamble") for i in range(preamble_start, preamble_end + 1)
    )

    # Rest of the text (body)
    result.extend(tagged(i, "body") for i in range(body_start, body_end + 1))

    return result