CIAZIZ committed on
Commit
4eea87d
·
verified ·
1 Parent(s): 5b9f444

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -29
app.py CHANGED
@@ -12,22 +12,20 @@ retriever = Retriever(embed_model_name=os.getenv("EMBED_MODEL", "intfloat/multil
12
  TOP_K = int(os.getenv("TOP_K", "4")) # internal default (no UI control)
13
 
14
  # -------- Helpers --------
15
- NEAR = 32 # chars window for proximity checks
16
-
17
  def _status_text():
18
  st = read_status()
19
  phase = st.get("phase","unknown")
20
  if phase == "ready":
21
- return "الفهرس جاهز (FAISS)"
22
  if phase == "embedding":
23
- return f"بناء الفهرس في الخلفية… {st.get('done',0)}/{st.get('total',0)}"
24
  if phase == "chunks_ready":
25
- return f"🗂️ تم تجهيز المقاطع ({st.get('total','?')}). جارٍ بدء التضمين…"
26
  if phase == "waiting_data":
27
- return f"⚠️ {st.get('msg','البيانات غير موجودة')}"
28
  if phase == "error":
29
- return f"{st.get('msg','خطأ في الفهرس')}"
30
- return "ℹ️ حالة غير معروفة"
31
 
32
  def _clean(s):
33
  return (s or "").strip()
@@ -36,42 +34,100 @@ def _sentences(text):
36
  parts = re.split(r"(?<=[\.\!\؟\!])\s+", text or "")
37
  return [p.strip() for p in parts if p.strip()]
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def _pick_snippets(hits, limit=3):
40
- # Prefer explicit evidence; fallback to rule-based sentences.
41
  snippets = []
 
42
  for h in hits:
43
  ev = extract_evidence(h["chunk"])
44
- for lst in (ev["quran"], ev["hadith"], ev["ijma"]):
45
- for q in lst:
46
- snippets.append((q, h))
47
- if len(snippets) >= limit: return snippets
48
- pat = re.compile(r"(?:حرام|محر(?:م)|لا يجوز|لا تأكل|اترك)\b", re.I)
 
 
 
 
 
 
 
 
 
 
49
  for h in hits:
50
  for sent in _sentences(h["chunk"]):
51
- if "خنزير" in sent and pat.search(sent):
52
  snippets.append((sent, h))
53
  if len(snippets) >= limit: return snippets
 
 
54
  for h in hits:
55
  for sent in _sentences(h["chunk"]):
56
- if ("خنزير" in sent and ("حرم" in sent or "تحريم" in sent)) or len(sent) > 25:
57
  snippets.append((sent, h))
58
  if len(snippets) >= limit: return snippets
59
  return snippets
60
 
61
- def _infer_verdict(hits):
62
- text = " ".join(h["chunk"] for h in hits)
63
- text = re.sub(r"\s+", " ", text)
64
- if re.search(r"(حرام|محر(?:م)|لا يجوز).{0,"+str(NEAR)+r"}خنزير", text): return "حرام"
65
- if re.search(r"خنزير.{0,"+str(NEAR)+r"}(حرام|محر(?:م)|لا يجوز)", text): return "حرام"
66
- if re.search(r"(لا تأكل|فدع(?:ه)?|اتركه).{0,"+str(NEAR)+r"}خنزير", text): return "حرام"
67
- if re.search(r"خنزير.{0,"+str(NEAR)+r"}(لا تأكل|فدع(?:ه)?|اتركه)", text): return "حرام"
68
- if "محرم بنص الكتاب والسنة" in text or "إجماع المسلمين" in text: return "حرام"
69
- if re.search(r"(حلال|يجوز).{0,"+str(NEAR)+r"}خنزير", text) or re.search(r"خنزير.{0,"+str(NEAR)+r"}(حلال|يجوز)", text):
70
- return "حلال"
71
- return "غير واضح من النصوص المسترجعة"
72
-
73
  def _format_main_answer(hits):
74
- verdict = _infer_verdict(hits)
 
 
 
 
 
 
 
75
  snippets = _pick_snippets(hits, limit=3)
76
 
77
  lines = [f"### الحكم: **{verdict}**"]
@@ -117,6 +173,11 @@ def answer_fn(question: str):
117
  pass
118
 
119
  hits = retriever.search(question, k=TOP_K)
 
 
 
 
 
120
  main_md = _format_main_answer(hits)
121
  refs_md = _format_refs(hits)
122
  ctx_md = _format_context(hits)
 
12
  TOP_K = int(os.getenv("TOP_K", "4")) # internal default (no UI control)
13
 
14
  # -------- Helpers --------
 
 
15
  def _status_text():
16
  st = read_status()
17
  phase = st.get("phase","unknown")
18
  if phase == "ready":
19
+ return "الفهرس جاهز (FAISS)"
20
  if phase == "embedding":
21
+ return f"بناء الفهرس في الخلفية… {st.get('done',0)}/{st.get('total',0)}"
22
  if phase == "chunks_ready":
23
+ return f"تم تجهيز المقاطع ({st.get('total','?')}). جارٍ بدء التضمين…"
24
  if phase == "waiting_data":
25
+ return f"{st.get('msg','البيانات غير موجودة')}"
26
  if phase == "error":
27
+ return f"{st.get('msg','خطأ في الفهرس')}"
28
+ return "حالة غير معروفة"
29
 
30
  def _clean(s):
31
  return (s or "").strip()
 
34
  parts = re.split(r"(?<=[\.\!\؟\!])\s+", text or "")
35
  return [p.strip() for p in parts if p.strip()]
36
 
37
+ def _wrap_quran(s: str) -> str:
38
+ """Ensure Qur'an lines are shown with ornate brackets."""
39
+ s = s.strip()
40
+ if "﴿" in s and "﴾" in s:
41
+ return s
42
+ # if it's already quoted without brackets, wrap it
43
+ return f"﴿{s}﴾"
44
+
45
+ # -------- Evidence & verdict selection --------
46
+ NEG_PATTERNS = [
47
+ r"\bلا\s+يجوز\b", r"\bلا\s+يحل\b", r"\bلا\s+يصح\b",
48
+ r"\bحرام\b", r"\bمحرم\b", r"\bيحرم\b", r"\bمنع\b", r"\bممنوع\b", r"\bباطل\b"
49
+ ]
50
+ POS_PATTERNS = [
51
+ r"\bيجوز\b", r"\bحلال\b", r"\bمباح\b", r"\bلا\s+بأس\b", r"\bلا\s+حرج\b",
52
+ r"\bسنة\b", r"\bمستحب\b", r"\bواجب\b"
53
+ ]
54
+ EXCEPTION_HINTS = [r"\bإلا\b", r"\bللدعوة\b", r"\bضرورة\b", r"\bحاجة\b", r"\bأمن\s+الفتنة\b", r"\bمع\s+الضوابط\b"]
55
+
56
+ def _count_matches(text, patterns):
57
+ return sum(1 for p in patterns if re.search(p, text))
58
+
59
+ def _infer_verdict_general(hits):
60
+ """General purpose verdict detector from retrieved text."""
61
+ text = " ".join(h["chunk"] for h in hits)
62
+ text = re.sub(r"\s+", " ", text)
63
+
64
+ neg = _count_matches(text, NEG_PATTERNS)
65
+ pos = _count_matches(text, POS_PATTERNS)
66
+ has_exception = _count_matches(text, EXCEPTION_HINTS) > 0
67
+
68
+ if neg == 0 and pos == 0:
69
+ return None # unclear
70
+
71
+ if neg > pos:
72
+ if has_exception:
73
+ return "الأصل المنع، ويُستثنى للضرورة/الدعوة مع أمن الفتنة"
74
+ return "حرام"
75
+ if pos > neg:
76
+ if has_exception:
77
+ return "الأصل الجواز مع الضوابط"
78
+ return "يجوز"
79
+
80
+ # tie-break: prefer المنع if any explicit negation is present
81
+ if neg > 0:
82
+ return "حرام"
83
+ if pos > 0:
84
+ return "يجوز"
85
+ return None
86
+
87
  def _pick_snippets(hits, limit=3):
88
+ """Prefer explicit Qur'an/Hadith/Ijma lines, then strong verdict sentences."""
89
  snippets = []
90
+ # 1) Explicit evidence
91
  for h in hits:
92
  ev = extract_evidence(h["chunk"])
93
+ # Qur'an first, wrapped in ornate brackets
94
+ for q in ev["quran"]:
95
+ snippets.append((_wrap_quran(q), h))
96
+ if len(snippets) >= limit: return snippets
97
+ # Hadith
98
+ for hd in ev["hadith"]:
99
+ snippets.append((hd, h))
100
+ if len(snippets) >= limit: return snippets
101
+ # Ijma'
102
+ for ij in ev["ijma"]:
103
+ snippets.append((ij, h))
104
+ if len(snippets) >= limit: return snippets
105
+
106
+ # 2) Strong rule-like sentences (negative or positive)
107
+ neg_or_pos = re.compile("(" + "|".join(p.strip(r"\b") for p in [*NEG_PATTERNS, *POS_PATTERNS]) + ")", re.I)
108
  for h in hits:
109
  for sent in _sentences(h["chunk"]):
110
+ if neg_or_pos.search(sent):
111
  snippets.append((sent, h))
112
  if len(snippets) >= limit: return snippets
113
+
114
+ # 3) Fallback: first informative sentence from top hits
115
  for h in hits:
116
  for sent in _sentences(h["chunk"]):
117
+ if len(sent) > 25:
118
  snippets.append((sent, h))
119
  if len(snippets) >= limit: return snippets
120
  return snippets
121
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  def _format_main_answer(hits):
123
+ # Try general verdict; if still None but we do have hits, choose a cautious closest form.
124
+ verdict = _infer_verdict_general(hits)
125
+ if verdict is None:
126
+ if hits:
127
+ verdict = "الأقرب: المنع" # closest conservative reading when signals exist but weak
128
+ else:
129
+ verdict = "غير واضح"
130
+
131
  snippets = _pick_snippets(hits, limit=3)
132
 
133
  lines = [f"### الحكم: **{verdict}**"]
 
173
  pass
174
 
175
  hits = retriever.search(question, k=TOP_K)
176
+
177
+ if not hits:
178
+ # Only here print غير واضح per your requirement
179
+ return "### الحكم: **غير واضح**\n\n> لم نعثر على نصوص ذات صلة في فتاوى ابن باز.", "—", "—"
180
+
181
  main_md = _format_main_answer(hits)
182
  refs_md = _format_refs(hits)
183
  ctx_md = _format_context(hits)