Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,22 +12,20 @@ retriever = Retriever(embed_model_name=os.getenv("EMBED_MODEL", "intfloat/multil
|
|
| 12 |
# Number of hits requested from the retriever per question (passed as k= to retriever.search).
TOP_K = int(os.getenv("TOP_K", "4")) # internal default (no UI control)
|
| 13 |
|
| 14 |
# -------- Helpers --------
|
| 15 |
-
# Maximum number of characters allowed between two terms in the proximity regexes of _infer_verdict.
NEAR = 32 # chars window for proximity checks
|
| 16 |
-
|
| 17 |
def _status_text():
|
| 18 |
st = read_status()
|
| 19 |
phase = st.get("phase","unknown")
|
| 20 |
if phase == "ready":
|
| 21 |
-
return "
|
| 22 |
if phase == "embedding":
|
| 23 |
-
return f"
|
| 24 |
if phase == "chunks_ready":
|
| 25 |
-
return f"
|
| 26 |
if phase == "waiting_data":
|
| 27 |
-
return f"
|
| 28 |
if phase == "error":
|
| 29 |
-
return f"
|
| 30 |
-
return "
|
| 31 |
|
| 32 |
def _clean(s):
|
| 33 |
return (s or "").strip()
|
|
@@ -36,42 +34,100 @@ def _sentences(text):
|
|
| 36 |
parts = re.split(r"(?<=[\.\!\؟\!])\s+", text or "")
|
| 37 |
return [p.strip() for p in parts if p.strip()]
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
def _pick_snippets(hits, limit=3):
|
| 40 |
-
|
| 41 |
snippets = []
|
|
|
|
| 42 |
for h in hits:
|
| 43 |
ev = extract_evidence(h["chunk"])
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
for h in hits:
|
| 50 |
for sent in _sentences(h["chunk"]):
|
| 51 |
-
if
|
| 52 |
snippets.append((sent, h))
|
| 53 |
if len(snippets) >= limit: return snippets
|
|
|
|
|
|
|
| 54 |
for h in hits:
|
| 55 |
for sent in _sentences(h["chunk"]):
|
| 56 |
-
if
|
| 57 |
snippets.append((sent, h))
|
| 58 |
if len(snippets) >= limit: return snippets
|
| 59 |
return snippets
|
| 60 |
|
| 61 |
-
def _infer_verdict(hits):
    """Derive a verdict label from the retrieved chunks using proximity regexes.

    All ``h["chunk"]`` texts are joined and whitespace-normalised, then searched
    for a ruling term within ``NEAR`` characters of the word «خنزير», in either
    order. Prohibition signals are checked before permission signals.

    Returns:
        "حرام", "حلال", or the "unclear" message when nothing matches.
    """
    merged = re.sub(r"\s+", " ", " ".join(hit["chunk"] for hit in hits))
    gap = ".{0," + str(NEAR) + "}"
    subject = r"خنزير"

    def near_subject(group):
        """True when ``group`` occurs within the window before or after the subject."""
        before = re.search(group + gap + subject, merged)
        after = re.search(subject + gap + group, merged)
        return bool(before or after)

    # Explicit prohibition wording near the subject.
    if near_subject(r"(حرام|محر(?:م)|لا يجوز)"):
        return "حرام"
    # Imperatives telling the asker to avoid it.
    if near_subject(r"(لا تأكل|فدع(?:ه)?|اتركه)"):
        return "حرام"
    # Fixed phrases treated as a prohibition regardless of position.
    if "محرم بنص الكتاب والسنة" in merged or "إجماع المسلمين" in merged:
        return "حرام"
    # Permission wording is only considered after every prohibition check failed.
    if near_subject(r"(حلال|يجوز)"):
        return "حلال"
    return "غير واضح من النصوص المسترجعة"
|
| 72 |
-
|
| 73 |
def _format_main_answer(hits):
|
| 74 |
-
verdict
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
snippets = _pick_snippets(hits, limit=3)
|
| 76 |
|
| 77 |
lines = [f"### الحكم: **{verdict}**"]
|
|
@@ -117,6 +173,11 @@ def answer_fn(question: str):
|
|
| 117 |
pass
|
| 118 |
|
| 119 |
hits = retriever.search(question, k=TOP_K)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
main_md = _format_main_answer(hits)
|
| 121 |
refs_md = _format_refs(hits)
|
| 122 |
ctx_md = _format_context(hits)
|
|
|
|
| 12 |
# Number of hits requested from the retriever per question (passed as k= to retriever.search).
TOP_K = int(os.getenv("TOP_K", "4")) # internal default (no UI control)
|
| 13 |
|
| 14 |
# -------- Helpers --------
|
|
|
|
|
|
|
| 15 |
def _status_text():
    """Return the Arabic status line for the phase reported by ``read_status()``.

    The ``phase`` key selects the message; ``done``, ``total`` and ``msg`` are
    read from the same mapping where the message needs them. Any phase not
    listed below yields the "unknown state" text.
    """
    status = read_status()
    phase = status.get("phase", "unknown")

    if phase == "ready":
        text = "الفهرس جاهز (FAISS)"
    elif phase == "embedding":
        done = status.get('done', 0)
        total = status.get('total', 0)
        text = f"بناء الفهرس في الخلفية… {done}/{total}"
    elif phase == "chunks_ready":
        total = status.get('total', '?')
        text = f"تم تجهيز المقاطع ({total}). جارٍ بدء التضمين…"
    elif phase == "waiting_data":
        message = status.get('msg', 'البيانات غير موجودة')
        text = f"{message}"
    elif phase == "error":
        message = status.get('msg', 'خطأ في الفهرس')
        text = f"{message}"
    else:
        text = "حالة غير معروفة"
    return text
|
| 29 |
|
| 30 |
def _clean(s):
|
| 31 |
return (s or "").strip()
|
|
|
|
| 34 |
parts = re.split(r"(?<=[\.\!\؟\!])\s+", text or "")
|
| 35 |
return [p.strip() for p in parts if p.strip()]
|
| 36 |
|
| 37 |
+
def _wrap_quran(s: str) -> str:
|
| 38 |
+
"""Ensure Qur'an lines are shown with ornate brackets."""
|
| 39 |
+
s = s.strip()
|
| 40 |
+
if "﴿" in s and "﴾" in s:
|
| 41 |
+
return s
|
| 42 |
+
# if it's already quoted without brackets, wrap it
|
| 43 |
+
return f"﴿{s}﴾"
|
| 44 |
+
|
| 45 |
+
# -------- Evidence & verdict selection --------
|
| 46 |
+
NEG_PATTERNS = [
|
| 47 |
+
r"\bلا\s+يجوز\b", r"\bلا\s+يحل\b", r"\bلا\s+يصح\b",
|
| 48 |
+
r"\bحرام\b", r"\bمحرم\b", r"\bيحرم\b", r"\bمنع\b", r"\bممنوع\b", r"\bباطل\b"
|
| 49 |
+
]
|
| 50 |
+
POS_PATTERNS = [
|
| 51 |
+
r"\bيجوز\b", r"\bحلال\b", r"\bمباح\b", r"\bلا\s+بأس\b", r"\bلا\s+حرج\b",
|
| 52 |
+
r"\bسنة\b", r"\bمستحب\b", r"\bواجب\b"
|
| 53 |
+
]
|
| 54 |
+
EXCEPTION_HINTS = [r"\bإلا\b", r"\bللدعوة\b", r"\bضرورة\b", r"\bحاجة\b", r"\bأمن\s+الفتنة\b", r"\bمع\s+الضوابط\b"]
|
| 55 |
+
|
| 56 |
+
def _count_matches(text, patterns):
|
| 57 |
+
return sum(1 for p in patterns if re.search(p, text))
|
| 58 |
+
|
| 59 |
+
def _infer_verdict_general(hits):
|
| 60 |
+
"""General purpose verdict detector from retrieved text."""
|
| 61 |
+
text = " ".join(h["chunk"] for h in hits)
|
| 62 |
+
text = re.sub(r"\s+", " ", text)
|
| 63 |
+
|
| 64 |
+
neg = _count_matches(text, NEG_PATTERNS)
|
| 65 |
+
pos = _count_matches(text, POS_PATTERNS)
|
| 66 |
+
has_exception = _count_matches(text, EXCEPTION_HINTS) > 0
|
| 67 |
+
|
| 68 |
+
if neg == 0 and pos == 0:
|
| 69 |
+
return None # unclear
|
| 70 |
+
|
| 71 |
+
if neg > pos:
|
| 72 |
+
if has_exception:
|
| 73 |
+
return "الأصل المنع، ويُستثنى للضرورة/الدعوة مع أمن الفتنة"
|
| 74 |
+
return "حرام"
|
| 75 |
+
if pos > neg:
|
| 76 |
+
if has_exception:
|
| 77 |
+
return "الأصل الجواز مع الضوابط"
|
| 78 |
+
return "يجوز"
|
| 79 |
+
|
| 80 |
+
# tie-break: prefer المنع if any explicit negation is present
|
| 81 |
+
if neg > 0:
|
| 82 |
+
return "حرام"
|
| 83 |
+
if pos > 0:
|
| 84 |
+
return "يجوز"
|
| 85 |
+
return None
|
| 86 |
+
|
| 87 |
def _strip_word_boundaries(pattern):
    """Remove one leading and one trailing word-boundary anchor from a regex source."""
    anchor = r"\b"
    if pattern.startswith(anchor):
        pattern = pattern[len(anchor):]
    if pattern.endswith(anchor):
        pattern = pattern[:-len(anchor)]
    return pattern


def _pick_snippets(hits, limit=3):
    """Prefer explicit Qur'an/Hadith/Ijma lines, then strong verdict sentences.

    Selection runs in three stages and stops as soon as ``limit`` snippets
    have been collected:

    1. Evidence returned by ``extract_evidence`` for each hit, in the order
       Qur'an (wrapped in ornate brackets), Hadith, Ijma'.
    2. Sentences containing any prohibition or permission keyword.
    3. Fallback: any sentence longer than 25 characters.

    A text that was already selected is not added again, so a sentence picked
    in stage 2 is not repeated by the stage-3 fallback.

    Args:
        hits: Iterable of mappings, each with a ``"chunk"`` string.
        limit: Maximum number of snippets to return; ``<= 0`` yields ``[]``.

    Returns:
        A list of ``(snippet_text, hit)`` tuples, at most ``limit`` long.
    """
    snippets = []
    if limit <= 0:
        return snippets

    def add(text, hit):
        """Append ``text`` unless already selected; return True once ``limit`` is reached."""
        # Linear scan is fine: the list never exceeds ``limit`` entries.
        if all(text != existing for existing, _ in snippets):
            snippets.append((text, hit))
        return len(snippets) >= limit

    # 1) Explicit evidence
    for h in hits:
        ev = extract_evidence(h["chunk"])
        # Qur'an first, wrapped in ornate brackets
        for q in ev["quran"]:
            if add(_wrap_quran(q), h):
                return snippets
        # Hadith
        for hd in ev["hadith"]:
            if add(hd, h):
                return snippets
        # Ijma'
        for ij in ev["ijma"]:
            if add(ij, h):
                return snippets

    # 2) Strong rule-like sentences (negative or positive)
    # Outer word-boundary anchors are removed so keywords also match when
    # glued to a prefix or suffix; only the anchor itself is removed, never
    # literal characters of the pattern.
    alternatives = "|".join(
        _strip_word_boundaries(p) for p in [*NEG_PATTERNS, *POS_PATTERNS]
    )
    neg_or_pos = re.compile("(" + alternatives + ")", re.I)
    for h in hits:
        for sent in _sentences(h["chunk"]):
            if neg_or_pos.search(sent) and add(sent, h):
                return snippets

    # 3) Fallback: first informative sentence from top hits
    for h in hits:
        for sent in _sentences(h["chunk"]):
            if len(sent) > 25 and add(sent, h):
                return snippets
    return snippets
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
def _format_main_answer(hits):
|
| 123 |
+
# Try general verdict; if still None but we do have hits, choose a cautious closest form.
|
| 124 |
+
verdict = _infer_verdict_general(hits)
|
| 125 |
+
if verdict is None:
|
| 126 |
+
if hits:
|
| 127 |
+
verdict = "الأقرب: المنع" # closest conservative reading when signals exist but weak
|
| 128 |
+
else:
|
| 129 |
+
verdict = "غير واضح"
|
| 130 |
+
|
| 131 |
snippets = _pick_snippets(hits, limit=3)
|
| 132 |
|
| 133 |
lines = [f"### الحكم: **{verdict}**"]
|
|
|
|
| 173 |
pass
|
| 174 |
|
| 175 |
hits = retriever.search(question, k=TOP_K)
|
| 176 |
+
|
| 177 |
+
if not hits:
|
| 178 |
+
# Only here print غير واضح per your requirement
|
| 179 |
+
return "### الحكم: **غير واضح**\n\n> لم نعثر على نصوص ذات صلة في فتاوى ابن باز.", "—", "—"
|
| 180 |
+
|
| 181 |
main_md = _format_main_answer(hits)
|
| 182 |
refs_md = _format_refs(hits)
|
| 183 |
ctx_md = _format_context(hits)
|