"""Bayan — Gradio front-end for an Ibn Baz fatwa retrieval assistant.

Boot strategy: chunk preparation runs synchronously so BM25 retrieval works
immediately; the FAISS embedding index is built in a background thread and
picked up lazily once its file appears on disk.
"""

import os
import re

import gradio as gr

from build_index import ensure_chunks, kick_off_background_build, read_status
from evidence import extract_evidence
from retrieval import Retriever

# ---- Fast boot: prepare chunks; build FAISS in background
ensure_chunks()
kick_off_background_build()

# ---- Load retriever (BM25 works immediately; FAISS used when ready)
retriever = Retriever(
    embed_model_name=os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-small")
)
TOP_K = int(os.getenv("TOP_K", "4"))  # internal default (no UI control)


# -------- Helpers --------
def _status_text():
    """Return a human-readable (Arabic) description of the index-build phase."""
    st = read_status()
    phase = st.get("phase", "unknown")
    if phase == "ready":
        return "الفهرس جاهز (FAISS)"
    if phase == "embedding":
        return f"بناء الفهرس في الخلفية… {st.get('done',0)}/{st.get('total',0)}"
    if phase == "chunks_ready":
        return f"تم تجهيز المقاطع ({st.get('total','?')}). جارٍ بدء التضمين…"
    if phase == "waiting_data":
        return f"{st.get('msg','البيانات غير موجودة')}"
    if phase == "error":
        return f"{st.get('msg','خطأ في الفهرس')}"
    return "حالة غير معروفة"


def _clean(s):
    """None-safe strip: treat None as the empty string."""
    return (s or "").strip()


# Sentence splitter: break after Latin '.', '!' or the Arabic question mark.
_SENT_SPLIT_RE = re.compile(r"(?<=[.!؟])\s+")


def _sentences(text):
    """Split *text* into non-empty, stripped sentences."""
    return [p.strip() for p in _SENT_SPLIT_RE.split(text or "") if p.strip()]


def _wrap_quran(s: str) -> str:
    """Ensure Qur'an lines are shown inside ornate brackets ﴿…﴾."""
    s = s.strip()
    if "﴿" in s and "﴾" in s:
        return s
    # Drop a stray single bracket first so we never produce a doubled one.
    s = s.strip("﴿﴾").strip()
    return f"﴿{s}﴾"


# -------- Evidence & verdict selection --------
# Arabic phrase patterns signalling prohibition, permission, and exceptions.
NEG_PATTERNS = [
    r"\bلا\s+يجوز\b", r"\bلا\s+يحل\b", r"\bلا\s+يصح\b", r"\bحرام\b",
    r"\bمحرم\b", r"\bيحرم\b", r"\bمنع\b", r"\bممنوع\b", r"\bباطل\b",
]
POS_PATTERNS = [
    r"\bيجوز\b", r"\bحلال\b", r"\bمباح\b", r"\bلا\s+بأس\b",
    r"\bلا\s+حرج\b", r"\bسنة\b", r"\bمستحب\b", r"\bواجب\b",
]
EXCEPTION_HINTS = [
    r"\bإلا\b", r"\bللدعوة\b", r"\bضرورة\b", r"\bحاجة\b",
    r"\bأمن\s+الفتنة\b", r"\bمع\s+الضوابط\b",
]

# Compiled once at import (was rebuilt on every _pick_snippets call).
# Word boundaries are deliberately stripped so the alternation also fires
# inside longer inflected forms of the phrases.
_RULE_SENTENCE_RE = re.compile(
    "(" + "|".join(p.strip(r"\b") for p in [*NEG_PATTERNS, *POS_PATTERNS]) + ")",
    re.I,
)


def _count_matches(text, patterns):
    """Count how many *patterns* occur at least once in *text*."""
    return sum(1 for p in patterns if re.search(p, text))


def _infer_verdict_general(hits):
    """General-purpose verdict detector over the retrieved text.

    Returns an Arabic verdict string, or None when the signals are too weak
    to decide either way.
    """
    text = " ".join(h["chunk"] for h in hits)
    text = re.sub(r"\s+", " ", text)
    neg = _count_matches(text, NEG_PATTERNS)
    pos = _count_matches(text, POS_PATTERNS)
    has_exception = _count_matches(text, EXCEPTION_HINTS) > 0
    if neg == 0 and pos == 0:
        return None  # unclear
    if neg > pos:
        if has_exception:
            return "الأصل المنع، ويُستثنى للضرورة/الدعوة مع أمن الفتنة"
        return "حرام"
    if pos > neg:
        if has_exception:
            return "الأصل الجواز مع الضوابط"
        return "يجوز"
    # tie-break: prefer المنع if any explicit negation is present
    if neg > 0:
        return "حرام"
    if pos > 0:
        return "يجوز"
    return None


def _pick_snippets(hits, limit=3):
    """Prefer explicit Qur'an/Hadith/Ijma lines, then strong verdict sentences.

    Returns up to *limit* ``(snippet_text, hit)`` pairs.
    """
    snippets = []

    def _full():
        # One shared limit check instead of the former copy-pasted blocks.
        return len(snippets) >= limit

    # 1) Explicit evidence, in priority order: Qur'an, Hadith, Ijma'.
    for h in hits:
        ev = extract_evidence(h["chunk"])
        for q in ev["quran"]:  # Qur'an first, wrapped in ornate brackets
            snippets.append((_wrap_quran(q), h))
            if _full():
                return snippets
        for hd in ev["hadith"]:
            snippets.append((hd, h))
            if _full():
                return snippets
        for ij in ev["ijma"]:
            snippets.append((ij, h))
            if _full():
                return snippets
    # 2) Strong rule-like sentences (negative or positive)
    for h in hits:
        for sent in _sentences(h["chunk"]):
            if _RULE_SENTENCE_RE.search(sent):
                snippets.append((sent, h))
                if _full():
                    return snippets
    # 3) Fallback: first informative sentence from top hits
    for h in hits:
        for sent in _sentences(h["chunk"]):
            if len(sent) > 25:
                snippets.append((sent, h))
                if _full():
                    return snippets
    return snippets


def _format_main_answer(hits):
    """Build the markdown answer: verdict headline plus quoted snippets."""
    # Try general verdict; if still None but we do have hits, choose a
    # cautious closest form.
    verdict = _infer_verdict_general(hits)
    if verdict is None:
        if hits:
            # closest conservative reading when signals exist but are weak
            verdict = "الأقرب: المنع"
        else:
            verdict = "غير واضح"
    snippets = _pick_snippets(hits, limit=3)
    lines = [f"### الحكم: **{verdict}**"]
    if snippets:
        lines.append("\n**توضيح مختصر (من النصوص المسترجعة):**")
        for (txt, _h) in snippets:
            lines.append(f"> {txt}")
    return "\n".join(lines)


def _format_refs(hits, limit=4):
    """Numbered markdown list of up to *limit* source references."""
    out = []
    for i, h in enumerate(hits[:limit], 1):
        title = _clean(h.get("title")) or "مرجع"
        link = _clean(h.get("link"))
        cat = _clean(h.get("category"))
        if link:
            out.append(f"{i}. **{title}** — {cat} — [الرابط]({link})")
        else:
            out.append(f"{i}. **{title}** — {cat}")
    return "\n".join(out) if out else "—"


def _format_context(hits, limit_chars=400):
    """Markdown blockquotes of the first *limit_chars* chars of each hit."""
    blocks = []
    for i, h in enumerate(hits, 1):
        title = _clean(h.get("title")) or f"المصدر {i}"
        link = _clean(h.get("link"))
        chunk = _clean(h.get("chunk"))
        snippet = (chunk[:limit_chars].rstrip() + "…") if len(chunk) > limit_chars else chunk
        hdr = f"**{i}. {title}**" + (f" — [الرابط]({link})" if link else "")
        blocks.append(hdr + "\n\n> " + snippet.replace("\n", "\n> "))
    return "\n\n---\n\n".join(blocks) if blocks else "—"


# -------- Gradio callbacks --------
def answer_fn(question: str):
    """Main callback: returns (answer_md, refs_md, context_md)."""
    if not question or not question.strip():
        return "يرجى كتابة السؤال.", "—", "—"
    # If FAISS finished while app runs, re-init for better retrieval
    global retriever
    if getattr(retriever, "faiss", None) is None and os.path.exists("index/faiss.index"):
        try:
            retriever = Retriever(
                embed_model_name=os.getenv("EMBED_MODEL", "intfloat/multilingual-e5-small")
            )
        except Exception:
            pass  # best effort: keep serving with the BM25-only retriever
    hits = retriever.search(question, k=TOP_K)
    if not hits:
        # Only here print غير واضح per your requirement
        return "### الحكم: **غير واضح**\n\n> لم نعثر على نصوص ذات صلة في فتاوى ابن باز.", "—", "—"
    main_md = _format_main_answer(hits)
    refs_md = _format_refs(hits)
    ctx_md = _format_context(hits)
    return main_md, refs_md, ctx_md


def status_fn():
    """Refresh-button callback: current index-build status text."""
    return _status_text()


# ---- RTL + centered layout
css = """
.gradio-container { direction: rtl; text-align: right; }
textarea, input, .wrap, .prose, .markdown-body { direction: rtl; text-align: right; }
.centered { max-width: 880px; margin-left: auto !important; margin-right: auto !important; }
.centered .gr-textbox, .centered .gr-textbox textarea { text-align: right; }
.centered .gr-button { display: block; margin: 0.5rem auto; }
.footer { font-size: 12px; color: #6b7280; text-align: center; margin-top: 1rem; }
"""

with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    with gr.Column(elem_classes=["centered"]):
        # Title & description
        gr.Markdown("## بيان - مساعد الفتاوى للشيخ ابن باز")
        gr.Markdown("يظهر النتائج بناء على الفتاوى في موقع الشيخ ابن باز - لأغراض تعليمية فقط -")
        # Input
        q = gr.Textbox(label="سؤالك", placeholder="اكتب سؤالك هنا…", lines=3)
        btn = gr.Button("أجب")
        # Main answer (verdict-first)
        answer_md = gr.Markdown(label="الإجابة")
        # Optional panels (collapsed)
        with gr.Accordion("المراجع (اضغط للعرض)", open=False):
            refs_md = gr.Markdown()
        with gr.Accordion("النصوص المستعملة (اختياري)", open=False):
            ctx_md = gr.Markdown()
        # Footer: FAISS status at the very bottom (collapsed, manual refresh)
        with gr.Accordion("حالة الفهرسة (FAISS) — اضغط للتحديث", open=False):
            status_md = gr.Markdown(value=_status_text())
            refresh_btn = gr.Button("تحديث الحالة")
            refresh_btn.click(status_fn, inputs=None, outputs=status_md)
        gr.Markdown(
            "المحتوى المستعمل من موقع سماحة الشيخ عبدالعزيز بن باز "
            "[binbaz.org.sa](https://binbaz.org.sa). "
            "جميع الحقوق محفوظة لأصحابها. هذا المنتج تعليمي غير رسمي.",
            elem_classes=["footer"],
        )
    btn.click(answer_fn, inputs=[q], outputs=[answer_md, refs_md, ctx_md])

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)