Spaces:
Running
Running
import importlib.util
import os

# huggingface_hub evaluates HF_HUB_ENABLE_HF_TRANSFER when it is first imported,
# and both gradio and sentence_transformers import it. The flag is therefore set
# before any of those imports; assigning it afterwards has no effect.
# It is only switched on when the optional `hf_transfer` package is installed,
# because downloads fail when the flag is enabled without it.
# NOTE(review): exact behaviour depends on the installed huggingface_hub
# version -- confirm against the Space's requirements.
if importlib.util.find_spec("hf_transfer") is not None:
    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import html  # noqa: E402
import pickle  # noqa: E402

import faiss  # noqa: E402
import gradio as gr  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
| # =============================== | |
| # Load data & models (ONCE) | |
| # =============================== | |
def _load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE: unpickling can execute arbitrary code; these files must only ever
    # be artifacts produced by this project, never user-supplied data.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Tabular hadith data and the embedding array stored next to it.
df = pd.read_csv("data/hadith.csv")
hadith_embeddings = np.load("data/hadith_embeddings.npy")
print(f"Loaded hadith embeddings: {hadith_embeddings.shape}")

# Pickled BM25 object handed to hybrid_search_fixed as `bm25`.
bm25 = _load_pickle("data/bm25.pkl")

# FAISS index over the anchor texts, plus the pickled lookups that go with it.
anchor_index = faiss.read_index("data/faiss_anchor.index")
print(f"Anchor index dimension: {anchor_index.d}")
anchor_dict = _load_pickle("data/anchor_dict.pkl")
unique_anchor_texts = _load_pickle("data/unique_anchor_texts.pkl")

# Sentence encoder used for queries.
model = SentenceTransformer("omarelshehy/arabic-english-sts-matryoshka-v2.0")
model.max_seq_length = 512

# Fail fast at start-up if the encoder's output width does not match the
# dimensionality the anchor index was built with.
test_emb = model.encode("test", normalize_embeddings=True)
print(f"Model embedding dimension: {test_emb.shape}")
if anchor_index.d != test_emb.shape[0]:
    raise ValueError(
        f"Dimension mismatch! Model outputs {test_emb.shape[0]}D but "
        f"anchor_index expects {anchor_index.d}D. Rebuild your anchor_index."
    )

# Project-local modules; kept at this position, after the heavy artifacts are
# loaded, exactly as in the original file.
from retrieval import hybrid_search_fixed
from utils import preprocess_query
| # =============================== | |
| # Helper functions | |
| # =============================== | |
def safe_get(row, col):
    """Return the value stored under *col* in *row* as a stripped string.

    Args:
        row: A mapping-like object (``dict``, ``pandas.Series``, ...). Objects
            exposing ``.get`` are read through it; anything else is read with
            ``row[col]`` after a ``col in row`` membership check.
        col: Key / column label to look up.

    Returns:
        ``str(value).strip()``, or ``""`` when the key is absent, the value is
        missing (``None``, any NaN, ``pd.NA``, ``pd.NaT``), or the lookup fails.
    """
    try:
        if hasattr(row, "get"):
            value = row.get(col, "")
        else:
            value = row[col] if col in row else ""
        # pd.isna recognises None, Python/NumPy NaN of any float width, pd.NA
        # and pd.NaT. The is_scalar guard keeps it from being applied
        # element-wise to containers (which would yield an array, not a bool).
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return str(value).strip()
    except Exception:
        # Deliberate best effort: one malformed cell must never break the
        # rendering of a whole result page.
        return ""
def first_nonempty(row, cols):
    """Return the first non-empty ``safe_get(row, c)`` for ``c`` in *cols*.

    Columns are tried in the given order and evaluation stops at the first
    hit; ``""`` is returned when every column is empty or absent.
    """
    candidates = (safe_get(row, name) for name in cols)
    return next((text for text in candidates if text), "")
def short_preview(text, length=300):
    """Return *text* stripped and shortened to roughly *length* characters.

    Text that already fits is returned unchanged (after stripping). Longer
    text is cut at *length*, trimmed back to the last space inside that
    prefix when there is one, and suffixed with ``"..."``.
    """
    cleaned = text.strip() if text else ""
    if len(cleaned) > length:
        clipped = cleaned[:length]
        last_space = clipped.rfind(" ")
        if last_space != -1:
            # Drop the trailing partial word.
            clipped = clipped[:last_space]
        return clipped + "..."
    return cleaned
| # =============================== | |
| # Search Function | |
| # =============================== | |
def _format_score(final_scores, hadith_idx):
    """Return ``final_scores[hadith_idx]`` as a 3-decimal string, or ``""``."""
    if not isinstance(final_scores, (list, np.ndarray)):
        return ""
    try:
        return f"{float(final_scores[int(hadith_idx)]):.3f}"
    except (IndexError, TypeError, ValueError, OverflowError):
        # Best effort: a missing or non-numeric score only hides the score line.
        return ""


def _render_card(rank, hadith_idx, row, final_scores):
    """Build the HTML card for one result row."""
    matn = safe_get(row, "matn_text")
    # Title preference: first populated descriptive column, then a short
    # excerpt of the text itself, then a generic "hadith #<index>" label.
    title_val = (
        first_nonempty(row, ["isnad_text", "hadith_title", "title", "main_subj"])
        or short_preview(matn, 60)
        or f"حديث #{hadith_idx}"
    )
    topic = html.escape(safe_get(row, "main_subj"))
    source_url = safe_get(row, "url")
    score_display = _format_score(final_scores, hadith_idx)

    # Escape first, then turn newlines into <br>, so the inserted tags survive.
    full_text_html = html.escape(matn).replace("\n", "<br>")
    preview_html = html.escape(short_preview(matn, 360)).replace("\n", "<br>")
    # Index labels are normally integers; escaped anyway since they end up in HTML.
    idx_html = html.escape(str(hadith_idx))

    score_html = (
        f'<div class="meta">الدرجة: <span class="meta-val">{score_display}</span></div>'
        if score_display
        else ""
    )
    link_html = (
        "<a href='" + html.escape(source_url)
        + "' target='_blank' rel='noopener noreferrer'>📖 عرض المصدر</a>"
        if source_url
        else ""
    )

    return f"""
<div class="card">
<div class="card-header">
<div class="left">
<div class="index">#{rank}</div>
<div class="title">{html.escape(title_val)}</div>
<div class="topic">الموضوع: {topic}</div>
</div>
<div class="right-meta">
<div class="meta">الرقم: <span class="meta-val">{idx_html}</span></div>
{score_html}
</div>
</div>
<div class="text-rtl">
<details>
<summary class="summary">{preview_html}</summary>
<div class="full-text">{full_text_html}</div>
</details>
</div>
<div class="footer">
{link_html}
</div>
</div>
"""


def search_hadith(query, top_k):
    """Run the hybrid search for *query* and render the hits as HTML cards.

    Args:
        query: Free-text search string from the UI.
        top_k: Number of results requested; coerced with ``int()``.

    Returns:
        An HTML string: a ``<p class='empty'>`` message when the query is
        blank or nothing was found, otherwise a ``<div class='results'>``
        wrapper containing one card per result row.

    Notes:
        ``hybrid_search_fixed`` is expected to return ``(results_df, debug)``.
        When ``debug`` is a dict, its ``"final_scores"`` entry is indexed by
        each row's index label -- this assumes the labels are integer
        positions into that array; TODO confirm against ``retrieval``.
    """
    if not query or not str(query).strip():
        return "<p class='empty'>الرجاء إدخال استعلام بحث</p>"

    results_df, debug = hybrid_search_fixed(
        query=query,
        df=df,
        bm25=bm25,
        preprocess_query=preprocess_query,
        model=model,
        hadith_embeddings=hadith_embeddings,
        anchor_index=anchor_index,
        anchor_dict=anchor_dict,
        unique_anchor_texts=unique_anchor_texts,
        top_k=int(top_k),
    )

    # Show an explicit message rather than an empty container.
    if results_df is None or results_df.empty:
        return "<p class='empty'>لم يتم العثور على نتائج مطابقة</p>"

    final_scores = debug.get("final_scores") if isinstance(debug, dict) else None

    cards = [
        _render_card(rank, hadith_idx, row, final_scores)
        for rank, (hadith_idx, row) in enumerate(results_df.iterrows(), start=1)
    ]
    return "\n".join(["<div class='results'>", *cards, "</div>"])
| # =============================== | |
| # PROFESSIONAL DARK MODE + PERFECT ARABIC TYPOGRAPHY + HIDDEN SCROLL ARROWS | |
| # =============================== | |
# Stylesheet passed to gr.Interface through `css=custom_css`.
# It overrides theme colour variables for a dark palette, loads the Cairo and
# Noto Sans Arabic web fonts, and styles the class names emitted by
# search_hadith (.results, .card, .index, .title, .topic, .text-rtl, .summary,
# .full-text, .footer, .empty). It also hides scroll-arrow buttons.
# NOTE(review): no rule in this sheet makes .card-header a flex container, so
# the `flex-direction` in the media query only matters if another stylesheet
# sets `display: flex` on it -- confirm the intended header layout.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Cairo:wght@400;500;600;700&family=Noto+Sans+Arabic:wght@400;500;700&display=swap');
:root {
--body-background-fill: #020617 !important;
--background-fill-primary: #020617 !important;
--background-fill-secondary: #0f172a !important;
--border-color-primary: #334155 !important;
--color-text-primary: #f1f5f9 !important;
--color-text-secondary: #cbd5e1 !important;
--button-primary-background-fill: #3b82f6 !important;
--button-primary-background-fill-hover: #2563eb !important;
}
/* Global dark + Arabic font */
body, .gradio-container, .gr-panel, .gr-box, .gr-form, .wrap, .panel, .block, footer {
background-color: #020617 !important;
color: #f1f5f9 !important;
font-family: 'Cairo', 'Noto Sans Arabic', system-ui, sans-serif !important;
}
.gradio-container { max-width: 960px !important; margin: 0 auto !important; }
/* Arabic text perfection */
.text-rtl, .full-text, .summary, .title, .topic {
font-family: 'Cairo', 'Noto Sans Arabic', sans-serif !important;
line-height: 2.05 !important;
}
/* Inputs & Buttons */
label span, input, textarea, .gr-input, .gr-textarea {
color: #f1f5f9 !important;
background: #1e2937 !important;
border-color: #475569 !important;
}
button.primary {
background: linear-gradient(90deg, #3b82f6, #60a5fa) !important;
font-weight: 600 !important;
}
/* Premium result cards */
.card {
background: #1e2937 !important;
border-radius: 18px !important;
padding: 26px !important;
margin-bottom: 24px !important;
box-shadow: 0 10px 30px rgba(0,0,0,0.4) !important;
border: 1px solid #334155 !important;
transition: all 0.35s ease !important;
}
.card:hover {
transform: translateY(-8px);
box-shadow: 0 25px 50px rgba(59, 130, 246, 0.2) !important;
}
.index {
background: linear-gradient(90deg, #3b82f6, #60a5fa) !important;
color: #fff !important;
padding: 7px 16px !important;
border-radius: 9999px !important;
font-weight: 700 !important;
}
.title { font-size: 19px !important; font-weight: 700 !important; }
.topic { font-size: 14px !important; color: #94a3b8 !important; }
.text-rtl {
background: #0f172a !important;
border-radius: 12px !important;
padding: 18px !important;
border-right: 6px solid #3b82f6 !important;
font-size: 17px !important;
color: #e2e8f0 !important;
}
/* Hide the unwanted white scroll arrow boxes (the ones in your 2nd screenshot) */
.gradio-container button[aria-label*="Scroll"],
.gradio-container .gr-button.scroll,
.gr-scrollbar-button,
::-webkit-scrollbar-button,
[data-testid*="scroll"],
button[class*="scroll"],
.scroll-button,
.gr-scroll-button {
display: none !important;
visibility: hidden !important;
opacity: 0 !important;
pointer-events: none !important;
}
/* Clean up any remaining floating arrows */
.gradio-container > div > button:last-child {
display: none !important;
}
.results { margin-top: 24px; }
.footer a { color: #60a5fa !important; font-weight: 600 !important; }
.empty { color: #94a3b8 !important; text-align: center; padding: 50px 20px; font-size: 17px; }
@media (max-width: 768px) {
.card-header { flex-direction: column !important; }
}
"""
| # =============================== | |
| # Gradio Interface – ENGLISH HEADER ONLY | |
| # =============================== | |
# Input widgets, created in the same order as they are wired into the
# interface: the free-text query first, then the result-count slider.
_query_box = gr.Textbox(
    label="🔍 استعلام البحث",
    placeholder="مثال: أهمية النية في الإسلام / Importance of intention in Islam",
    lines=2,
    max_lines=3,
)
_top_k_slider = gr.Slider(
    minimum=1,
    maximum=20,
    value=5,
    step=1,
    label="📌 عدد النتائج",
)

# Clickable sample queries; each entry is [query, top_k].
_example_queries = [
    ["أهمية النية وأثرها في قبول الأعمال", 5],
    ["حقوق الوالدين في الإسلام", 5],
    ["فضل الصلاة على النبي ﷺ", 5],
]

# The page header (title and description) is in English; widget labels are
# in Arabic.
_description = """
Advanced AI-powered semantic search engine for the Noble Prophetic Hadith.<br>
Combines lexical search (BM25), semantic embeddings, and topic-aware Anchors.
"""

# search_hadith returns an HTML string, so the single output is an HTML component.
interface = gr.Interface(
    fn=search_hadith,
    inputs=[_query_box, _top_k_slider],
    outputs=gr.HTML(),
    title="📖 Intelligent Hadith Search Engine",
    description=_description,
    examples=_example_queries,
    flagging_mode="never",
    theme=gr.themes.Soft(),
    css=custom_css,
)

# Start the app only when the file is run as a script.
if __name__ == "__main__":
    interface.launch()