import os

# huggingface_hub evaluates HF_HUB_ENABLE_HF_TRANSFER once, when the package is
# first imported. sentence_transformers imports it (and gradio presumably does
# too), so the flag has to be in the environment *before* the third-party
# imports below; setting it afterwards is silently ignored.
# NOTE(review): this flag relies on the optional `hf_transfer` package being
# installed - confirm it is part of the deployment requirements.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import html  # noqa: E402
import pickle  # noqa: E402

import faiss  # noqa: E402
import gradio as gr  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
# ===============================
# Load data & models (ONCE)
# ===============================
def _load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE(review): unpickling can execute arbitrary code; these files are
    # presumably trusted build artifacts shipped with the app - confirm.
    with open(path, "rb") as fh:
        return pickle.load(fh)


# Hadith table and the embedding matrix stored alongside it.
df = pd.read_csv("data/hadith.csv")
hadith_embeddings = np.load("data/hadith_embeddings.npy")
print(f"Loaded hadith embeddings: {hadith_embeddings.shape}")

# Lexical index, FAISS anchor index and the anchor lookup tables.
bm25 = _load_pickle("data/bm25.pkl")
anchor_index = faiss.read_index("data/faiss_anchor.index")
print(f"Anchor index dimension: {anchor_index.d}")
anchor_dict = _load_pickle("data/anchor_dict.pkl")
unique_anchor_texts = _load_pickle("data/unique_anchor_texts.pkl")

# Sentence encoder used for queries.
model = SentenceTransformer("omarelshehy/arabic-english-sts-matryoshka-v2.0")
model.max_seq_length = 512

# Fail fast at startup if the encoder's output width does not match the
# dimensionality the anchor index was built with.
test_emb = model.encode("test", normalize_embeddings=True)
print(f"Model embedding dimension: {test_emb.shape}")
if anchor_index.d != test_emb.shape[0]:
    raise ValueError(
        f"Dimension mismatch! Model outputs {test_emb.shape[0]}D but "
        f"anchor_index expects {anchor_index.d}D. Rebuild your anchor_index."
    )
from retrieval import hybrid_search_fixed
from utils import preprocess_query
# ===============================
# Helper functions
# ===============================
def safe_get(row, col):
    """Return ``row[col]`` as a stripped string, or ``""`` when unavailable.

    Args:
        row: A mapping-like object (``dict``, ``pandas.Series``) or any
            container supporting ``in`` and ``[]``.
        col: Key / column label to look up.

    Returns:
        The value converted with ``str()`` and stripped of surrounding
        whitespace. An empty string is returned when the key is absent, the
        lookup is not possible, or the value is missing (``None``, float NaN,
        ``pd.NA``, ``pd.NaT``).
    """
    try:
        if hasattr(row, "get"):
            value = row.get(col, "")
        elif col in row:
            value = row[col]
        else:
            value = ""
    except (KeyError, IndexError, TypeError, ValueError):
        # Best-effort accessor: an impossible lookup counts as "no value".
        return ""

    if value is None:
        return ""

    # pd.isna recognises every scalar missing marker (NaN, pd.NA, pd.NaT), so
    # they do not leak into the UI as "nan" / "<NA>" / "NaT". For array-likes
    # it returns an array, which is deliberately not treated as missing.
    try:
        missing = pd.isna(value)
    except (TypeError, ValueError):
        missing = False
    if isinstance(missing, (bool, np.bool_)) and missing:
        return ""

    return str(value).strip()
def first_nonempty(row, cols):
    """Return the first non-empty ``safe_get(row, col)`` over *cols*, else ``""``.

    Columns are tried in the order given and evaluation stops at the first
    one that yields a non-empty string.
    """
    candidates = (safe_get(row, name) for name in cols)
    return next((text for text in candidates if text), "")
def short_preview(text, length=300):
    """Return *text* shortened to at most *length* characters plus ``"..."``.

    Args:
        text: Source string; ``None`` or empty is treated as ``""``.
        length: Maximum number of characters kept before the ellipsis.

    Returns:
        The stripped text unchanged when it already fits. Otherwise the first
        *length* characters, cut back to the previous word boundary, followed
        by ``"..."``. Any whitespace (space, newline, tab) counts as a word
        boundary, and no whitespace is left dangling before the ellipsis.
    """
    text = (text or "").strip()
    if len(text) <= length:
        return text

    head = text[:length]
    if head[-1:].isspace():
        # The cut landed inside a whitespace run: every word in `head` is
        # complete, so only the trailing whitespace has to go.
        head = head.rstrip()
    else:
        # Drop the last token (it may be cut mid-word). A single unbroken
        # token is kept as is so the preview is never empty.
        pieces = head.rsplit(None, 1)
        if len(pieces) == 2:
            head = pieces[0]
    return head + "..."
# ===============================
# Search Function
# ===============================
def _safe_href(url):
    """Return *url* escaped for an ``href`` attribute, or ``""`` if not http(s)."""
    url = (url or "").strip()
    # Only plain web links are allowed, so a value such as "javascript:..."
    # coming from the data file can never end up in the page.
    if url.lower().startswith(("http://", "https://")):
        return html.escape(url, quote=True)
    return ""


def _score_for(final_scores, hadith_idx):
    """Return the score at *hadith_idx* formatted to 3 decimals, or ``""``."""
    if not isinstance(final_scores, (list, np.ndarray)):
        return ""
    # NOTE(review): assumes the DataFrame index label is also the position of
    # the hadith in `final_scores` - confirm against hybrid_search_fixed.
    try:
        return f"{float(final_scores[int(hadith_idx)]):.3f}"
    except (IndexError, TypeError, ValueError):
        # The score is optional decoration; a failed lookup just hides it.
        return ""


def _render_card(rank, row, final_scores):
    """Render one result row as an HTML card using the classes in custom_css."""
    hadith_idx = row.name if hasattr(row, "name") else ""
    full_text = safe_get(row, "matn_text")
    title = (
        first_nonempty(row, ["isnad_text", "hadith_title", "title", "main_subj"])
        or short_preview(full_text, 60)
        or f"حديث #{hadith_idx}"
    )
    topic = safe_get(row, "main_subj")
    href = _safe_href(safe_get(row, "url"))
    score = _score_for(final_scores, hadith_idx)

    # Escape first, then turn newlines into <br> so the breaks survive in HTML.
    preview_html = html.escape(short_preview(full_text, 360)).replace("\n", "<br>")
    full_html = html.escape(full_text).replace("\n", "<br>")

    score_html = f'<span class="topic">{score}</span>' if score else ""
    topic_html = f'<div class="topic">{html.escape(topic)}</div>' if topic else ""
    footer_html = (
        f'<div class="footer"><a href="{href}" target="_blank" '
        f'rel="noopener noreferrer">المصدر</a></div>'
        if href
        else ""
    )

    parts = [
        '<div class="card" dir="rtl">',
        '<div class="card-header">',
        f'<span class="index">{rank}</span>',
        f'<span class="title">{html.escape(title)}</span>',
        score_html,
        "</div>",
        topic_html,
        "<details>",
        f'<summary class="summary">{preview_html}</summary>',
        f'<div class="full-text text-rtl">{full_html}</div>',
        "</details>",
        footer_html,
        "</div>",
    ]
    return "\n".join(part for part in parts if part)


def search_hadith(query, top_k):
    """Run the hybrid search for *query* and return the results as HTML.

    Args:
        query: Free-text search query from the UI.
        top_k: Number of results requested; converted with ``int()`` before
            being passed to ``hybrid_search_fixed``.

    Returns:
        An HTML string: a ``results`` container with one ``card`` per hit, or
        a single ``empty`` message when the query is blank or nothing was
        found. All text taken from the data is HTML-escaped.
    """
    if not query or not str(query).strip():
        return '<div class="empty">الرجاء إدخال استعلام بحث</div>'

    results_df, debug = hybrid_search_fixed(
        query=query,
        df=df,
        bm25=bm25,
        preprocess_query=preprocess_query,
        model=model,
        hadith_embeddings=hadith_embeddings,
        anchor_index=anchor_index,
        anchor_dict=anchor_dict,
        unique_anchor_texts=unique_anchor_texts,
        top_k=int(top_k),
    )

    if results_df is None or results_df.empty:
        return '<div class="empty">لا توجد نتائج مطابقة</div>'

    final_scores = debug.get("final_scores") if isinstance(debug, dict) else None

    cards = [
        _render_card(rank, row, final_scores)
        for rank, (_, row) in enumerate(results_df.iterrows(), start=1)
    ]
    return "\n".join(['<div class="results">', *cards, "</div>"])
# ===============================
# Custom CSS: dark theme, Arabic typography, hidden scroll-arrow buttons
# ===============================
# Stylesheet handed to gr.Interface through its `css=` argument.
# Contents, in order:
#   * @import of the Cairo and Noto Sans Arabic web fonts from Google Fonts
#   * :root custom properties for the dark colour palette
#   * global background / text colour / font-family overrides
#   * styling for inputs, the primary button and the result-card classes
#     (.card, .index, .title, .topic, .text-rtl, .results, .footer, .empty)
#   * rules that hide scroll-arrow buttons
#   * a small-screen media query for .card-header
# NOTE(review): `.gradio-container > div > button:last-child` is a purely
# structural selector and may also hide unrelated buttons - confirm in the UI.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Cairo:wght@400;500;600;700&family=Noto+Sans+Arabic:wght@400;500;700&display=swap');
:root {
--body-background-fill: #020617 !important;
--background-fill-primary: #020617 !important;
--background-fill-secondary: #0f172a !important;
--border-color-primary: #334155 !important;
--color-text-primary: #f1f5f9 !important;
--color-text-secondary: #cbd5e1 !important;
--button-primary-background-fill: #3b82f6 !important;
--button-primary-background-fill-hover: #2563eb !important;
}
/* Global dark + Arabic font */
body, .gradio-container, .gr-panel, .gr-box, .gr-form, .wrap, .panel, .block, footer {
background-color: #020617 !important;
color: #f1f5f9 !important;
font-family: 'Cairo', 'Noto Sans Arabic', system-ui, sans-serif !important;
}
.gradio-container { max-width: 960px !important; margin: 0 auto !important; }
/* Arabic text perfection */
.text-rtl, .full-text, .summary, .title, .topic {
font-family: 'Cairo', 'Noto Sans Arabic', sans-serif !important;
line-height: 2.05 !important;
}
/* Inputs & Buttons */
label span, input, textarea, .gr-input, .gr-textarea {
color: #f1f5f9 !important;
background: #1e2937 !important;
border-color: #475569 !important;
}
button.primary {
background: linear-gradient(90deg, #3b82f6, #60a5fa) !important;
font-weight: 600 !important;
}
/* Premium result cards */
.card {
background: #1e2937 !important;
border-radius: 18px !important;
padding: 26px !important;
margin-bottom: 24px !important;
box-shadow: 0 10px 30px rgba(0,0,0,0.4) !important;
border: 1px solid #334155 !important;
transition: all 0.35s ease !important;
}
.card:hover {
transform: translateY(-8px);
box-shadow: 0 25px 50px rgba(59, 130, 246, 0.2) !important;
}
.index {
background: linear-gradient(90deg, #3b82f6, #60a5fa) !important;
color: #fff !important;
padding: 7px 16px !important;
border-radius: 9999px !important;
font-weight: 700 !important;
}
.title { font-size: 19px !important; font-weight: 700 !important; }
.topic { font-size: 14px !important; color: #94a3b8 !important; }
.text-rtl {
background: #0f172a !important;
border-radius: 12px !important;
padding: 18px !important;
border-right: 6px solid #3b82f6 !important;
font-size: 17px !important;
color: #e2e8f0 !important;
}
/* Hide the unwanted white scroll arrow boxes (the ones in your 2nd screenshot) */
.gradio-container button[aria-label*="Scroll"],
.gradio-container .gr-button.scroll,
.gr-scrollbar-button,
::-webkit-scrollbar-button,
[data-testid*="scroll"],
button[class*="scroll"],
.scroll-button,
.gr-scroll-button {
display: none !important;
visibility: hidden !important;
opacity: 0 !important;
pointer-events: none !important;
}
/* Clean up any remaining floating arrows */
.gradio-container > div > button:last-child {
display: none !important;
}
.results { margin-top: 24px; }
.footer a { color: #60a5fa !important; font-weight: 600 !important; }
.empty { color: #94a3b8 !important; text-align: center; padding: 50px 20px; font-size: 17px; }
@media (max-width: 768px) {
.card-header { flex-direction: column !important; }
}
"""
# ===============================
# Gradio Interface – ENGLISH HEADER ONLY
# ===============================
# Input widgets: the free-text query and the number of results to return.
_query_box = gr.Textbox(
    label="🔍 استعلام البحث",
    placeholder="مثال: أهمية النية في الإسلام / Importance of intention in Islam",
    lines=2,
    max_lines=3,
)
_top_k_slider = gr.Slider(
    minimum=1,
    maximum=20,
    value=5,
    step=1,
    label="📌 عدد النتائج",
)

# Each example row is [query, top_k], matching the order of the inputs.
_example_rows = [
    ["أهمية النية وأثرها في قبول الأعمال", 5],
    ["حقوق الوالدين في الإسلام", 5],
    ["فضل الصلاة على النبي ﷺ", 5],
]

_description = """
Advanced AI-powered semantic search engine for the Noble Prophetic Hadith.
Combines lexical search (BM25), semantic embeddings, and topic-aware Anchors.
"""

# Single-function app: search_hadith returns an HTML string shown in gr.HTML.
interface = gr.Interface(
    fn=search_hadith,
    inputs=[_query_box, _top_k_slider],
    outputs=gr.HTML(),
    title="📖 Intelligent Hadith Search Engine",
    description=_description,
    examples=_example_rows,
    flagging_mode="never",
    theme=gr.themes.Soft(),
    css=custom_css,
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()