| import os |
| import re |
| import json |
| import html |
| import urllib.parse |
| import urllib.request |
| import gradio as gr |
| from openai import OpenAI |
|
|
# Hugging Face token used to authenticate against the HF inference router.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Chat model routed through the HF router; overridable via the MODEL_ID env var.
MODEL_ID = os.environ.get("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
# Hadith retrieval endpoint queried when "Include Hadith Evidence" is enabled.
RETRIEVAL_API = os.environ.get(
    "RETRIEVAL_API",
    "https://alshargi-hadeethapi.hf.space/search"
)


# Fail fast at startup rather than on the first chat request.
if not HF_TOKEN:
    raise ValueError("HF_TOKEN is missing. Add it in Space Settings -> Secrets.")


# OpenAI-compatible client pointed at the Hugging Face inference router.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=HF_TOKEN,
)
|
|
# System prompt for the default (non-RAG) chat path. The literal text below
# is sent to the model verbatim; editing it changes assistant behavior.
GENERAL_SYSTEM_PROMPT = """
You are Rawi, a helpful AI assistant.

- If the user asks who you are, who made you, or asks about Faisal Alshargi, say:
"Hello! I'm Rawi, an AI assistant engineered by Dr. Faisal Alshargi to help with a wide range of tasks and questions. Whether you need information, advice, or just someone to chat with, I'm here to assist you. How can I help you today?"

Your job:
- Answer naturally and clearly
- The user may ask about any topic
- Do not force religious structure unless the user asks for it
- Be flexible, modern, useful, and engaging
- If the user asks for a list, bullets, short answer, or comparison, follow that format
- Otherwise give a clean natural answer

Avoid robotic section-heavy formatting unless the user explicitly asks for it.
""".strip()
|
|
# System prompt for the RAG path. The user turn it accompanies is wrapped by
# build_hadith_messages() to contain both the request and the retrieved
# evidence; this prompt constrains the model to that evidence.
HADITH_SYSTEM_PROMPT = """
You are Rawi Agent, a Hadith AI Agent specialized in explaining retrieved hadith evidence.

The user message contains:
1) the user's actual request
2) retrieved hadith evidence from the retrieval API

Your job:
- Base the answer only on the retrieved hadith evidence
- Do not invent hadiths, sources, grades, or unsupported claims
- Keep the answer natural, clear, and useful
- If the user asks for a list, bullets, short answer, comparison, or summary, follow that request
- If the user does not specify a format, give one natural paragraph first
- Keep the explanation faithful to the strongest retrieved evidence
- If some retrieved hadiths are only loosely related, do not overstate them

Do not use rigid headings like:
- Short answer
- Key meanings
- Supporting evidence summary

Prefer a natural answer style.
""".strip()
|
|
|
|
def is_arabic(text: str) -> bool:
    """Return True when *text* contains at least one Arabic-block character."""
    return any("\u0600" <= ch <= "\u06FF" for ch in (text or ""))
|
|
|
|
def normalize_quotes(text: str) -> str:
    """Replace typographic (curly) quotes with their ASCII equivalents.

    The original literals were corrupted by an encoding round-trip (all
    four appeared as the same garbled character, making three of the
    replacements no-ops); the Unicode escapes below restore the intended
    mapping unambiguously.

    Args:
        text: Possibly-empty/None string to normalize.

    Returns:
        ``text`` with left/right double quotes mapped to ``"`` and
        left/right single quotes mapped to ``'``; "" when *text* is falsy.
    """
    if not text:
        return ""
    return (
        text.replace("\u201c", '"')   # left double quotation mark
        .replace("\u201d", '"')       # right double quotation mark
        .replace("\u2018", "'")       # left single quotation mark
        .replace("\u2019", "'")       # right single quotation mark
    )
|
|
|
|
def clean_general_answer(text: str) -> str:
    """Strip boilerplate section headings and flatten model output.

    Removes "Short answer"/"Key meanings"-style scaffolding, converts
    dash bullets to inline text, and collapses runs of whitespace.
    """
    cleaned = normalize_quotes(text or "").strip()

    heading_patterns = (
        r"^Answer:\s*",
        r"^AI Answer:\s*",
        r"^1\.\s*Short answer:\s*",
        r"^Short answer:\s*",
        r"\n?\s*2\.\s*Key meanings:\s*",
        r"\n?\s*3\.\s*Supporting evidence summary:\s*",
        r"\n?\s*Key meanings:\s*",
        r"\n?\s*Supporting evidence summary:\s*",
    )
    for pattern in heading_patterns:
        cleaned = re.sub(pattern, " ", cleaned, flags=re.IGNORECASE)

    # Turn leading dash bullets into plain text, then squash whitespace.
    cleaned = re.sub(r"\n+\s*-\s*", " ", cleaned)
    cleaned = re.sub(r"\n{2,}", "\n\n", cleaned)
    return re.sub(r"\s{2,}", " ", cleaned).strip()
|
|
|
|
def clean_hadith_answer(text: str) -> str:
    """Clean a RAG answer and drop any echoed "Hadith Evidence:" tail."""
    without_headings = clean_general_answer(text)
    trimmed = re.sub(
        r"\s*Hadith Evidence:.*$",
        "",
        without_headings,
        flags=re.IGNORECASE | re.DOTALL,
    )
    return trimmed.strip()
|
|
|
|
def fetch_hadith_sources(query: str, k: int = 5, rerank_k: int = 25) -> dict:
    """Query the hadith retrieval API and normalize its response.

    Supports two response shapes: a "sources" list (passed through) or a
    "results" list (mapped to the "sources" schema). Anything else yields
    an empty source list.

    Returns:
        dict with "sources" (list of hit dicts) and "retrieval_url".
    """
    query_string = urllib.parse.urlencode({
        "q": query,
        "k": k,
        "rerank_k": rerank_k,
        "format": "json",
        "hl_topn": 0,
        "seg_maxlen": 220,
    })
    url = f"{RETRIEVAL_API}?{query_string}"

    with urllib.request.urlopen(url, timeout=45) as response:
        data = json.loads(response.read().decode("utf-8"))

    if isinstance(data, dict):
        sources = data.get("sources")
        if isinstance(sources, list):
            return {"sources": sources, "retrieval_url": url}

        results = data.get("results")
        if isinstance(results, list):
            # Map the alternate "results" schema onto the "sources" one.
            mapped = [
                {
                    "source": item.get("source", item.get("collection", "Reference")),
                    "grade": item.get("grade", "Unknown grade"),
                    "text": item.get("text", ""),
                    "english": item.get("english", ""),
                    "score": item.get("score"),
                }
                for item in results
            ]
            return {"sources": mapped, "retrieval_url": url}

    return {"sources": [], "retrieval_url": url}
|
|
|
|
def format_sources_for_prompt(sources: list[dict]) -> str:
    """Serialize retrieval hits into a plain-text evidence section for the LLM.

    Each hit becomes a "Hadith N" block; optional fields (score, Arabic,
    English) are included only when present.
    """
    if not sources:
        return "No hadith evidence was retrieved."

    rendered = []
    for idx, hit in enumerate(sources, start=1):
        lines = [
            f"Hadith {idx}",
            f"Source: {hit.get('source', 'Reference')}",
            f"Grade: {hit.get('grade', 'Unknown grade')}",
        ]
        score = hit.get("score", None)
        if score is not None:
            lines.append(f"Score: {score}")
        arabic = hit.get("text", "")
        if arabic:
            lines.append(f"Arabic: {arabic}")
        english = hit.get("english", "")
        if english:
            lines.append(f"English: {english}")

        rendered.append("\n".join(lines))

    return "\n\n".join(rendered)
|
|
|
|
def format_sources_for_display(sources: list[dict], language: str = "en") -> str:
    """Render retrieved hadith sources as user-facing text.

    Args:
        sources: Retrieval hits; each dict may carry "source", "grade",
            "text" (Arabic), "english", and "score" keys.
        language: "ar" selects Arabic labels; anything else gets English.

    Returns:
        A blank-line-separated listing headed by a localized title, or ""
        when there are no sources.

    Note:
        The Arabic label literals in the original file were mojibake and
        two were split across physical lines (a syntax error); they are
        reconstructed here from the recoverable text.
    """
    if not sources:
        return ""

    if language == "ar":
        title = "الأحاديث المسترجعة"
        grade_label = "الدرجة"
        arabic_label = "النص العربي"
        english_label = "الترجمة الإنجليزية"
        score_label = "الدرجة العددية"
    else:
        title = "Hadith Evidence"
        grade_label = "Grade"
        arabic_label = "Arabic"
        english_label = "English"
        score_label = "Score"

    parts = [title]
    for src in sources:
        source = src.get("source", "Reference")
        grade = src.get("grade", "Unknown grade")
        arabic_text = src.get("text", "")
        english = src.get("english", "")
        score = src.get("score", None)

        block = [source, f"{grade_label}: {grade}"]
        if arabic_text:
            block.append(f"{arabic_label}: {arabic_text}")
        if english:
            block.append(f"{english_label}: {english}")
        if score is not None:
            try:
                block.append(f"{score_label}: {float(score):.4f}")
            except (TypeError, ValueError):
                # Non-numeric scores are shown verbatim rather than dropped.
                block.append(f"{score_label}: {score}")

        parts.append("\n".join(block))

    return "\n\n".join(parts)
|
|
|
|
def build_general_messages(user_message: str, history: list[dict]) -> list[dict]:
    """Assemble the non-RAG message list: system prompt, history, new turn."""
    return (
        [{"role": "system", "content": GENERAL_SYSTEM_PROMPT}]
        + list(history)
        + [{"role": "user", "content": user_message}]
    )
|
|
|
|
def build_hadith_messages(user_message: str, history: list[dict], sources: list[dict]) -> list[dict]:
    """Assemble the RAG message list.

    The final user turn bundles the request together with the serialized
    retrieval evidence so the model answers from that evidence only.
    """
    evidence = format_sources_for_prompt(sources)
    combined = (
        f"User request:\n{user_message}\n\n"
        f"Retrieved hadith evidence:\n{evidence}"
    ).strip()

    return (
        [{"role": "system", "content": HADITH_SYSTEM_PROMPT}]
        + list(history)
        + [{"role": "user", "content": combined}]
    )
|
|
|
|
def llm_chat(messages: list[dict], temperature: float = 0.2, max_tokens: int = 1000) -> str:
    """Run one chat completion against MODEL_ID and return the stripped text."""
    completion = client.chat.completions.create(
        model=MODEL_ID,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    content = completion.choices[0].message.content
    return content.strip()
|
|
|
|
def build_history_messages(history_pairs: list[tuple[str, str]]) -> list[dict]:
    """Flatten (user, assistant) pairs into chat messages, skipping empties."""
    flattened: list[dict] = []
    for user_turn, assistant_turn in history_pairs:
        for role, content in (("user", user_turn), ("assistant", assistant_turn)):
            if content:
                flattened.append({"role": role, "content": content})
    return flattened
|
|
|
|
def chat(message: str, history: list[tuple[str, str]], use_rag: bool):
    """Produce a reply for one turn.

    Non-RAG: general system prompt, cleaned answer. RAG: retrieve hadith
    evidence, answer from it, and append a localized evidence listing.
    Any exception is converted into an "Error: ..." string for the UI.
    """
    if not (message and message.strip()):
        return "Please enter a message."

    past_turns = build_history_messages(history)
    lang = "ar" if is_arabic(message) else "en"

    try:
        if not use_rag:
            reply = llm_chat(
                build_general_messages(message, past_turns),
                temperature=0.3,
                max_tokens=1000,
            )
            return clean_general_answer(reply)

        retrieval = fetch_hadith_sources(message)
        sources = retrieval.get("sources", [])

        reply = llm_chat(
            build_hadith_messages(message, past_turns, sources),
            temperature=0.15,
            max_tokens=1100,
        )
        reply = clean_hadith_answer(reply).strip()

        # Show the supporting evidence under the answer when any exists.
        evidence = format_sources_for_display(sources, language=lang)
        return f"{reply}\n\n{evidence}" if evidence else reply

    except Exception as e:
        return f"Error: {str(e)}"
|
|
|
|
# CSS injected into gr.Blocks: centers the app at a max width, tightens the
# header spacing, and dims the one-line mode hint under the title.
CUSTOM_CSS = """
.gradio-container{
    max-width: 1100px !important;
    margin: 0 auto !important;
}
#title-wrap{
    text-align:center;
    margin-bottom: 8px;
}
#title-wrap h1{
    margin-bottom: 6px;
}
.mode-note{
    font-size: 13px;
    color: #5f7296;
}
"""
|
|
# Gradio UI: header, transcript, input row (textbox + RAG toggle + buttons).
# Conversation state lives in gr.State as a list of (user, assistant) tuples
# and is mirrored into the Chatbot component after every turn.
# NOTE(review): the "β" in the window title looks like a mojibaked dash from
# an encoding round-trip — confirm and restore the intended character.
with gr.Blocks(css=CUSTOM_CSS, title="Rawi Agent β Hadith AI Agent") as demo:
    # Static header: app name plus a one-line explanation of the two modes.
    gr.HTML("""
    <div id="title-wrap">
        <h1>Rawi Agent</h1>
        <div class="mode-note">General chat by default. Enable hadith evidence when you want retrieved hadith support.</div>
    </div>
    """)

    # Transcript view; receives the (user, assistant) tuple history.
    # NOTE(review): bubble_full_width is deprecated in newer Gradio
    # releases — confirm against the pinned gradio version.
    chatbot = gr.Chatbot(
        label="Rawi",
        height=600,
        bubble_full_width=False,
    )

    with gr.Row():
        with gr.Column(scale=8):
            # Main message input; Enter submits (wired via msg.submit below).
            msg = gr.Textbox(
                placeholder="Ask about anything...",
                lines=3,
                max_lines=8,
                show_label=False,
            )
        with gr.Column(scale=2, min_width=180):
            # Toggles the retrieval-augmented (hadith evidence) path in chat().
            use_rag = gr.Checkbox(
                label="Include Hadith Evidence",
                value=False,
            )
            send = gr.Button("Send", variant="primary")
            clear = gr.Button("Clear")

    # Canonical conversation history: list of (user, assistant) tuples.
    state = gr.State([])

    def submit_message(user_message, chat_history, rag_enabled):
        """Run one chat turn and append it to the history.

        Returns (cleared textbox value, updated chatbot view, updated state).
        """
        response = chat(user_message, chat_history, rag_enabled)
        chat_history = chat_history + [(user_message, response)]
        return "", chat_history, chat_history

    # The Send button and the Enter key share the same handler and wiring.
    send.click(
        submit_message,
        inputs=[msg, state, use_rag],
        outputs=[msg, chatbot, state],
    )

    msg.submit(
        submit_message,
        inputs=[msg, state, use_rag],
        outputs=[msg, chatbot, state],
    )

    # Reset both the visible transcript and the stored history.
    clear.click(
        lambda: ([], []),
        outputs=[chatbot, state],
    )
|
|
# Bind on all interfaces on port 7860 (the standard Hugging Face Spaces port).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)