# Muslim-Bot / app.py
import os
import urllib.request
from collections.abc import Iterator
import gradio as gr
from llama_cpp import Llama
# 💾 Download GGUF from Hugging Face if not already present
GGUF_URL = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
MODEL_FILENAME = "TinyLlama-1.1B-Chat.Q4_K_M.gguf"
if not os.path.exists(MODEL_FILENAME):
    print(f"🔽 Downloading model from Hugging Face: {GGUF_URL}")
    urllib.request.urlretrieve(GGUF_URL, MODEL_FILENAME)
    print("✅ Download complete!")
# 🧠 Load GGUF model using llama-cpp
llm = Llama(model_path=MODEL_FILENAME, n_ctx=4096, n_threads=os.cpu_count())
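# Note (assumption, not part of the original code): n_ctx=4096 is the context window shared
# by the prompt and the generated tokens, and n_threads=os.cpu_count() spins up one worker
# per available core; on a shared CPU Space you may want to cap this, e.g. n_threads=2.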
DESCRIPTION = "# Sheikh AI – TinyLlama (GGUF from HF)"
DESCRIPTION += "<p><strong>Note:</strong> Running on CPU with GGUF – downloaded automatically.</p>"
MAX_NEW_TOKENS = 1024
def format_conversation(system_prompt: str, chat_history: list[dict], user_input: str) -> str:
    chat = f"<|system|>\n{system_prompt.strip()}</s>\n"
    for turn in chat_history:
        if turn["role"] == "user":
            chat += f"<|user|>\n{turn['content'].strip()}</s>\n"
        elif turn["role"] == "assistant":
            chat += f"<|assistant|>\n{turn['content'].strip()}</s>\n"
    chat += f"<|user|>\n{user_input.strip()}</s>\n<|assistant|>\n"
    return chat
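# Illustrative example of the prompt this produces for a fresh chat (no prior turns):
#   format_conversation("You are helpful.", [], "Salam")
#   -> "<|system|>\nYou are helpful.</s>\n<|user|>\nSalam</s>\n<|assistant|>\n"
# This follows the Zephyr-style chat template used by TinyLlama-1.1B-Chat.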
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = MAX_NEW_TOKENS,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repeat_penalty: float = 1.2,
) -> Iterator[str]:
    system_prompt = (
        "You are SheikhGPT, a wise Islamic scholar AI. You respond only to Islamic-related questions "
        "based on the Qur’an, Hadith, and the understanding of classical scholars. Do not answer "
        "questions unrelated to Islam. Speak humbly, respectfully, and provide sources when possible."
    )
    prompt = format_conversation(system_prompt, chat_history, message)
    stream = llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stop=["</s>"],
        stream=True,
    )
    partial = ""
    for chunk in stream:
        partial += chunk["choices"][0]["text"]
        yield partial
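# Yielding the cumulative text (rather than each new chunk) is what gr.ChatInterface expects
# from a streaming generator: each yielded value replaces the pending assistant message, so
# the reply appears to grow token by token in the UI.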
demo = gr.ChatInterface(
    fn=generate,
    type="messages",  # generate() reads history entries as {"role", "content"} dicts
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=32, maximum=2048, value=MAX_NEW_TOKENS, step=32),
        gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, value=0.6, step=0.1),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.05),
        gr.Slider(label="Top-k", minimum=1, maximum=100, value=50, step=1),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, value=1.2, step=0.05),
    ],
    examples=[
        ["What are the five pillars of Islam?"],
        ["Is it allowed to pray in shoes?"],
        ["Explain the meaning of Surah Al-Fatiha."],
        ["Is music haram according to Islamic scholars?"],
        ["Can I make up missed fasts after Ramadan?"],
    ],
    description=DESCRIPTION,
    css_paths="style.css",
)
if __name__ == "__main__":
    demo.launch()