import os
import urllib.request
from collections.abc import Iterator

import gradio as gr
from llama_cpp import Llama
# 💾 Download the GGUF weights from Hugging Face if not already present
GGUF_URL = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
MODEL_FILENAME = "TinyLlama-1.1B-Chat.Q4_K_M.gguf"

if not os.path.exists(MODEL_FILENAME):
    print(f"🔽 Downloading model from Hugging Face: {GGUF_URL}")
    urllib.request.urlretrieve(GGUF_URL, MODEL_FILENAME)
    print("✅ Download complete!")
# 🧠 Load the GGUF model with llama-cpp-python (CPU inference)
llm = Llama(model_path=MODEL_FILENAME, n_ctx=4096, n_threads=os.cpu_count())

DESCRIPTION = "# Sheikh AI – TinyLlama (GGUF from HF)\n"
DESCRIPTION += "<p><strong>Note:</strong> Running on CPU with GGUF – downloaded automatically.</p>"

MAX_NEW_TOKENS = 1024
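# format_conversation flattens the Gradio message history into TinyLlama's
# Zephyr-style chat template, e.g.:
#
#     <|system|>
#     {system prompt}</s>
#     <|user|>
#     {question}</s>
#     <|assistant|>
#
# The trailing <|assistant|> tag cues the model to generate the reply.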
def format_conversation(system_prompt: str, chat_history: list[dict], user_input: str) -> str:
    chat = f"<|system|>\n{system_prompt.strip()}</s>\n"
    for turn in chat_history:
        if turn["role"] == "user":
            chat += f"<|user|>\n{turn['content'].strip()}</s>\n"
        elif turn["role"] == "assistant":
            chat += f"<|assistant|>\n{turn['content'].strip()}</s>\n"
    chat += f"<|user|>\n{user_input.strip()}</s>\n<|assistant|>\n"
    return chat
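# gr.ChatInterface calls generate(message, history, *additional_inputs), so the
# slider values defined below arrive positionally as max_new_tokens,
# temperature, top_p, top_k, and repeat_penalty. Yielding the growing string
# streams partial output into the chat window as tokens are produced.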
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = MAX_NEW_TOKENS,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repeat_penalty: float = 1.2,
) -> Iterator[str]:
    system_prompt = (
        "You are SheikhGPT, a wise Islamic scholar AI. You respond only to Islamic-related questions "
        "based on the Qur’an, Hadith, and the understanding of classical scholars. Do not answer "
        "questions unrelated to Islam. Speak humbly, respectfully, and provide sources when possible."
    )
    prompt = format_conversation(system_prompt, chat_history, message)
    stream = llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stop=["</s>"],
        stream=True,
    )
    partial = ""
    for chunk in stream:
        partial += chunk["choices"][0]["text"]
        yield partial
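# Build the chat UI. type="messages" makes Gradio pass the history as a list
# of {"role": ..., "content": ...} dicts, which is the shape
# format_conversation expects (this parameter requires a reasonably recent
# Gradio release). css_paths assumes a style.css file sits next to this script.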
demo = gr.ChatInterface(
    fn=generate,
    type="messages",
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=32, maximum=2048, value=MAX_NEW_TOKENS, step=32),
        gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, value=0.6, step=0.1),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.05),
        gr.Slider(label="Top-k", minimum=1, maximum=100, value=50, step=1),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, value=1.2, step=0.05),
    ],
    examples=[
        ["What are the five pillars of Islam?"],
        ["Is it allowed to pray in shoes?"],
        ["Explain the meaning of Surah Al-Fatiha."],
        ["Is music haram according to Islamic scholars?"],
        ["Can I make up missed fasts after Ramadan?"],
    ],
    description=DESCRIPTION,
    css_paths="style.css",
)
if __name__ == "__main__":
    demo.launch()
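# To try it locally (assuming gradio and llama-cpp-python are installed),
# run the script directly, e.g. `python app.py` if saved under the
# conventional Spaces entry-point name; Gradio serves the interface on
# http://127.0.0.1:7860 by default.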