Spaces:
Sleeping
Sleeping
File size: 3,219 Bytes
ac40f97 2152916 ac40f97 68b3e68 ac40f97 2152916 57eaeab 2152916 7b3a1dd 2152916 ac40f97 68b3e68 ac40f97 68b3e68 ac40f97 68b3e68 ac40f97 68b3e68 ac40f97 68b3e68 7b3a1dd 68b3e68 ac40f97 68b3e68 ac40f97 68b3e68 ac40f97 7b3a1dd ac40f97 68b3e68 ac40f97 68b3e68 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import os
import urllib.request
from collections.abc import Iterator
import gradio as gr
from llama_cpp import Llama
# 💾 Download GGUF from Hugging Face if not already present
GGUF_URL = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
MODEL_FILENAME = "TinyLlama-1.1B-Chat.Q4_K_M.gguf"
if not os.path.exists(MODEL_FILENAME):
    print(f"🔽 Downloading model from Hugging Face: {GGUF_URL}")
    # Download to a temporary name and move it into place atomically, so an
    # interrupted download never leaves a partial file that would pass the
    # exists() check above and be fed to Llama as a corrupt model.
    tmp_path = MODEL_FILENAME + ".part"
    urllib.request.urlretrieve(GGUF_URL, tmp_path)
    os.replace(tmp_path, MODEL_FILENAME)
    print("✅ Download complete!")
# 🧠 Load GGUF model using llama-cpp (CPU inference; n_ctx must cover the
# full formatted prompt plus the generated tokens).
llm = Llama(model_path=MODEL_FILENAME, n_ctx=4096, n_threads=os.cpu_count())
# Markdown/HTML header shown above the chat UI.
DESCRIPTION = "# Sheikh AI – TinyLlama (GGUF from HF)"
DESCRIPTION += "<p><strong>Note:</strong> Running on CPU with GGUF – downloaded automatically.</p>"
# Default generation budget; also the default of the "Max new tokens" slider.
MAX_NEW_TOKENS = 1024
def format_conversation(system_prompt: str, chat_history: list[dict], user_input: str) -> str:
    """Build a TinyLlama chat prompt from the system prompt, past turns, and the new message.

    Each segment is a ``<|role|>`` header, the stripped text, and an ``</s>``
    end-of-sequence marker; the result ends with an open ``<|assistant|>``
    header for the model to complete. History entries with roles other than
    "user"/"assistant" are skipped.
    """
    role_tags = {"user": "<|user|>", "assistant": "<|assistant|>"}
    segments = [f"<|system|>\n{system_prompt.strip()}</s>\n"]
    for turn in chat_history:
        tag = role_tags.get(turn["role"])
        if tag is not None:
            segments.append(f"{tag}\n{turn['content'].strip()}</s>\n")
    segments.append(f"<|user|>\n{user_input.strip()}</s>\n<|assistant|>\n")
    return "".join(segments)
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = MAX_NEW_TOKENS,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repeat_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream a SheikhGPT reply, yielding the accumulated text after each chunk.

    Gradio re-renders the chat bubble with every yielded value, so each yield
    carries the full response so far rather than just the new tokens.
    """
    system_prompt = (
        "You are SheikhGPT, a wise Islamic scholar AI. You respond only to Islamic-related questions "
        "based on the Qur’an, Hadith, and the understanding of classical scholars. Do not answer "
        "questions unrelated to Islam. Speak humbly, respectfully, and provide sources when possible."
    )
    full_prompt = format_conversation(system_prompt, chat_history, message)
    # Stop on the end-of-sequence marker the prompt format uses between turns.
    token_stream = llm(
        full_prompt,
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stop=["</s>"],
        stream=True,
    )
    response_so_far = ""
    for piece in token_stream:
        response_so_far += piece["choices"][0]["text"]
        yield response_so_far
# 🕌 Chat UI. type="messages" makes Gradio pass chat_history as a list of
# {"role": ..., "content": ...} dicts — the shape generate/format_conversation
# index with turn["role"]; the legacy tuples format would raise on that access.
demo = gr.ChatInterface(
    fn=generate,
    type="messages",
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=32, maximum=2048, value=MAX_NEW_TOKENS, step=32),
        gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, value=0.6, step=0.1),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.05),
        gr.Slider(label="Top-k", minimum=1, maximum=100, value=50, step=1),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, value=1.2, step=0.05),
    ],
    examples=[
        ["What are the five pillars of Islam?"],
        ["Is it allowed to pray in shoes?"],
        ["Explain the meaning of Surah Al-Fatiha."],
        ["Is music haram according to Islamic scholars?"],
        ["Can I make up missed fasts after Ramadan?"]
    ],
    description=DESCRIPTION,
    css_paths="style.css"
)
if __name__ == "__main__":
    demo.launch()
|