import os
import urllib.request
from collections.abc import Iterator

import gradio as gr
from llama_cpp import Llama
# 💾 Download the GGUF weights from Hugging Face if not already present
GGUF_URL = "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
MODEL_FILENAME = "TinyLlama-1.1B-Chat.Q4_K_M.gguf"

if not os.path.exists(MODEL_FILENAME):
    print(f"🔽 Downloading model from Hugging Face: {GGUF_URL}")
    urllib.request.urlretrieve(GGUF_URL, MODEL_FILENAME)
    print("✅ Download complete!")
# 🧠 Load the GGUF model with llama-cpp-python (CPU inference)
llm = Llama(model_path=MODEL_FILENAME, n_ctx=4096, n_threads=os.cpu_count())

DESCRIPTION = "# Sheikh AI – TinyLlama (GGUF from HF)\n"
DESCRIPTION += "<p><strong>Note:</strong> Running on CPU with GGUF – downloaded automatically.</p>"

MAX_NEW_TOKENS = 1024
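# format_conversation flattens the Gradio message history into TinyLlama's
# Zephyr-style chat template, e.g.:
#
#     <|system|>
#     {system prompt}</s>
#     <|user|>
#     {question}</s>
#     <|assistant|>
#
# The trailing <|assistant|> tag cues the model to generate the reply.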
def format_conversation(system_prompt: str, chat_history: list[dict], user_input: str) -> str:
    chat = f"<|system|>\n{system_prompt.strip()}</s>\n"
    for turn in chat_history:
        if turn["role"] == "user":
            chat += f"<|user|>\n{turn['content'].strip()}</s>\n"
        elif turn["role"] == "assistant":
            chat += f"<|assistant|>\n{turn['content'].strip()}</s>\n"
    chat += f"<|user|>\n{user_input.strip()}</s>\n<|assistant|>\n"
    return chat
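# gr.ChatInterface calls generate(message, history, *additional_inputs), so the
# slider values defined below arrive positionally as max_new_tokens,
# temperature, top_p, top_k, and repeat_penalty. Yielding the growing string
# streams partial output into the chat window as tokens are produced.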
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = MAX_NEW_TOKENS,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repeat_penalty: float = 1.2,
) -> Iterator[str]:
    system_prompt = (
        "You are SheikhGPT, a wise Islamic scholar AI. You respond only to Islamic-related questions "
        "based on the Qur’an, Hadith, and the understanding of classical scholars. Do not answer "
        "questions unrelated to Islam. Speak humbly, respectfully, and provide sources when possible."
    )
    prompt = format_conversation(system_prompt, chat_history, message)
    stream = llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repeat_penalty=repeat_penalty,
        stop=["</s>"],
        stream=True,
    )
    partial = ""
    for chunk in stream:
        partial += chunk["choices"][0]["text"]
        yield partial
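# Build the chat UI. type="messages" makes Gradio pass the history as a list
# of {"role": ..., "content": ...} dicts, which is the shape
# format_conversation expects (this parameter requires a reasonably recent
# Gradio release). css_paths assumes a style.css file sits next to this script.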
demo = gr.ChatInterface(
    fn=generate,
    type="messages",
    additional_inputs=[
        gr.Slider(label="Max new tokens", minimum=32, maximum=2048, value=MAX_NEW_TOKENS, step=32),
        gr.Slider(label="Temperature", minimum=0.1, maximum=2.0, value=0.6, step=0.1),
        gr.Slider(label="Top-p", minimum=0.1, maximum=1.0, value=0.9, step=0.05),
        gr.Slider(label="Top-k", minimum=1, maximum=100, value=50, step=1),
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, value=1.2, step=0.05),
    ],
    examples=[
        ["What are the five pillars of Islam?"],
        ["Is it allowed to pray in shoes?"],
        ["Explain the meaning of Surah Al-Fatiha."],
        ["Is music haram according to Islamic scholars?"],
        ["Can I make up missed fasts after Ramadan?"],
    ],
    description=DESCRIPTION,
    css_paths="style.css",
)
if __name__ == "__main__":
    demo.launch()
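# To try it locally (assuming gradio and llama-cpp-python are installed),
# run the script directly, e.g. `python app.py` if saved under the
# conventional Spaces entry-point name; Gradio serves the interface on
# http://127.0.0.1:7860 by default.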