"""Gradio chat app that answers questions about Muhammad Ans.

The app downloads a TinyLlama GGUF checkpoint from the Hugging Face Hub,
loads it with llama-cpp, and exposes it through a ``gr.ChatInterface``.
"""

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

SYSTEM_PROMPT = (
    "You are a helpful assistant who answers questions about Muhammad Ans "
    "based on his resume."
)

# Strings that end generation. An empty string must never appear here: it is
# a substring of any output, so it would cut every completion down to "".
# "</s>" is the end-of-sequence marker; "<|user|>" stops the model from
# writing the next user turn itself.
STOP_SEQUENCES = ["</s>", "<|user|>"]

# Fetch the GGUF file from the Hub and load it once at start-up so every chat
# request reuses the same model instance.
model_path = hf_hub_download(
    repo_id="Ansnaeem/tinyllama-peft-gguf",
    filename="tinyllama-ans.gguf",
)
llm = Llama(model_path=model_path, n_ctx=2048)


def _history_turns(history):
    """Yield ``(role, content)`` pairs from either Gradio history format.

    Gradio passes history either as ``[user, assistant]`` pairs (legacy
    format) or as ``{"role": ..., "content": ...}`` dicts ("messages"
    format), depending on version and configuration.
    """
    for entry in history:
        if isinstance(entry, dict):
            yield entry["role"], entry["content"]
        else:
            user, assistant = entry
            yield "user", user
            yield "assistant", assistant


def respond(message, history):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Previous conversation turns as supplied by
            ``gr.ChatInterface`` (pairs or role/content dicts).

    Returns:
        The text of the first completion choice returned by the model.
    """
    parts = [f"<|system|>\n{SYSTEM_PROMPT}\n"]
    parts.extend(
        f"<|{role}|>\n{content}\n" for role, content in _history_turns(history)
    )
    # Leave the assistant tag open so the model continues from there.
    parts.append(f"<|user|>\n{message}\n<|assistant|>\n")

    response = llm(
        "".join(parts),
        max_tokens=300,
        temperature=0.1,
        stop=STOP_SEQUENCES,
    )
    return response["choices"][0]["text"]


demo = gr.ChatInterface(
    respond,
    title="Muhammad Ans AI Assistant",
    description="Ask me anything about Muhammad Ans.",
)

if __name__ == "__main__":
    demo.launch()