import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the fine-tuned TinyLlama GGUF file from the Hugging Face Hub; the
# value returned by hf_hub_download is handed to Llama as the model path.
model_path = hf_hub_download(
    filename="tinyllama-ans.gguf",
    repo_id="Ansnaeem/tinyllama-peft-gguf",
)

# Load the model once at import time so every chat request reuses it.
llm = Llama(
    n_ctx=2048,
    model_path=model_path,
)

def _message_text(content):
    """Return the plain text carried by a dict-style history ``content``.

    Strings (and ``None``) are returned unchanged.  A list/tuple is treated
    as a sequence of parts: string parts and the ``"text"`` field of dict
    parts are concatenated, everything else is ignored; ``None`` is returned
    when no text part is found.  Any other value is converted with ``str``.
    """
    if content is None or isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        texts = []
        for part in content:
            if isinstance(part, str):
                texts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                texts.append(part["text"])
        return "".join(texts) if texts else None
    return str(content)


def _history_turns(history):
    """Yield ``(role, text)`` pairs from either supported history shape.

    Accepted entries are ``{"role": ..., "content": ...}`` dicts or
    two-item ``(user, assistant)`` pairs.  Dict entries whose role is not
    ``"user"``/``"assistant"`` and turns without content are skipped.
    """
    for entry in history or ():
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _message_text(entry.get("content"))
            if role in ("user", "assistant") and text is not None:
                yield role, text
        else:
            user, assistant = entry
            # A missing half of a pair must not be rendered as the text "None".
            if user is not None:
                yield "user", user
            if assistant is not None:
                yield "assistant", assistant


def respond(message, history) -> str:
    """Generate the assistant's reply to ``message`` given the chat so far.

    Builds a prompt made of a fixed system instruction, the previous turns,
    and the new user message, each wrapped in ``<|role|>`` ... ``</s>``
    markers, then runs it through the module-level ``llm``.

    Args:
        message: The new user message.
        history: Previous turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts.  ``None`` or an empty
            sequence means there is no prior conversation.

    Returns:
        The text of the first completion choice returned by ``llm``.
    """
    segments = [
        "<|system|>\nYou are a helpful assistant who answers questions about Muhammad Ans based on his resume.</s>\n"
    ]
    segments.extend(
        f"<|{role}|>\n{text}</s>\n" for role, text in _history_turns(history)
    )
    # Leave the assistant tag open so the model continues with its answer.
    segments.append(f"<|user|>\n{message}</s>\n<|assistant|>\n")
    prompt = "".join(segments)

    # NOTE(review): the prompt grows with the history and nothing here bounds
    # it against the model's context window -- consider trimming old turns.
    # Generation stops at the end-of-turn marker or if the model starts
    # writing a new user turn on its own.
    response = llm(prompt, max_tokens=300, temperature=0.1, stop=["</s>", "<|user|>"])
    return response['choices'][0]['text']

# Build the chat UI around respond() and start serving it.
demo = gr.ChatInterface(
    fn=respond,
    title="Muhammad Ans AI Assistant",
    description="Ask me anything about Muhammad Ans.",
)
demo.launch()