"""Gradio chatbot ("Scoopsie") that answers human-anatomy questions using a
local GGUF model served through llama-cpp-python."""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Download GGUF model from Hugging Face (cached locally after first run).
model_path = hf_hub_download(
    repo_id="zeeshan391/Antobot_gguf",
    filename="unsloth.Q4_K_M.gguf",
)

# Load model. NOTE: sampling parameters such as temperature are per-call
# options of llm(...), not constructor options, so they are passed at
# generation time below (passing temperature= here had no effect on sampling).
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    n_batch=64,
    use_mlock=True,
    use_mmap=True,
    verbose=False,
)

# Rolling conversation memory as (question, answer) pairs.
# Only the last MAX_HISTORY turns are kept/used, so the list stays bounded.
MAX_HISTORY = 5
chat_history: list[tuple[str, str]] = []


def build_prompt(question: str) -> str:
    """Build the full prompt: persona instructions + recent history + question.

    Only the last MAX_HISTORY turns are included so the prompt fits n_ctx.
    """
    history_text = "\n".join(
        f"User: {q}\nScoopsie: {a}" for q, a in chat_history[-MAX_HISTORY:]
    )
    return f"""
You are an Anatomy assistant chatbot named "Scoopsie". Your expertise is exclusively in providing information and advice about anything related to medical Anatomy book topics. You do not provide information outside of this scope. If a question is not about Anatomy, respond with, "I specialize only in Anatomy related queries." Chat History: {history_text} Question: {question} Answer:"""


def chat_fn(question: str) -> str:
    """Generate an answer for *question* and record the turn in memory.

    Returns the model's text completion, stripped of surrounding whitespace.
    """
    question = question.strip()
    prompt = build_prompt(question)
    # max_tokens: llama-cpp-python defaults to 16 tokens, which truncates
    # every answer — raise it. stop: keep the model from fabricating the
    # next "User:/Question:" turn. temperature: sampling option goes here.
    output = llm(
        prompt,
        max_tokens=512,
        temperature=0.7,
        stop=["User:", "Question:"],
    )
    answer = output["choices"][0]["text"].strip()

    # Save to memory, trimming so the list never grows without bound.
    chat_history.append((question, answer))
    del chat_history[:-MAX_HISTORY]
    return answer


# Gradio Interface
iface = gr.Interface(
    fn=chat_fn,
    inputs=gr.Textbox(lines=3, label="Ask questions"),
    outputs=gr.Textbox(label="Answer"),
    title="🧠 Human Anatomy Assistant",
    description="Chatbot, Your anatomy expert. Only answers anatomy-related questions.",
    theme="default",
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside containers.
    iface.launch(server_name="0.0.0.0", server_port=7860)