"""Gradio chatbot ("Scoopsie") that answers human-anatomy questions using a
local GGUF model served through llama-cpp-python."""

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Download GGUF model from Hugging Face (cached locally after first run).
model_path = hf_hub_download(
    repo_id="zeeshan391/Antobot_gguf",
    filename="unsloth.Q4_K_M.gguf",
)

# Load model. NOTE: sampling parameters such as temperature are per-call
# options of llm(...), not constructor options, so they are passed at
# generation time below (passing temperature= here had no effect on sampling).
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
    n_batch=64,
    use_mlock=True,
    use_mmap=True,
    verbose=False,
)

# Rolling conversation memory as (question, answer) pairs.
# Only the last MAX_HISTORY turns are kept/used, so the list stays bounded.
MAX_HISTORY = 5
chat_history: list[tuple[str, str]] = []


def build_prompt(question: str) -> str:
    """Build the full prompt: persona instructions + recent history + question.

    Only the last MAX_HISTORY turns are included so the prompt fits n_ctx.
    """
    history_text = "\n".join(
        f"User: {q}\nScoopsie: {a}" for q, a in chat_history[-MAX_HISTORY:]
    )
    return f"""
You are an Anatomy assistant chatbot named "Scoopsie". Your expertise is exclusively in providing information and advice about anything related to medical Anatomy book topics. You do not provide information outside of this scope. If a question is not about Anatomy, respond with, "I specialize only in Anatomy related queries." Chat History: {history_text} Question: {question} Answer:"""


def chat_fn(question: str) -> str:
    """Generate an answer for *question* and record the turn in memory.

    Returns the model's text completion, stripped of surrounding whitespace.
    """
    question = question.strip()
    prompt = build_prompt(question)
    # max_tokens: llama-cpp-python defaults to 16 tokens, which truncates
    # every answer — raise it. stop: keep the model from fabricating the
    # next "User:/Question:" turn. temperature: sampling option goes here.
    output = llm(
        prompt,
        max_tokens=512,
        temperature=0.7,
        stop=["User:", "Question:"],
    )
    answer = output["choices"][0]["text"].strip()

    # Save to memory, trimming so the list never grows without bound.
    chat_history.append((question, answer))
    del chat_history[:-MAX_HISTORY]
    return answer


# Gradio Interface
iface = gr.Interface(
    fn=chat_fn,
    inputs=gr.Textbox(lines=3, label="Ask questions"),
    outputs=gr.Textbox(label="Answer"),
    title="🧠 Human Anatomy Assistant",
    description="Chatbot, Your anatomy expert. Only answers anatomy-related questions.",
    theme="default",
)

if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable inside containers.
    iface.launch(server_name="0.0.0.0", server_port=7860)