import gradio as gr
from ctransformers import AutoModelForCausalLM

# 1. Load the model (ctransformers reads the GGUF file directly).
# The status line is printed first because loading happens at import time.
print("⏳ মডেল লোড হচ্ছে... এটি ২-৩ মিনিট সময় নিতে পারে।")

# Loader options, kept together so they are easy to audit in one place.
_load_options = {
    "model_file": "Qwen2.5-3B-Instruct-Q4_K_M.gguf",
    # Original author's intent: "gpt2" acts as a proxy loader for the Qwen model.
    # NOTE(review): ctransformers' documented model types do not list Qwen --
    # confirm this GGUF actually loads and generates correctly under "gpt2".
    "model_type": "gpt2",
    "context_length": 2048,
}
llm = AutoModelForCausalLM.from_pretrained("saiful-ai-dev/MotionMindX", **_load_options)

def _message_text(content):
    """Return the plain text of one history message, or "" if it has none."""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # Structured content: keep only parts that carry a string "text" field;
        # file/media parts have nothing useful to put into a text prompt.
        return "".join(
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        )
    return ""


def _history_turns(history):
    """Yield ``(role, text)`` for each usable turn in a Gradio chat history.

    Accepts both history layouts: ``[user, assistant]`` pairs and
    ``{"role": ..., "content": ...}`` dicts. Turns without text are skipped.
    """
    for entry in history or ():
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _message_text(entry.get("content"))
            if role in ("user", "assistant") and text:
                yield role, text
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            for role, content in zip(("user", "assistant"), entry):
                text = _message_text(content)
                if text:
                    yield role, text


def respond(message, history):
    """Stream the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation as passed by
            ``gr.ChatInterface`` (pair format or role/content dicts).
            May be empty or ``None``.

    Yields:
        The reply accumulated so far, growing by one generated token per
        yield, so the UI can render it incrementally.
    """
    max_new_tokens = 512

    # 2. Build the ChatML prompt: system turn, earlier turns, new user turn.
    system_block = "<|im_start|>system\nYou are Motion Mind X, a helpful tutor from Bangladesh.<|im_end|>\n"
    final_block = f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    # Tokens available for earlier turns once the fixed parts of the prompt
    # and the reply itself are accounted for. If this is not positive, no
    # history is included and the prompt is just system + current message.
    budget = (
        llm.context_length
        - max_new_tokens
        - len(llm.tokenize(system_block + final_block))
    )

    # Walk history newest-first so the oldest turns are the ones dropped
    # when the conversation no longer fits in the context window.
    kept_blocks = []
    for role, text in reversed(list(_history_turns(history))):
        block = f"<|im_start|>{role}\n{text}<|im_end|>\n"
        budget -= len(llm.tokenize(block))
        if budget < 0:
            break
        kept_blocks.append(block)
    kept_blocks.reverse()

    prompt = "".join([system_block, *kept_blocks, final_block])

    # 3. Streaming generation: yield the cumulative text after every token.
    response_text = ""
    for token in llm(prompt, stream=True, max_new_tokens=max_new_tokens, stop=["<|im_end|>"]):
        response_text += token
        yield response_text

# 4. Interface: a Gradio chat UI whose handler is `respond`.
_APP_TITLE = "Motion Mind X 🚀 (GGUF Mode)"
demo = gr.ChatInterface(fn=respond, title=_APP_TITLE)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()