import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "psy191190/LimitlessAGI"  # e.g. your Llama 3.2 3B Instruct

# Load the model once at startup; device_map="auto" places it on GPU when available.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
)


def respond(message, history):
    # ChatInterface with type="messages" calls fn(message, history), where history
    # is the prior conversation as a list of {"role", "content"} dicts. Rebuild the
    # full conversation, then append the new user turn.
    messages = [{"role": m["role"], "content": m["content"]} for m in history]
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    output = pipe(prompt)[0]["generated_text"]
    # The pipeline returns the prompt plus the completion; keep only the completion.
    response = output[len(prompt):].strip()
    yield response


with gr.Blocks() as demo:
    gr.ChatInterface(respond, type="messages")

demo.queue().launch()