from transformers import AutoModelForCausalLM, AutoTokenizer import gradio as gr import torch # ====================================================== # MODEL # ====================================================== MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct" tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" ) # ====================================================== # SYSTEM PROMPT # ====================================================== SYSTEM_PROMPT = """ Your name is Axel. You are a WhatsApp AI assistant. Be short, helpful, and friendly. """ # ====================================================== # MEMORY # ====================================================== memory = {} def get_memory(user_id): return "\n".join(memory.get(user_id, [])[-6:]) def save_memory(user_id, msg): memory.setdefault(user_id, []) memory[user_id].append(str(msg)[:300]) memory[user_id] = memory[user_id][-10:] # ====================================================== # CHAT FUNCTION # ====================================================== def chat(user_id, message): history = get_memory(user_id) prompt = f""" {SYSTEM_PROMPT} Memory: {history} User: {message} """ save_memory(user_id, message) inputs = tokenizer(prompt, return_tensors="pt").to(model.device) output = model.generate( **inputs, max_new_tokens=256, temperature=0.7, ) response = tokenizer.decode(output[0], skip_special_tokens=True) save_memory(user_id, response) return response # ====================================================== # UI # ====================================================== demo = gr.Interface( fn=chat, inputs=[ gr.Textbox(label="User ID"), gr.Textbox(label="Message"), ], outputs=gr.Textbox(label="Axel AI Response"), title="Axel AI 🚀 Stable Version", description="Fast WhatsApp-style AI assistant" ) demo.launch()