| from transformers import AutoModelForCausalLM, AutoTokenizer |
| import gradio as gr |
| import torch |
|
|
| |
| |
| |
|
|
| MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct" |
|
|
| tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) |
| model = AutoModelForCausalLM.from_pretrained( |
| MODEL_NAME, |
| torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, |
| device_map="auto" |
| ) |
|
|
| |
| |
| |
|
|
| SYSTEM_PROMPT = """ |
| Your name is Axel. You are a WhatsApp AI assistant. |
| Be short, helpful, and friendly. |
| """ |
|
|
| |
| |
| |
|
|
| memory = {} |
|
|
| def get_memory(user_id): |
| return "\n".join(memory.get(user_id, [])[-6:]) |
|
|
| def save_memory(user_id, msg): |
| memory.setdefault(user_id, []) |
| memory[user_id].append(str(msg)[:300]) |
| memory[user_id] = memory[user_id][-10:] |
|
|
| |
| |
| |
|
|
| def chat(user_id, message): |
|
|
| history = get_memory(user_id) |
|
|
| prompt = f""" |
| {SYSTEM_PROMPT} |
| |
| Memory: |
| {history} |
| |
| User: |
| {message} |
| """ |
|
|
| save_memory(user_id, message) |
|
|
| inputs = tokenizer(prompt, return_tensors="pt").to(model.device) |
|
|
| output = model.generate( |
| **inputs, |
| max_new_tokens=256, |
| temperature=0.7, |
| ) |
|
|
| response = tokenizer.decode(output[0], skip_special_tokens=True) |
|
|
| save_memory(user_id, response) |
|
|
| return response |
|
|
| |
| |
| |
|
|
| demo = gr.Interface( |
| fn=chat, |
| inputs=[ |
| gr.Textbox(label="User ID"), |
| gr.Textbox(label="Message"), |
| ], |
| outputs=gr.Textbox(label="Axel AI Response"), |
| title="Axel AI 🚀 Stable Version", |
| description="Fast WhatsApp-style AI assistant" |
| ) |
|
|
| demo.launch() |