import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

MODEL_NAME = "fibonacciai/RealRobot-Chatbot-Ecommerce-Robot-Fibonacci-Nano-llm"

# Load the tokenizer and model once at startup.
# Note: device_map="auto" requires the `accelerate` package; on a CPU-only
# machine you may need to drop it along with torch_dtype=torch.float16.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)


def chat(message, history):
    # Gradio's ChatInterface passes `history` as (user, assistant) pairs;
    # rebuild the conversation as a plain-text prompt.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=500,
        temperature=0.7,
        do_sample=True,
    )

    # The decoded output contains the prompt as well, so keep only the text
    # after the final "Assistant:" marker as the new reply.
    answer = tokenizer.decode(output[0], skip_special_tokens=True)
    answer = answer.split("Assistant:")[-1].strip()
    return answer


gr.ChatInterface(
    fn=chat,
    title="RealRobot Ecommerce Chatbot",
    description="Powered by Fibonacci Nano LLM",
).launch()
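
# --- Optional smoke test (illustrative sketch, not part of the original app) ---
# Assumption: run this *instead of* the blocking .launch() call above, e.g. by
# temporarily commenting out the ChatInterface block. The example history pair
# below is invented purely for illustration.
#
# if __name__ == "__main__":
#     demo_history = [("Hi", "Hello! How can I help you shop today?")]
#     print(chat("Do you ship internationally?", demo_history))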