"""Minimal Gradio chat UI backed by the Llama-2-7b-chat model."""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer once at startup (expensive: downloads ~13 GB on first run).
model_name = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", trust_remote_code=True
)
model.eval()  # inference only; disables dropout etc.

# Llama 2 ships without a pad token; reuse EOS so padded batches tokenize cleanly.
tokenizer.pad_token = tokenizer.eos_token


def chat(user_input: str) -> str:
    """Generate a sampled model reply for *user_input* and return only the new text.

    Args:
        user_input: Raw user prompt (a single string from the Gradio textbox).

    Returns:
        The model's generated continuation, with the echoed prompt and
        special tokens stripped.
    """
    # Move tensors to the model's device — required when device_map="auto"
    # places the model on GPU while the tokenizer returns CPU tensors.
    inputs = tokenizer(user_input, return_tensors="pt", padding=True).to(model.device)
    with torch.no_grad():  # no gradients needed for generation
        output = model.generate(
            input_ids=inputs.input_ids,
            # Pass the mask explicitly: pad == eos here, so generate() cannot
            # infer it reliably and would warn / mis-attend otherwise.
            attention_mask=inputs.attention_mask,
            # max_new_tokens counts only generated tokens; the original
            # max_length=128 included the prompt and could leave no budget.
            max_new_tokens=128,
            do_sample=True,
            temperature=0.6,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Slice off the prompt tokens so the UI shows only the reply,
    # not the user's input echoed back.
    new_tokens = output[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


# Build the Gradio UI; launch only when run as a script so importing this
# module (e.g. for testing) has no network side effects.
iface = gr.Interface(fn=chat, inputs="text", outputs="text", title="Llama 2 Chatbot")

if __name__ == "__main__":
    iface.launch()