"""Minimal Gradio text demo backed by a causal language model.

Loads a tokenizer and model once at import time, exposes
``chatbot_response`` as the text-in/text-out callback, and launches a
Gradio interface around it.
"""

from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Checkpoint used for both the tokenizer and the model weights.
MODEL_NAME = "huggyllama/llama-7b"

# Upper bound on *newly generated* tokens per request. The prompt length
# is not counted against this budget.
MAX_NEW_TOKENS = 200

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)


def chatbot_response(input_text: str) -> str:
    """Generate a continuation of ``input_text`` with the loaded model.

    Args:
        input_text: Prompt typed by the user.

    Returns:
        The decoded first generated sequence with special tokens stripped,
        or an empty string when the prompt is empty or only whitespace.
    """
    # With ``live=True`` the callback also fires when the textbox is
    # cleared; skip the expensive generation when there is no prompt.
    if not input_text or not input_text.strip():
        return ""

    # Keep the inputs on the same device as the model weights.
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    outputs = model.generate(
        input_ids=inputs["input_ids"],
        # Forward the mask produced by the tokenizer instead of letting
        # ``generate`` infer one.
        attention_mask=inputs.get("attention_mask"),
        # ``max_length`` would include the prompt tokens, so a long prompt
        # would leave no room for a reply; bound only the new tokens.
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# NOTE(review): ``live=True`` re-runs the callback on every input change,
# which is costly for a model of this size -- confirm this is intended.
interface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    live=True,
)
interface.launch()