"""Gradio chat UI around the Llama-2-13b-chat causal LM."""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Single source of truth for the checkpoint (the original duplicated this
# string literal in both from_pretrained calls).
model_name = "meta-llama/Llama-2-13b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()  # inference only: disable dropout / training-mode layers

# System prompt prepended to every user message to frame the AI's behavior.
# NOTE(review): Llama-2-chat checkpoints were fine-tuned on the
# "[INST] <<SYS>> ... <</SYS>> ... [/INST]" template; plain concatenation
# (kept here to preserve behavior) may degrade reply quality — consider
# tokenizer.apply_chat_template. TODO confirm desired prompt format.
system_prompt = "You are chatting with a friendly AI. Ask me anything!"


def chat(input_text):
    """Generate a model reply to *input_text*.

    The system prompt and the user text are concatenated, fed to the model,
    and only the newly generated tokens are decoded — the original decoded
    the whole sequence, so every reply began with an echo of the prompt.

    Args:
        input_text: The user's message (str).

    Returns:
        The generated reply as a string; empty string for blank input.
    """
    if not input_text or not input_text.strip():
        # Guard: nothing to answer; also avoids generating on empty submits.
        return ""

    full_prompt = f"{system_prompt}\n\n{input_text}"
    input_ids = tokenizer.encode(full_prompt, return_tensors="pt")

    with torch.no_grad():
        # Bug fix: the original used max_length=50, which bounds the TOTAL
        # sequence (prompt + reply). The system prompt plus any real question
        # easily exceeds 50 tokens, leaving no budget for a reply.
        # max_new_tokens bounds only the generated continuation.
        output = model.generate(
            input_ids,
            max_new_tokens=256,
            num_return_sequences=1,
            # Llama has no pad token; reuse EOS to silence the warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Slice off the prompt tokens so only the model's answer is returned.
    generated_tokens = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)


iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Llama Chatbot",
    description="Chat with a friendly AI chatbot powered by the Llama model.",
    # Bug fix: live=True re-runs the function on every keystroke — each one
    # a full 13B-parameter generation. Generate only on explicit submit.
    live=False,
)

# Launch only when run as a script, so importing this module (e.g. to reuse
# chat()) does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()