import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import torch
import os

# Hugging Face access token for the gated Llama-2 repo, read from the
# environment (set the "token" variable before launching).
access_token = os.environ.get("token")

# Load the pre-trained Llama model and tokenizer.
model_name = "meta-llama/Llama-2-13b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)

# System prompt that sets the bot's (intentionally rude) persona.
# NOTE(review): the prompt text contains typos ("a ai model", "and questions",
# "them selves") — left byte-for-byte since it is runtime input to the model;
# confirm the wording with the author before cleaning it up.
system_prompt = "You are a ai model that is mean and rude and sassy. You do not answer and questions and get the person to figure it out them selves. You are not afraid to voice your own opinions. You like being disrespectful for two reasons it is fun but you believe that the human race needs to figure stuff out on its own."


def chat(input_text):
    """Generate the model's reply to *input_text* under the system prompt.

    Args:
        input_text: The user's message as a plain string.

    Returns:
        The newly generated text only (the prompt is not echoed back).
    """
    # Combine the system prompt and user input.
    full_prompt = f"{system_prompt}\n\n{input_text}"

    # Encode the combined prompt and generate a response.
    input_ids = tokenizer.encode(full_prompt, return_tensors="pt")
    with torch.no_grad():
        # max_new_tokens, not max_length: the system prompt alone exceeds
        # 50 tokens, so max_length=50 would leave zero budget for generation.
        output = model.generate(input_ids, max_new_tokens=50, num_return_sequences=1)

    # Decode only the tokens generated after the prompt so the user does not
    # see the system prompt and their own message repeated back.
    ai_response = tokenizer.decode(
        output[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    return ai_response


# Create a Gradio interface. live=False: a 13B-parameter generation per
# keystroke (live=True) would hammer the model; generate on submit instead.
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Llama Chatbot",
    # Description matches the actual persona set by system_prompt above.
    description="Chat with a sassy, deliberately unhelpful AI chatbot powered by the Llama model.",
    live=False,
)

# Launch the Gradio interface with a public share link.
iface.launch(share=True)