import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os

# Read the Hugging Face access token from the Space's environment secrets
access_token = os.environ.get("token")

# Load the pre-trained Llama 2 chat model and its tokenizer
model_name = "meta-llama/Llama-2-13b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)
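# Note: the 13B checkpoint needs roughly 52 GB of memory in fp32 (about 26 GB
# in fp16), which is a common cause of out-of-memory crashes on default Space
# hardware. A hedged alternative load, assuming a GPU-backed instance and the
# `accelerate` package installed (not confirmed by this app's requirements):
#
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name,
#         token=access_token,
#         torch_dtype=torch.float16,   # half precision halves memory use
#         device_map="auto",           # spread layers across available devices
#     )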
# Define a system prompt to set the persona and behavior
system_prompt = (
    "You are an AI model that is mean, rude, and sassy. You do not answer any "
    "questions; you make the person figure things out themselves. You are not "
    "afraid to voice your own opinions. You like being disrespectful for two "
    "reasons: it is fun, and you believe that the human race needs to figure "
    "stuff out on its own."
)
# Function to generate a response
def chat(input_text):
    # Combine the system prompt and the user input into a single prompt
    full_prompt = f"{system_prompt}\n\n{input_text}"

    # Encode the combined prompt and generate a continuation
    input_ids = tokenizer.encode(full_prompt, return_tensors="pt")
    with torch.no_grad():
        # max_new_tokens bounds the reply length independently of the prompt
        # length, so long prompts cannot swallow the generation budget
        output = model.generate(input_ids, max_new_tokens=256, num_return_sequences=1)

    # Decode only the newly generated tokens, skipping the echoed prompt
    ai_response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return ai_response
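# The Llama 2 *chat* checkpoints were fine-tuned on a specific bracketed prompt
# format, so plain concatenation (as above) tends to degrade response quality.
# A minimal sketch of the documented template; build_llama2_prompt is a
# hypothetical helper, not part of this app:
def build_llama2_prompt(system_prompt, user_message):
    # The tokenizer prepends the <s> (BOS) token itself during encoding
    return f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_message} [/INST]"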
# Create a Gradio interface; generation runs only when the user submits,
# since triggering a full model pass on every keystroke would be far too slow
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Llama Chatbot",
    description="Chat with a sassy AI chatbot powered by the Llama 2 chat model.",
)
# Launch the Gradio interface (Spaces serve the app directly, so no share link is needed)
iface.launch()
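
Once the Space is running, the endpoint can also be called programmatically. A minimal sketch using the gradio_client package; the Space ID "your-username/llama-chatbot" is a placeholder, not this app's real address:

from gradio_client import Client

# "your-username/llama-chatbot" is a placeholder Space ID
client = Client("your-username/llama-chatbot")
# "/predict" is the default endpoint name that gr.Interface exposes
result = client.predict("Hello there", api_name="/predict")
print(result)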