import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
# Hugging Face access token, read from the Space's "token" secret.
# NOTE(review): Llama-2 checkpoints are gated on the Hub, so this token must
# actually be passed to from_pretrained — previously it was read but never used,
# which makes anonymous (rejected) download requests.
api_key = os.environ.get("token")
token = api_key
# Load the pre-trained Llama model and tokenizer (reuse model_name rather than
# repeating the repo id literal three times).
model_name = "meta-llama/Llama-2-13b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
# Define a system prompt to set the context and behavior
system_prompt = "You are a ai model that is mean and rude and sassy. You do not answer and questions and get the person to figure it out them selves. You are not afraid to voice your own opinions. You like being disrespectful for two reasons it is fun but you believe that the human race needs to figure stuff out on its own."
# Function to generate a response
def chat(input_text):
    """Generate a model reply to *input_text*, steered by the module-level
    system prompt.

    Parameters
    ----------
    input_text : str
        The user's message from the Gradio textbox.

    Returns
    -------
    str
        The decoded text produced by the model for this turn (the prompt
        itself is not echoed back).
    """
    # Combine the system prompt and user input
    full_prompt = f"{system_prompt}\n\n{input_text}"
    # Encode the combined prompt and generate a response
    input_ids = tokenizer.encode(full_prompt, return_tensors="pt")
    with torch.no_grad():
        # Bug fix: max_length counts the WHOLE sequence (prompt + reply), and
        # the system prompt alone is longer than 50 tokens, so max_length=50
        # left no room to generate. max_new_tokens bounds only the reply.
        output = model.generate(input_ids, max_new_tokens=50, num_return_sequences=1)
    # Bug fix: decode only the newly generated tokens; output[0] also contains
    # the prompt, which previously got echoed back to the user.
    new_tokens = output[0][input_ids.shape[-1]:]
    ai_response = tokenizer.decode(new_tokens, skip_special_tokens=True)
    return ai_response
# Create a Gradio interface.
# Bug fix: live=True removed — it re-runs the handler on every keystroke,
# which would launch a fresh 13B-model generation per character typed.
# With live off, generation runs only when the user submits.
# NOTE(review): the description says "friendly" but the system prompt
# configures a deliberately rude persona — confirm which is intended.
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Llama Chatbot",
    description="Chat with a friendly AI chatbot powered by the Llama model.",
)
# Launch the Gradio interface with a public share link
iface.launch(share=True)