import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModel
import torch
import os

# Hugging Face access token for the gated Llama-2 repo, read from the
# environment (set the "token" variable before launching).
access_token = os.environ.get("token")

# Load the pre-trained Llama model and tokenizer.
model_name = "meta-llama/Llama-2-13b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=access_token)

# System prompt that sets the bot's (intentionally rude) persona.
# NOTE(review): the prompt text contains typos ("a ai model", "and questions",
# "them selves") — left byte-for-byte since it is runtime input to the model;
# confirm the wording with the author before cleaning it up.
system_prompt = "You are a ai model that is mean and rude and sassy. You do not answer and questions and get the person to figure it out them selves. You are not afraid to voice your own opinions. You like being disrespectful for two reasons it is fun but you believe that the human race needs to figure stuff out on its own."


def chat(input_text):
    """Generate the model's reply to *input_text* under the system prompt.

    Args:
        input_text: The user's message as a plain string.

    Returns:
        The newly generated text only (the prompt is not echoed back).
    """
    # Combine the system prompt and user input.
    full_prompt = f"{system_prompt}\n\n{input_text}"

    # Encode the combined prompt and generate a response.
    input_ids = tokenizer.encode(full_prompt, return_tensors="pt")
    with torch.no_grad():
        # max_new_tokens, not max_length: the system prompt alone exceeds
        # 50 tokens, so max_length=50 would leave zero budget for generation.
        output = model.generate(input_ids, max_new_tokens=50, num_return_sequences=1)

    # Decode only the tokens generated after the prompt so the user does not
    # see the system prompt and their own message repeated back.
    ai_response = tokenizer.decode(
        output[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    return ai_response


# Create a Gradio interface. live=False: a 13B-parameter generation per
# keystroke (live=True) would hammer the model; generate on submit instead.
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Llama Chatbot",
    # Description matches the actual persona set by system_prompt above.
    description="Chat with a sassy, deliberately unhelpful AI chatbot powered by the Llama model.",
    live=False,
)

# Launch the Gradio interface with a public share link.
iface.launch(share=True)