"""Gradio chat UI around the Llama-2-13b-chat causal LM."""

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Single source of truth for the checkpoint (the original duplicated this
# string literal in both from_pretrained calls).
model_name = "meta-llama/Llama-2-13b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
model.eval()  # inference only: disable dropout / training-mode layers

# System prompt prepended to every user message to frame the AI's behavior.
# NOTE(review): Llama-2-chat checkpoints were fine-tuned on the
# "[INST] <<SYS>> ... <</SYS>> ... [/INST]" template; plain concatenation
# (kept here to preserve behavior) may degrade reply quality — consider
# tokenizer.apply_chat_template. TODO confirm desired prompt format.
system_prompt = "You are chatting with a friendly AI. Ask me anything!"


def chat(input_text):
    """Generate a model reply to *input_text*.

    The system prompt and the user text are concatenated, fed to the model,
    and only the newly generated tokens are decoded — the original decoded
    the whole sequence, so every reply began with an echo of the prompt.

    Args:
        input_text: The user's message (str).

    Returns:
        The generated reply as a string; empty string for blank input.
    """
    if not input_text or not input_text.strip():
        # Guard: nothing to answer; also avoids generating on empty submits.
        return ""

    full_prompt = f"{system_prompt}\n\n{input_text}"
    input_ids = tokenizer.encode(full_prompt, return_tensors="pt")

    with torch.no_grad():
        # Bug fix: the original used max_length=50, which bounds the TOTAL
        # sequence (prompt + reply). The system prompt plus any real question
        # easily exceeds 50 tokens, leaving no budget for a reply.
        # max_new_tokens bounds only the generated continuation.
        output = model.generate(
            input_ids,
            max_new_tokens=256,
            num_return_sequences=1,
            # Llama has no pad token; reuse EOS to silence the warning.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Slice off the prompt tokens so only the model's answer is returned.
    generated_tokens = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)


iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Llama Chatbot",
    description="Chat with a friendly AI chatbot powered by the Llama model.",
    # Bug fix: live=True re-runs the function on every keystroke — each one
    # a full 13B-parameter generation. Generate only on explicit submit.
    live=False,
)

# Launch only when run as a script, so importing this module (e.g. to reuse
# chat()) does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()