import torch
import transformers
import gradio as gr

# Pick the compute device once at import time: use CUDA when PyTorch reports
# it as available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The tokenizer and the model weights are loaded from the same pretrained
# checkpoint identifier, so it is spelled out only once. The code pairs a
# GPT2Tokenizer with a T5ForConditionalGeneration model.
_CHECKPOINT = "SiberiaSoft/SiberianPersonaFred-2"

t5_tokenizer = transformers.GPT2Tokenizer.from_pretrained(_CHECKPOINT)
t5_model = transformers.T5ForConditionalGeneration.from_pretrained(_CHECKPOINT)
# Move the model onto the selected device so it matches the input tensors.
t5_model = t5_model.to(device)

# Persona text that opens every prompt; the user's message and the
# <extra_id_0> slot the model is asked to fill are appended after it.
_PERSONA_PROMPT = (
    '<SC6>Я гопник ненавижу людей, здороваюсь со всему текстом куку епта, '
    'в каждом предложении говорю с матом говорю с матами и некультурно '
    'обращаюсь со всем, считаю что все мне должны, обожаю играть в дота 2 '
    'на фиг знает кем,недавно у меня был такой диалог:'
)


def _extract_reply(decoded):
    """Return the reply portion of a decoded generation string.

    Everything from the first '</s>' onward is discarded, the '<extra_id_0>'
    marker is removed, and the text is cut at the first occurrence of
    'Собеседник'. Surrounding whitespace is stripped.
    """
    reply = decoded.partition('</s>')[0]
    reply = reply.replace('<extra_id_0>', '').strip()
    return reply.partition('Собеседник')[0].strip()


def generate_response(dialog):
    """Generate the persona's reply to a single user message.

    Args:
        dialog: The user's message. ``None`` and blank/whitespace-only
            strings are treated as "no message".

    Returns:
        The cleaned-up model reply, or the string
        ``"Please enter a message."`` when no message was supplied.
    """
    # The UI/API layer may hand over None instead of a string; treat it like
    # empty input rather than failing on .strip().
    dialog = (dialog or "").strip()
    if not dialog:
        return "Please enter a message."

    # Capitalise only the first character; the rest of the message is kept
    # exactly as typed.
    dialog = dialog[0].upper() + dialog[1:]
    prompt = f'{_PERSONA_PROMPT}\nТы: {dialog}\nЯ: <extra_id_0>'

    input_ids = t5_tokenizer(prompt, return_tensors='pt').input_ids.to(device)
    out_ids = t5_model.generate(
        input_ids=input_ids,
        do_sample=True,
        temperature=0.9,
        max_new_tokens=512,
        top_p=0.85,
        top_k=2,
        repetition_penalty=1.2,
    )

    # The first token id of the generated sequence is skipped before decoding
    # (presumably the decoder start token -- confirm against the model).
    return _extract_reply(t5_tokenizer.decode(out_ids[0][1:]))

# Build the Gradio UI: one text input is passed to generate_response and the
# returned string is shown as text output.
iface = gr.Interface(
    generate_response,
    "text",
    "text",
    title="Siberian Persona Chatbot",
    description="A chatbot that responds with a Siberian persona.",
)

# Start serving the interface.
iface.launch()