# Installation (run in a notebook cell first): the model depends on the
# unsloth and transformers libraries.
# %%capture
# !pip install unsloth transformers

import torch
from huggingface_hub import login
from unsloth import FastLanguageModel
from transformers import TextStreamer

login(token="your_token_here")  # replace with your Hugging Face token


def initialize_model():
    # Load the fine-tuned model in 4-bit quantization for inference
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name="Ey-luccas/modelo_tc",
        max_seq_length=10000,
        dtype=torch.float16,
        load_in_4bit=True,
    )
    FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference mode
    return model, tokenizer


def chat_with_model(model, tokenizer):
    messages = []  # conversation history
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    print("\nWelcome to the model chat! Type 'exit' to quit.\n")

    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Ending the chat. Goodbye!")
            break

        messages.append({"role": "user", "content": user_input})
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to("cuda")

        # Generate and stream the response
        print("Model:", end=" ")
        outputs = model.generate(
            input_ids=inputs,
            streamer=text_streamer,
            max_new_tokens=10000,
            use_cache=True,
            temperature=1.0,
            min_p=0.1,
        )

        # Append the assistant's reply to the history so later turns keep context
        response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
        messages.append({"role": "assistant", "content": response})


if __name__ == "__main__":
    model, tokenizer = initialize_model()
    chat_with_model(model, tokenizer)