from huggingface_hub import login
import os

# Authenticate with the Hugging Face Hub so the gated/private model can be pulled.
# SECURITY: never hard-code a token in source (the original had a placeholder
# literal); read it from the environment instead. If HF_TOKEN is unset,
# login(token=None) falls back to huggingface_hub's interactive prompt.
login(token=os.environ.get("HF_TOKEN"))
%%capture
!pip install unsloth transformers #dependência do modelo: biblioteca unsloth e transformers
from unsloth import FastLanguageModel
from transformers import TextStreamer
def initialize_model(
    model_name="Ey-luccas/modelo_tc",
    max_seq_length=10000,
    dtype="float16",
    load_in_4bit=True,
):
    """Load the fine-tuned chat model and its tokenizer, ready for inference.

    The original hard-coded every loading option; they are now keyword
    parameters whose defaults reproduce the original behavior exactly.

    Args:
        model_name: Hugging Face Hub repo id of the model to load.
        max_seq_length: Maximum sequence length the loaded model supports.
        dtype: Weight dtype passed through to unsloth (original used "float16").
        load_in_4bit: Load weights 4-bit quantized to cut VRAM usage.

    Returns:
        Tuple ``(model, tokenizer)`` prepared for generation.
    """
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )
    # Switch the model into unsloth's optimized inference mode.
    FastLanguageModel.for_inference(model)
    return model, tokenizer
def chat_with_model(model, tokenizer):
    """Run an interactive terminal chat loop with the model.

    Streams each response to stdout as it is generated and keeps the full
    conversation history — user AND assistant turns — so the model retains
    context across turns. The user types 'sair' to exit.

    Args:
        model: A model prepared for inference (see ``initialize_model``).
        tokenizer: Matching tokenizer with a chat template configured.
    """
    messages = []  # conversation history in chat-template format
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    print("\nBem-vindo ao Chat com o Modelo! Digite 'sair' para encerrar.\n")
    while True:
        user_input = input("Você: ")
        if user_input.lower() == "sair":
            print("Encerrando o chat. Até logo!")
            break
        messages.append({"role": "user", "content": user_input})
        inputs = tokenizer.apply_chat_template(
            messages,
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(model.device)  # follow the model's device instead of hard-coding "cuda"
        # Generate the reply (streamed to stdout by text_streamer).
        print("Modelo:", end=" ")
        output_ids = model.generate(
            input_ids=inputs,
            streamer=text_streamer,
            max_new_tokens=10000,
            use_cache=True,
            temperature=1.0,
            min_p=0.1,
        )
        # BUG FIX: the original discarded the generated tokens, so the
        # assistant's replies were never added to `messages` and the model
        # lost its own side of the conversation on every turn. Decode only
        # the newly generated portion (past the prompt) and store it.
        reply = tokenizer.decode(
            output_ids[0][inputs.shape[-1]:], skip_special_tokens=True
        )
        messages.append({"role": "assistant", "content": reply})
# Script entry point: load the model once, then hand off to the chat loop.
if __name__ == "__main__":
    chat_model, chat_tokenizer = initialize_model()
    chat_with_model(chat_model, chat_tokenizer)