from transformers import LlamaForCausalLM, AutoTokenizer
import torch

model_path = "./result"
model = LlamaForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

def chat():
    print("Chat")
    print("Type 'exit' to quit.")
    chat_history = ""
    while True:
        user_input = input("You: ").strip()
        if user_input.lower() in ["exit", "quit"]:
            print("Bye")
            break
        # Append the user turn, then prompt the model to continue as "Bot:".
        chat_history += f"You: {user_input}\n"
        prompt = chat_history + "Bot:"
        inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)
        with torch.no_grad():
            tokens = model.generate(
                **inputs,
                max_new_tokens=150,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,  # silences the missing-pad-token warning
            )
        # Strip the prompt by token count rather than character count: decoding
        # the full sequence and slicing with len(prompt) can misalign whenever
        # the tokenizer does not round-trip the prompt text exactly.
        new_tokens = tokens[0][inputs["input_ids"].shape[-1]:]
        bot_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
        print(f"Bot: {bot_response}")
        # Keep the exchange so later turns see the full conversation so far.
        chat_history += f"Bot: {bot_response}\n"

if __name__ == "__main__":
    chat()