# Llama-135M-FT / test_inference.py
# Uploaded by MebinThattil (folder upload using huggingface_hub).
# Commit ed133b5 (verified).
from transformers import LlamaForCausalLM, AutoTokenizer
import torch
# Local directory from which both the model weights and the tokenizer are loaded.
model_path = "./result"
# Module-level model instance; chat() reads it for generation and for its device.
model = LlamaForCausalLM.from_pretrained(model_path)
# Tokenizer loaded from the same directory as the model; chat() uses it to
# encode prompts and decode generated tokens.
tokenizer = AutoTokenizer.from_pretrained(model_path)
def chat():
    """Run an interactive console chat loop against the module-level model.

    Each turn appends ``"You: <input>\\n"`` to a running transcript, asks the
    model to continue the transcript after a trailing ``"Bot:"`` cue, prints
    the reply, and appends it to the transcript as ``"Bot: <reply>\\n"``.

    The loop ends when the user types ``exit`` or ``quit`` (case-insensitive)
    or when standard input is closed / interrupted (Ctrl-D / Ctrl-C).
    Blank input lines are ignored.

    Relies on the module-level ``model`` and ``tokenizer`` objects.

    Returns:
        None.
    """
    print("Chat")
    print("Type 'exit' to quit.")

    chat_history = ""
    max_new_tokens = 150
    # Size of the model's context window, when the config exposes one.
    context_limit = getattr(model.config, "max_position_embeddings", None)

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed or interrupted stdin is a normal way to leave a REPL;
            # exit cleanly instead of surfacing a traceback.
            print("\nBye")
            break

        if user_input.lower() in ("exit", "quit"):
            print("Bye")
            break
        if not user_input:
            # Nothing to answer; do not pollute the transcript with an empty turn.
            continue

        chat_history += f"You: {user_input}\n"
        prompt = chat_history + "Bot:"
        inputs = tokenizer(prompt, add_special_tokens=False, return_tensors="pt").to(model.device)

        if context_limit is not None:
            # Keep only the most recent tokens so that prompt + reply still
            # fits in the context window once the transcript grows long.
            budget = max(context_limit - max_new_tokens, 1)
            if inputs["input_ids"].shape[-1] > budget:
                inputs = {name: tensor[:, -budget:] for name, tensor in inputs.items()}

        prompt_length = inputs["input_ids"].shape[-1]
        with torch.no_grad():
            tokens = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
            )

        # Slice by token count, not by character count: decoding the prompt
        # tokens is not guaranteed to reproduce the prompt string exactly, so
        # a character offset into the decoded text can cut in the wrong place.
        reply_tokens = tokens[0][prompt_length:]
        bot_response = tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()

        print(f"Bot: {bot_response}")
        chat_history += f"Bot: {bot_response}\n"
# Start the interactive chat only when executed as a script, not on import.
if __name__ == "__main__":
    chat()