from transformers import AutoTokenizer, TextStreamer
from unsloth import FastLanguageModel
import torch


# Fine-tuned Llama 3.2 1B checkpoint on the Hugging Face Hub.
model_name = "Rafay17/Llama3.2_1b_customModle2"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# BUG FIX: FastLanguageModel.from_pretrained returns a (model, tokenizer)
# tuple, not a bare model. The original code assigned the whole tuple to
# `model`, which would make FastLanguageModel.for_inference(model) fail.
# Also pass a real torch dtype instead of the string "float16".
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=512,   # must match the max_length used at tokenization time
    dtype=torch.float16,
    load_in_4bit=True,    # 4-bit quantization to cut VRAM usage
)

# Switch unsloth's optimized inference mode on before generating.
FastLanguageModel.for_inference(model)
def generate_response(input_text):
    """Stream a generated reply for *input_text* to stdout.

    Args:
        input_text: Raw user text; wrapped in the "User Input: ...\\nResponse:"
            prompt format before tokenization.

    Returns:
        None. Output is streamed token-by-token via TextStreamer.
    """
    labeled_prompt = f"User Input: {input_text}\nResponse:"

    # Move inputs to wherever the model actually lives instead of
    # hard-coding "cuda" — the original crashed on CPU-only machines.
    inputs = tokenizer(
        [labeled_prompt],
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,  # matches max_seq_length used when loading the model
    ).to(model.device)

    # Stream decoded tokens as they are produced; don't echo the prompt.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)

    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            streamer=text_streamer,
            max_new_tokens=100,
            # Silences the "no pad token" warning for models without one.
            pad_token_id=tokenizer.eos_token_id,
        )
def user_interaction():
    """Interactive chat loop: read user text, stream the model's reply.

    Typing 'exit' (any case) ends the session.
    """
    print("Welcome to the Chatbot! Type 'exit' to quit.")
    # Walrus-driven loop: read and test the sentinel in one expression.
    while (user_input := input("You: ")).lower() != 'exit':
        print("Chatbot is generating a response...")
        generate_response(user_input)
    print("Exiting the chatbot. Goodbye!")
# Run the chat loop only when executed as a script, not when imported.
if __name__ == "__main__":
    user_interaction()