---
base_model:
- unsloth/Llama-3.2-1B-Instruct
library_name: peft
license: apache-2.0
language:
- en
pipeline_tag: text-generation
---

## Model Details

- **Developed by:** HackWeasel
- **Funded by:** GT Edge AI
- **Model type:** LLM
- **Language(s) (NLP):** English
- **License:** Apache License 2.0
- **Finetuned from model:** unsloth/Llama-3.2-1B-Instruct

## Uses

Ask questions about movies that have been rated on IMDB.

## How to Get Started with the Model

Use the code below to get started with the model.

```python
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"


def load_model(base_model_id, adapter_model_id):
    """Load the base model and its tokenizer, then attach the PEFT adapter.

    Args:
        base_model_id: Hub ID or local path of the base causal language model.
        adapter_model_id: Hub ID or local path of the PEFT adapter to apply.

    Returns:
        A ``(model, tokenizer)`` tuple. The model has been switched to
        evaluation mode.
    """
    print("Loading models...")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_id)

    # Load base model (using model's built-in quantization)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        device_map="auto",
        low_cpu_mem_usage=True,
    )

    # Load the PEFT model
    model = PeftModel.from_pretrained(
        base_model,
        adapter_model_id,
        device_map="auto",
    )

    model.eval()
    print("Models loaded!")
    return model, tokenizer


def generate_response(model, tokenizer, prompt, max_length=4096, temperature=0.7,
                      *, return_full_text=True):
    """Sample one completion for ``prompt``.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``.
        prompt: Text to continue.
        max_length: Forwarded to ``generate`` as ``max_length`` (the cap on
            prompt tokens plus generated tokens).
        temperature: Sampling temperature.
        return_full_text: If True (the default), the returned string is the
            decoded prompt followed by the continuation. If False, only the
            newly generated continuation is returned.

    Returns:
        The decoded text with special tokens stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            do_sample=True,
            top_p=0.95,
            top_k=40,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    sequence = outputs[0]
    if not return_full_text:
        # The output sequence begins with the prompt tokens; drop them so the
        # caller does not have to locate the reply by searching the text.
        sequence = sequence[inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(sequence, skip_special_tokens=True)


def main():
    """Run an interactive Human/Assistant chat loop on the console.

    The conversation so far is replayed as the prompt on every turn. Typing
    ``quit``, closing stdin, or pressing Ctrl-C at the prompt ends the session.
    """
    model, tokenizer = load_model(
        "unsloth/llama-3.2-1b-instruct-bnb-4bit",
        "HackWeasel/llama-3.2-1b-QLORA-IMDB",
    )

    conversation_history = ""
    print("\nWelcome! Start chatting with the model (type 'quit' to exit)")
    print("Note: This model is fine-tuned on IMDB reviews data")

    while True:
        try:
            user_input = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed stdin raises EOFError on every call; treating it as a
            # recoverable error would make this loop spin forever.
            print("\nGoodbye!")
            break

        if user_input.lower() == 'quit':
            print("Goodbye!")
            break
        if not user_input:
            # Nothing to ask; prompt again instead of querying the model.
            continue

        if conversation_history:
            full_prompt = f"{conversation_history}\nHuman: {user_input}\nAssistant:"
        else:
            full_prompt = f"Human: {user_input}\nAssistant:"

        try:
            completion = generate_response(
                model, tokenizer, full_prompt, return_full_text=False
            )
        except Exception as e:
            print(f"An error occurred: {e}")
            print("Continuing conversation...")
            continue

        # The model may keep writing and invent further "Human:" turns; keep
        # only its reply to the current turn.
        new_response = completion.split("\nHuman:")[0].strip()

        # Extend the history from the exact prompt that was sent so the
        # transcript never picks up a stray leading newline.
        conversation_history = f"{full_prompt} {new_response}"
        print("\nAssistant:", new_response)


if __name__ == "__main__":
    main()
```

### Training Data

datasets/mteb/imdb/tree/main/test.jsonl

### Training Procedure

QLoRA via unsloth

- PEFT 0.14.0