---
base_model:
- unsloth/Llama-3.2-1B-Instruct
library_name: peft
license: apache-2.0
language:
- en
pipeline_tag: text-generation
---
## Model Details
- **Developed by:** HackWeasel
- **Funded by:** GT Edge AI
- **Model type:** LLM
- **Language(s) (NLP):** English
- **License:** Apache license 2.0
- **Finetuned from model:** unsloth/Llama-3.2-1B-Instruct
## Uses
Ask questions about movies which have been rated on IMDB
## How to Get Started with the Model
Use the code below to get started with the model.
```python
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Select the inference device once, up front, so every input tensor is
# moved to the same place the model lives.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def load_model(base_model_id, adapter_model_id):
    """Load tokenizer, base model, and LoRA adapter.

    Returns a ``(model, tokenizer)`` pair ready for inference; the model
    is placed across available devices via ``device_map="auto"`` and put
    into eval mode.
    """
    print("Loading models...")
    tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    # The base checkpoint carries its own quantization config, so no
    # extra quantization arguments are passed here.
    base = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    # Stack the PEFT (LoRA) adapter weights on top of the base model.
    peft_model = PeftModel.from_pretrained(
        base, adapter_model_id, device_map="auto"
    )
    peft_model.eval()  # inference only: disables dropout etc.
    print("Models loaded!")
    return peft_model, tokenizer
def generate_response(model, tokenizer, prompt, max_length=4096, temperature=0.7):
    """Generate a sampled completion for *prompt* and return the decoded text.

    ``max_length`` bounds prompt + generated tokens combined. Sampling uses
    nucleus filtering (top_p=0.95) plus top_k=40 at the given temperature.
    The returned string includes the echoed prompt (special tokens stripped).
    """
    # NOTE: relies on the module-level `device` for input placement.
    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():  # generation needs no gradient tracking
        sequences = model.generate(
            **encoded,
            max_length=max_length,
            temperature=temperature,
            do_sample=True,
            top_p=0.95,
            top_k=40,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
        )
    best = sequences[0]
    return tokenizer.decode(best, skip_special_tokens=True)
def main():
    """Run an interactive chat loop against the IMDB-finetuned LoRA model.

    Maintains a plain-text Human/Assistant transcript as context for each
    turn. Type 'quit' (or press Ctrl-C / Ctrl-D) to exit.
    """
    model, tokenizer = load_model(
        "unsloth/llama-3.2-1b-instruct-bnb-4bit",
        "HackWeasel/llama-3.2-1b-QLORA-IMDB"
    )
    conversation_history = ""
    print("\nWelcome! Start chatting with the model (type 'quit' to exit)")
    print("Note: This model is fine-tuned on IMDB reviews data")
    while True:
        try:
            user_input = input("\nYou: ").strip()
            if user_input.lower() == 'quit':
                print("Goodbye!")
                break
            if not user_input:
                # Nothing to ask; don't waste a generation on an empty turn.
                continue
            if conversation_history:
                full_prompt = f"{conversation_history}\nHuman: {user_input}\nAssistant:"
            else:
                full_prompt = f"Human: {user_input}\nAssistant:"
            response = generate_response(model, tokenizer, full_prompt)
            # generate() echoes the prompt; keep only the newest completion.
            new_response = response.split("Assistant:")[-1].strip()
            conversation_history = f"{conversation_history}\nHuman: {user_input}\nAssistant: {new_response}"
            print("\nAssistant:", new_response)
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C / Ctrl-D previously crashed with a traceback because
            # these do not derive from Exception; exit cleanly instead.
            print("\nGoodbye!")
            break
        except Exception as e:
            print(f"An error occurred: {e}")
            print("Continuing conversation...")
```
### Training Data
[mteb/imdb](https://huggingface.co/datasets/mteb/imdb) — `test.jsonl` split
### Training Procedure
QLoRA via unsloth
- PEFT 0.14.0 |