---
base_model:
- unsloth/Llama-3.2-1B-Instruct
library_name: peft
license: apache-2.0
language:
- en
pipeline_tag: text-generation
---

## Model Details

- **Developed by:** HackWeasel
- **Funded by:** GT Edge AI
- **Model type:** LLM
- **Language(s) (NLP):** English
- **License:** Apache License 2.0
- **Finetuned from model:** unsloth/Llama-3.2-1B-Instruct
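
You can confirm which base model the adapter expects by reading its PEFT config from the Hub; a quick check, assuming `peft` is installed:

```python
from peft import PeftConfig

# The adapter config records the base model it was trained against
config = PeftConfig.from_pretrained("HackWeasel/llama-3.2-1b-QLORA-IMDB")
print(config.base_model_name_or_path)
```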

## Uses

Ask questions about movies that have been rated on IMDb, e.g. "What do reviewers think of The Godfather?"

## How to Get Started with the Model

Use the code below to get started with the model.

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"

def load_model(base_model_id, adapter_model_id):
    print("Loading models...")

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_id)

    # Load base model (using the model's built-in quantization)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id,
        device_map="auto",
        low_cpu_mem_usage=True
    )

    # Load the PEFT adapter on top of the base model
    model = PeftModel.from_pretrained(
        base_model,
        adapter_model_id,
        device_map="auto"
    )

    model.eval()
    print("Models loaded!")
    return model, tokenizer

def generate_response(model, tokenizer, prompt, max_new_tokens=512, temperature=0.7):
    with torch.no_grad():
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs,
            # Budget new tokens only, so a growing conversation history
            # doesn't eat into the reply length (unlike max_length)
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            do_sample=True,
            top_p=0.95,
            top_k=40,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def main():
    model, tokenizer = load_model(
        "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
        "HackWeasel/llama-3.2-1b-QLORA-IMDB"
    )

    conversation_history = ""
    print("\nWelcome! Start chatting with the model (type 'quit' to exit)")
    print("Note: This model is fine-tuned on IMDb reviews data")

    while True:
        try:
            user_input = input("\nYou: ").strip()
            if user_input.lower() == 'quit':
                print("Goodbye!")
                break

            # Append the new turn to the running transcript
            if conversation_history:
                full_prompt = f"{conversation_history}\nHuman: {user_input}\nAssistant:"
            else:
                full_prompt = f"Human: {user_input}\nAssistant:"

            response = generate_response(model, tokenizer, full_prompt)
            # generate() echoes the prompt, so keep only the newest Assistant turn
            new_response = response.split("Assistant:")[-1].strip()
            conversation_history = f"{conversation_history}\nHuman: {user_input}\nAssistant: {new_response}"
            print("\nAssistant:", new_response)

        except Exception as e:
            print(f"An error occurred: {e}")
            print("Continuing conversation...")

if __name__ == "__main__":
    main()
```
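
If you want to run the fine-tuned model without a `peft` dependency at inference time, the adapter can be folded into the base weights. A minimal sketch, assuming the unquantized base model (merging into a 4-bit checkpoint is lossy and not recommended); the output directory name is illustrative:

```python
# Merge the LoRA adapter into the base weights so the result loads as a
# plain transformers checkpoint (no peft needed at runtime)
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
model = PeftModel.from_pretrained(base, "HackWeasel/llama-3.2-1b-QLORA-IMDB")
merged = model.merge_and_unload()  # folds the LoRA deltas into the base weights
merged.save_pretrained("llama-3.2-1b-imdb-merged")
```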

### Training Data

The `test.jsonl` split of the [mteb/imdb](https://huggingface.co/datasets/mteb/imdb) dataset on the Hugging Face Hub.
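
A minimal sketch for inspecting that data with the `datasets` library:

```python
from datasets import load_dataset

# Load the split the adapter was trained on
imdb = load_dataset("mteb/imdb", split="test")
print(imdb)                    # column names and row count
print(imdb[0]["text"][:200])   # first 200 characters of the first review
```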

### Training Procedure

QLoRA via [unsloth](https://github.com/unslothai/unsloth).
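
A minimal sketch of what a QLoRA run with unsloth looks like; the LoRA rank, target modules, and trainer settings below are illustrative assumptions, not the exact values used for this adapter (the `SFTTrainer` arguments shown match older trl releases; newer ones move some of them into `SFTConfig`):

```python
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# Load the base model in 4-bit (the "Q" in QLoRA)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-Instruct",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Attach trainable LoRA adapters on top of the frozen 4-bit base
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)

dataset = load_dataset("mteb/imdb", split="test")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-4,
        output_dir="outputs",
    ),
)
trainer.train()

model.save_pretrained("llama-3.2-1b-QLORA-IMDB")  # adapter weights only
```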

### Framework versions

- PEFT 0.14.0