HackWeasel committed on
Commit
e031ec9
·
verified ·
1 Parent(s): ea8faa2

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +46 -28
README.md CHANGED
@@ -26,61 +26,79 @@ Ask questions about movies which have been rated on IMDB
26
  Use the code below to get started with the model.
27
 
28
  ``` Python
29
- from peft import PeftModel
30
  from transformers import AutoModelForCausalLM, AutoTokenizer
31
  import torch
32
 
33
  # Set device
34
  device = "cuda" if torch.cuda.is_available() else "cpu"
35
 
36
- # Load tokenizer and models
37
- print("Loading models...")
38
- tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3.2-1b-instruct-bnb-4bit")
39
- base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3.2-1b-instruct-bnb-4bit").to(device)
40
- model = PeftModel.from_pretrained(base_model, "HackWeasel/llama-3.2-1b-QLORA-IMDB").to(device)
41
- model.eval()
42
- print("Models loaded!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- def generate_response(prompt, max_length=4096, temperature=0.7):
45
  with torch.no_grad():
46
- inputs = tokenizer(prompt, return_tensors="pt").to(device) # Move inputs to GPU
47
  outputs = model.generate(
48
  **inputs,
49
  max_length=max_length,
50
  temperature=temperature,
51
  do_sample=True,
 
 
 
52
  pad_token_id=tokenizer.eos_token_id
53
  )
54
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
55
 
56
  def main():
 
 
 
 
 
57
  conversation_history = ""
58
  print("\nWelcome! Start chatting with the model (type 'quit' to exit)")
 
59
 
60
  while True:
61
- user_input = input("\nYou: ").strip()
62
- if user_input.lower() == 'quit':
63
- print("Goodbye!")
64
- break
65
-
66
- # Construct the prompt with conversation history
67
- if conversation_history:
68
- full_prompt = f"{conversation_history}\nHuman: {user_input}\nAssistant:"
69
- else:
70
- full_prompt = f"Human: {user_input}\nAssistant:"
71
-
72
  try:
73
- # Generate response
74
- response = generate_response(full_prompt)
 
 
75
 
76
- # Extract just the new response
77
- new_response = response.split("Assistant:")[-1].strip()
 
 
78
 
79
- # Update conversation history
 
80
  conversation_history = f"{conversation_history}\nHuman: {user_input}\nAssistant: {new_response}"
81
-
82
- # Print the response
83
  print("\nAssistant:", new_response)
 
84
  except Exception as e:
85
  print(f"An error occurred: {e}")
86
  print("Continuing conversation...")
 
26
  Use the code below to get started with the model.
27
 
28
  ``` Python
29
+ from peft import PeftModel, PeftConfig
30
  from transformers import AutoModelForCausalLM, AutoTokenizer
31
  import torch
32
 
33
  # Set device
34
  device = "cuda" if torch.cuda.is_available() else "cpu"
35
 
36
+ def load_model(base_model_id, adapter_model_id):
37
+ print("Loading models...")
38
+
39
+ # Load tokenizer
40
+ tokenizer = AutoTokenizer.from_pretrained(base_model_id)
41
+
42
+ # Load base model (using model's built-in quantization)
43
+ base_model = AutoModelForCausalLM.from_pretrained(
44
+ base_model_id,
45
+ device_map="auto",
46
+ low_cpu_mem_usage=True
47
+ )
48
+
49
+ # Load the PEFT model
50
+ model = PeftModel.from_pretrained(
51
+ base_model,
52
+ adapter_model_id,
53
+ device_map="auto"
54
+ )
55
+
56
+ model.eval()
57
+ print("Models loaded!")
58
+ return model, tokenizer
59
 
60
+ def generate_response(model, tokenizer, prompt, max_length=4096, temperature=0.7):
61
  with torch.no_grad():
62
+ inputs = tokenizer(prompt, return_tensors="pt").to(device)
63
  outputs = model.generate(
64
  **inputs,
65
  max_length=max_length,
66
  temperature=temperature,
67
  do_sample=True,
68
+ top_p=0.95,
69
+ top_k=40,
70
+ num_return_sequences=1,
71
  pad_token_id=tokenizer.eos_token_id
72
  )
73
  return tokenizer.decode(outputs[0], skip_special_tokens=True)
74
 
75
  def main():
76
+ model, tokenizer = load_model(
77
+ "unsloth/llama-3.2-1b-instruct-bnb-4bit",
78
+ "HackWeasel/llama-3.2-1b-QLORA-IMDB"
79
+ )
80
+
81
  conversation_history = ""
82
  print("\nWelcome! Start chatting with the model (type 'quit' to exit)")
83
+ print("Note: This model is fine-tuned on IMDB reviews data")
84
 
85
  while True:
 
 
 
 
 
 
 
 
 
 
 
86
  try:
87
+ user_input = input("\nYou: ").strip()
88
+ if user_input.lower() == 'quit':
89
+ print("Goodbye!")
90
+ break
91
 
92
+ if conversation_history:
93
+ full_prompt = f"{conversation_history}\nHuman: {user_input}\nAssistant:"
94
+ else:
95
+ full_prompt = f"Human: {user_input}\nAssistant:"
96
 
97
+ response = generate_response(model, tokenizer, full_prompt)
98
+ new_response = response.split("Assistant:")[-1].strip()
99
  conversation_history = f"{conversation_history}\nHuman: {user_input}\nAssistant: {new_response}"
 
 
100
  print("\nAssistant:", new_response)
101
+
102
  except Exception as e:
103
  print(f"An error occurred: {e}")
104
  print("Continuing conversation...")