rakshath1 committed
Commit d653aa3 · verified · 1 Parent(s): 6acef7d

Update README.md

Files changed (1)
  1. README.md +19 -15
README.md CHANGED
@@ -59,31 +59,35 @@ To verify results locally using the transformers and peft libraries:
 ```
 from peft import PeftModel
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 
-# 1. Load the base model in 4-bit for efficiency
-base_model = "mistralai/Mistral-7B-v0.3"
-model = AutoModelForCausalLM.from_pretrained(
-    base_model,
-    load_in_4bit=True,
-    torch_dtype=torch.float16,
+# 1. Setup 4-bit configuration
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_quant_type="nf4"
+)
+
+# 2. Load Base Model and Adapter
+base_model_id = "mistralai/Mistral-7B-v0.3"
+adapter_id = "rakshath1/it-support-mistral-7b-expert"
+
+base_model = AutoModelForCausalLM.from_pretrained(
+    base_model_id,
+    quantization_config=quantization_config,
     device_map="auto"
 )
 
-# 2. Load the Expert Adapter and Tokenizer
-adapter_path = "rakshath1/it-support-mistral-7b-expert"
-model = PeftModel.from_pretrained(model, adapter_path)
-tokenizer = AutoTokenizer.from_pretrained(adapter_path)
-tokenizer.pad_token = tokenizer.eos_token
+model = PeftModel.from_pretrained(base_model, adapter_id)
+tokenizer = AutoTokenizer.from_pretrained(base_model_id)
 
-# 3. Test a ticket
+# 3. Inference
 ticket = "### Instruction:\nTicket: 'VPN access denied for user in Mangalore office.'\n\n### Response:\n"
 inputs = tokenizer(ticket, return_tensors="pt").to("cuda")
 
-# Generate response
 with torch.no_grad():
-    outputs = model.generate(**inputs, max_new_tokens=64, pad_token_id=tokenizer.eos_token_id)
+    outputs = model.generate(**inputs, max_new_tokens=64)
 
 print(tokenizer.decode(outputs[0], skip_special_tokens=True).split("### Response:\n")[-1])
 ```
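
For reference, the updated snippet reads end to end as follows. This is a minimal sketch assembled from the added lines above, assuming a CUDA-capable GPU and the bitsandbytes package installed; the explicit BitsAndBytesConfig replaces the deprecated load_in_4bit keyword on from_pretrained.

```
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# 4-bit NF4 quantization, computing in float16 (mirrors the diff above)
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)

base_model_id = "mistralai/Mistral-7B-v0.3"
adapter_id = "rakshath1/it-support-mistral-7b-expert"

# Load the quantized base model, then attach the LoRA adapter on top
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=quantization_config,
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, adapter_id)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Format a ticket in the prompt template used above and generate a response
ticket = "### Instruction:\nTicket: 'VPN access denied for user in Mangalore office.'\n\n### Response:\n"
inputs = tokenizer(ticket, return_tensors="pt").to("cuda")
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True).split("### Response:\n")[-1])
```

Note that the Mistral tokenizer ships without a pad token, so if you batch multiple tickets with padding you would likely still want the `tokenizer.pad_token = tokenizer.eos_token` line that the old snippet set; for the single-prompt case shown here it is not needed.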