DisgustingOzil committed on
Commit
47fb3fa
·
verified ·
1 Parent(s): e4c3b73

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +16 -10
README.md CHANGED
@@ -19,22 +19,29 @@ os.environ["WANDB_DISABLED"] = "true"
19
 
20
  ### Inference
21
  ```python
 
22
  from peft import AutoPeftModelForCausalLM
23
  from transformers import AutoTokenizer
 
 
 
 
 
 
 
 
 
 
 
24
  model = AutoPeftModelForCausalLM.from_pretrained(
25
- "DisgustingOzil/Academic-ShortQA-Generator", # YOUR MODEL YOU USED FOR TRAINING
26
  load_in_4bit = load_in_4bit,
 
27
  )
28
- tokenizer = AutoTokenizer.from_pretrained("DisgustingOzil/Academic-ShortQA-Generator")
29
 
30
  from unsloth import FastLanguageModel
31
- model, tokenizer = FastLanguageModel.from_pretrained(
32
- model_name = "lora_model", # YOUR MODEL YOU USED FOR TRAINING
33
- max_seq_length = max_seq_length,
34
- dtype = dtype,
35
- load_in_4bit = load_in_4bit,
36
- )
37
- FastLanguageModel.for_inference(model)
38
 
39
  inputs = tokenizer(
40
  [
@@ -43,7 +50,6 @@ inputs = tokenizer(
43
  "", # output - leave this blank for generation!
44
  )
45
  ], return_tensors = "pt").to("cuda")
46
-
47
  outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
48
  tokenizer.batch_decode(outputs)
49
 
 
19
 
20
  ### Inference
21
  ```python
22
+ load_in_4bit = True
23
  from peft import AutoPeftModelForCausalLM
24
  from transformers import AutoTokenizer
25
+ import torch
26
+ alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
27
+
28
+ ### Instruction:
29
+ Generate Answer of the question asked :
30
+
31
+ ### Input:
32
+ {}
33
+
34
+ ### Response:
35
+ {}"""
36
  model = AutoPeftModelForCausalLM.from_pretrained(
37
+ "DisgustingOzil/qalaw-mistral-model", # YOUR MODEL YOU USED FOR TRAINING
38
  load_in_4bit = load_in_4bit,
39
+ torch_dtype=torch.float16,
40
  )
41
+ tokenizer = AutoTokenizer.from_pretrained("DisgustingOzil/qalaw-mistral-model")
42
 
43
  from unsloth import FastLanguageModel
44
+ FastLanguageModel.for_inference(model) # Enable native 2x faster inference
 
 
 
 
 
 
45
 
46
  inputs = tokenizer(
47
  [
 
50
  "", # output - leave this blank for generation!
51
  )
52
  ], return_tensors = "pt").to("cuda")
 
53
  outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
54
  tokenizer.batch_decode(outputs)
55