findthehead committed on
Commit
148a060
·
verified ·
1 Parent(s): 2cd04b6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +39 -1
README.md CHANGED
@@ -28,4 +28,42 @@ language:
28
 
29
  ### Framework versions
30
 
31
- - PEFT 0.17.1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  ### Framework versions
30
 
31
+ - PEFT 0.17.1
32
+
33
+
34
+ ### Inference Code
35
+
36
+ ```python
37
+ import torch
38
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
39
+
40
+ model_name = "Prachir-AI/Thinkmini"
41
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
42
+
43
+ # Create a BitsAndBytesConfig to enable 4-bit loading
44
+ bnb_config = BitsAndBytesConfig(
45
+ load_in_4bit=True, # Enable 4-bit loading as intended for this model
46
+ bnb_4bit_quant_type="nf4", # This is a common default for 4-bit models
47
+ bnb_4bit_compute_dtype=torch.bfloat16, # Use bfloat16 for computation
48
+ bnb_4bit_use_double_quant=True, # Often used with nf4
49
+ )
50
+
51
+ # Load the model with the configured 4-bit quantization
52
+ model = AutoModelForCausalLM.from_pretrained(
53
+ model_name,
54
+ quantization_config=bnb_config,
55
+ torch_dtype=torch.bfloat16 # Ensure the model itself is loaded with bfloat16 dtypes where applicable
56
+ )
57
+
58
+ inputs = tokenizer("How do you plan for a full pentest of a web application?", return_tensors="pt").to('cuda')
59
+ # inference mode
60
+
61
+ output_ids = model.generate(
62
+ **inputs,
63
+ max_new_tokens=500,
64
+ temperature=0.7,
65
+ top_p=0.9
66
+ )
67
+
68
+ print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
69
+ ```