vsrinivas committed on
Commit
c500491
·
1 Parent(s): db39f16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -2,13 +2,31 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
2
  import transformers
3
  import torch
4
 
 
 
 
 
 
 
 
 
 
 
5
  # model = "tiiuae/falcon-40b-instruct"
6
- model = "tiiuae/falcon-7b-instruct"
 
 
 
 
 
 
 
7
 
8
- tokenizer = AutoTokenizer.from_pretrained(model, )
9
  pipeline = transformers.pipeline(
10
  "text-generation",
11
- model=model,
 
12
  tokenizer=tokenizer,
13
  use_safetensors=True,
14
  # torch_dtype=torch.bfloat16,
 
2
  import transformers
3
  import torch
4
 
5
+ from transformers import BitsAndBytesConfig
6
+ quantization_config = BitsAndBytesConfig(
7
+ load_in_4bit=True,
8
+ bnb_4bit_compute_dtype=torch.float16,
9
+ bnb_4bit_quant_type="nf4",
10
+ bnb_4bit_use_double_quant=True,
11
+ )
12
+
13
+ model_id = "vilsonrodrigues/falcon-7b-instruct-sharded"
14
+
15
  # model = "tiiuae/falcon-40b-instruct"
16
+ # model = "tiiuae/falcon-7b-instruct"
17
+
18
+ model_4bit = AutoModelForCausalLM.from_pretrained(
19
+ model_id,
20
+ device_map="auto",
21
+ quantization_config=quantization_config,
22
+ )
23
+ tokenizer = AutoTokenizer.from_pretrained(model_id, )
24
 
25
+ # tokenizer = AutoTokenizer.from_pretrained(model, )
26
  pipeline = transformers.pipeline(
27
  "text-generation",
28
+ # model=model,
29
+ model=model_4bit,
30
  tokenizer=tokenizer,
31
  use_safetensors=True,
32
  # torch_dtype=torch.bfloat16,