ahmedembedded committed on
Commit
a504372
·
verified ·
1 Parent(s): 90a91f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -1,14 +1,14 @@
1
  import gradio as gr
2
  from peft import AutoPeftModelForCausalLM
3
- from transformers import AutoTokenizer
4
  import torch
5
 
6
  # Load the model and tokenizer
7
  model_name = "ahmedembedded/AskFAST"
8
- load_in_4bit = True
9
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10
 
11
- model = AutoPeftModelForCausalLM.from_pretrained(model_name, load_in_4bit=load_in_4bit).to(device)
12
  tokenizer = AutoTokenizer.from_pretrained(model_name)
13
 
14
  # Define the prompt context
 
import gradio as gr
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, BitsAndBytesConfig
import torch

# Load the model and tokenizer.
model_name = "ahmedembedded/AskFAST"

# 4-bit quantization (bitsandbytes) to fit the model in limited GPU memory.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Kept for downstream code (e.g. moving input tensors to the model's device).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: do NOT call `.to(device)` on a 4-bit quantized model — recent
# transformers raises ValueError ("`.to` is not supported for 4-bit or
# 8-bit bitsandbytes models"). The quantized weights are placed on the
# available device(s) at load time via `device_map="auto"`.
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define the prompt context