Monimoy committed on
Commit
219c6aa
·
verified ·
1 Parent(s): 393615e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +10 -4
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import gradio as gr
4
  import torch
5
  from PIL import Image
6
- from transformers import AutoTokenizer, AutoModelForCausalLM
7
  import timm
8
  from torchvision import transforms
9
  from llama_cpp import Llama
@@ -72,9 +72,15 @@ image_encoder.eval() # Set to evaluation mode
72
 
73
  base_model_name="microsoft/Phi-3-mini-4k-instruct"
74
  device = "cpu"
75
-
76
- base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32, device_map={"": device})
77
-
 
 
 
 
 
 
78
 
79
  # Load and merge
80
  model = PeftModel.from_pretrained(base_model, peft_model_path, offload_dir='./offload')
 
3
  import gradio as gr
4
  import torch
5
  from PIL import Image
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
7
  import timm
8
  from torchvision import transforms
9
  from llama_cpp import Llama
 
72
 
73
  base_model_name="microsoft/Phi-3-mini-4k-instruct"
74
  device = "cpu"
75
+ bnb_config = BitsAndBytesConfig(
76
+ load_in_4bit=True,
77
+ bnb_4bit_use_double_quant=True,
78
+ bnb_4bit_quant_type="nf4",
79
+ bnb_4bit_compute_dtype=torch.bfloat16
80
+ )
81
+ #base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.float32, device_map={"": device})
82
+ base_model = AutoModelForCausalLM.from_pretrained(base_model_name, torch_dtype=torch.bfloat16, trust_remote_code=True, # Important for some Phi-3 variants
83
+ quantization_config=bnb_config, device_map={"": device})
84
 
85
  # Load and merge
86
  model = PeftModel.from_pretrained(base_model, peft_model_path, offload_dir='./offload')
requirements.txt CHANGED
@@ -5,4 +5,5 @@ timm
5
  Pillow
6
  transformers
7
  llama-cpp-python
8
- peft
 
 
5
  Pillow
6
  transformers
7
  llama-cpp-python
8
+ peft
9
+ bitsandbytes