arjunanand13 committed on
Commit
edb7051
·
verified ·
1 Parent(s): 24f8284

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -6
app.py CHANGED
@@ -8,15 +8,39 @@ import spaces
8
  import subprocess
9
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
10
 
 
 
 
11
 
12
- processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
13
 
14
- model = Idefics2ForConditionalGeneration.from_pretrained(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  "HuggingFaceM4/idefics2-8b",
16
- torch_dtype=torch.bfloat16,
17
- # _attn_implementation="flash_attention_2",
18
- # trust_remote_code=True
19
- ).to("cuda")
 
 
 
 
 
 
 
20
 
21
  import gradio as gr
22
  from huggingface_hub import InferenceApi
 
# --- Runtime dependency install -------------------------------------------
# flash-attn is installed at startup (common Hugging Face Spaces pattern,
# since it needs the CUDA toolchain present at build time).
import subprocess
# NOTE(review): env={...} REPLACES the whole environment rather than
# extending it (PATH, HOME, CUDA_* are dropped). This often still works
# because /bin/sh falls back to a default PATH, but consider
# {**os.environ, 'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"} — confirm.
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)

# --- Configuration flags ---------------------------------------------------
# NOTE(review): DEVICE is defined but never used in this span — the
# non-adapter branch below hard-codes .to("cuda") instead. Verify intent.
DEVICE = "cuda:0"
USE_LORA = False    # full-precision LoRA adapters (DoRA variant)
USE_QLORA = True    # 4-bit quantized base model + LoRA adapters

# Processor for IDEFICS2; image splitting disabled
# (do_image_splitting=False trades accuracy on large images for speed/memory).
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b", do_image_splitting=False)

# --- Model loading ---------------------------------------------------------
# LoraConfig / BitsAndBytesConfig / Idefics2ForConditionalGeneration / torch
# are presumably imported earlier in the file (peft / transformers) —
# imports are outside this visible span; TODO confirm.
if USE_QLORA or USE_LORA:
    # LoRA adapter targeting all attention + MLP projections in the text
    # model, modality projection, and perceiver resampler.
    lora_config = LoraConfig(
        r=8,
        lora_alpha=8,
        lora_dropout=0.1,
        target_modules='.*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*',
        # DoRA is incompatible with the 4-bit quantized path, so it is only
        # enabled for plain LoRA.
        use_dora=False if USE_QLORA else True,
        init_lora_weights="gaussian"
    )
    # 4-bit NF4 quantization config; None when running plain (non-quantized) LoRA.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    ) if USE_QLORA else None
    # Quantized weights are placed by bitsandbytes at load time, so no
    # explicit .to(device) here.
    model = Idefics2ForConditionalGeneration.from_pretrained(
        "HuggingFaceM4/idefics2-8b",
        torch_dtype=torch.float16,
        quantization_config=bnb_config,
    )
    # Attach and activate (untrained, gaussian-initialized) LoRA adapters.
    model.add_adapter(lora_config)
    model.enable_adapters()
else:
    # No adapters: load the full model with flash-attention 2 on the GPU.
    model = Idefics2ForConditionalGeneration.from_pretrained(
        "HuggingFaceM4/idefics2-8b",
        torch_dtype=torch.float16,
        _attn_implementation="flash_attention_2"
    ).to("cuda")

# UI / inference-API imports for the app section that follows.
import gradio as gr
from huggingface_hub import InferenceApi