FOLZi committed on
Commit
d97bb2a
·
verified ·
1 Parent(s): 99ffeb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -5,7 +5,7 @@ from typing import Iterator
5
  import gradio as gr
6
  import spaces
7
  import torch
8
- from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList
9
 
10
  # DESCRIPTION = """\
11
  # # FinID
@@ -35,6 +35,7 @@ Your responses should only be within the financial subject, any other prompt or
35
  # DEFAULT_MAX_NEW_TOKENS = 512
36
 
37
  model_id = "FOLZi/FinID_v2_8B_Chat"
 
38
 
39
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
40
  model = AutoModelForCausalLM.from_pretrained(
@@ -42,6 +43,7 @@ model = AutoModelForCausalLM.from_pretrained(
42
  trust_remote_code=True,
43
  device_map="auto",
44
  torch_dtype=torch.float16,
 
45
  )
46
  # model.config.sliding_window = 4096
47
  # model.eval()
 
5
  import gradio as gr
6
  import spaces
7
  import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, StoppingCriteria, StoppingCriteriaList, BitsAndBytesConfig
9
 
10
  # DESCRIPTION = """\
11
  # # FinID
 
35
  # DEFAULT_MAX_NEW_TOKENS = 512
36
 
37
  model_id = "FOLZi/FinID_v2_8B_Chat"
38
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
39
 
40
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
41
  model = AutoModelForCausalLM.from_pretrained(
 
43
  trust_remote_code=True,
44
  device_map="auto",
45
  torch_dtype=torch.float16,
46
+ quantization_config=quantization_config
47
  )
48
  # model.config.sliding_window = 4096
49
  # model.eval()