BoostedJonP committed
Commit 6cbf469 · 1 Parent(s): 9c0c216

remove cache usage in model

Files changed (1): app.py (+2 -6)
app.py CHANGED
@@ -4,8 +4,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
 from functools import lru_cache
 import logging
 
-MODEL_NAME = "BoostedJonP/powell-phi3-mini"
-
 logger = logging.getLogger(__name__)
 
 logging.basicConfig(level=logging.INFO)
@@ -42,7 +40,6 @@ def load_model():
             torch_dtype=torch.float16,
             device_map="auto",
             attn_implementation="eager",
-            use_cache=True,
             cache_dir="/tmp/model_cache",
         )
     else:
@@ -56,7 +53,6 @@ def load_model():
             trust_remote_code=True,
             torch_dtype=torch.float32,
             attn_implementation="eager",
-            use_cache=True,
             cache_dir="/tmp/model_cache",
             low_cpu_mem_usage=True,
         )
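For context on the two `use_cache=True` deletions above: `use_cache` is a config flag that controls KV caching at generation time, not a loading requirement, and it already defaults to `True` for most causal LM configs, so dropping the kwarg is a cleanup rather than a behavior change. Below is a minimal sketch of how the loader might look after this commit, assuming the `if torch.cuda.is_available()` branching implied by the hunks; the model id and the tokenizer call are placeholders, since the `MODEL_NAME` constant is removed in this same commit and tokenizer loading sits outside the diff.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: the commit deletes the MODEL_NAME constant, so the id actually
# used after this change is not visible in the diff.
MODEL_ID = "BoostedJonP/powell-phi3-mini"

def load_model():
    # Tokenizer loading is assumed; it does not appear in the hunks.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, cache_dir="/tmp/model_cache")
    if torch.cuda.is_available():
        # GPU path: half precision with automatic device placement.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float16,
            device_map="auto",
            attn_implementation="eager",
            cache_dir="/tmp/model_cache",
        )
    else:
        # CPU path: full precision, reduced peak RAM during load.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            torch_dtype=torch.float32,
            attn_implementation="eager",
            cache_dir="/tmp/model_cache",
            low_cpu_mem_usage=True,
        )
    return model, tokenizer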
@@ -89,7 +85,7 @@ def load_model():
 model, tokenizer = load_model()
 
 
-def generate_powell_response(question, max_length=256, num_beams=3, temperature=0.3):
+def generate_powell_response(question, max_length=256, num_beams=1, temperature=0.3):
     """Generate a response in Jerome Powell's style"""
 
     if model is None or tokenizer is None:
@@ -113,7 +109,7 @@ def generate_powell_response(question, max_length=256, num_beams=3, temperature=0.3):
         prompt,
         return_tensors="pt",
         truncation=True,
-        max_length=256,
+        max_length=max_length,
         padding=False,
     )
 
 
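The last two hunks lower the default `num_beams` from 3 to 1 and make tokenization honor the caller's `max_length` instead of a hard-coded 256. Here is a sketch of how those parameters could flow through generation; the prompt template, fallback message, and the `model.generate` call are assumptions, as they sit outside the diff.

def generate_powell_response(question, max_length=256, num_beams=1, temperature=0.3):
    """Generate a response in Jerome Powell's style"""
    if model is None or tokenizer is None:
        return "Model failed to load."  # assumed fallback text

    prompt = question  # assumed: the real prompt template is not shown in the diff
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,  # previously hard-coded to 256, ignoring the argument
        padding=False,
    ).to(model.device)  # BatchEncoding.to() moves the tensors alongside the model
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_length,  # assumed reply budget
        num_beams=num_beams,        # 1 disables beam search
        do_sample=True,             # assumed, so temperature takes effect
        temperature=temperature,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

With `num_beams=1` the model decodes a single hypothesis instead of tracking three beams, cutting per-token compute roughly in proportion to the beam count; letting `max_length` flow through means callers who ask for longer inputs are no longer silently truncated at 256 tokens.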