Spaces:
Sleeping
Sleeping
Commit ·
6cbf469
1
Parent(s): 9c0c216
remove cache usage in model
Browse files
app.py
CHANGED
|
@@ -4,8 +4,6 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
|
|
| 4 |
from functools import lru_cache
|
| 5 |
import logging
|
| 6 |
|
| 7 |
-
MODEL_NAME = "BoostedJonP/powell-phi3-mini"
|
| 8 |
-
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -42,7 +40,6 @@ def load_model():
|
|
| 42 |
torch_dtype=torch.float16,
|
| 43 |
device_map="auto",
|
| 44 |
attn_implementation="eager",
|
| 45 |
-
use_cache=True,
|
| 46 |
cache_dir="/tmp/model_cache",
|
| 47 |
)
|
| 48 |
else:
|
|
@@ -56,7 +53,6 @@ def load_model():
|
|
| 56 |
trust_remote_code=True,
|
| 57 |
torch_dtype=torch.float32,
|
| 58 |
attn_implementation="eager",
|
| 59 |
-
use_cache=True,
|
| 60 |
cache_dir="/tmp/model_cache",
|
| 61 |
low_cpu_mem_usage=True,
|
| 62 |
)
|
|
@@ -89,7 +85,7 @@ def load_model():
|
|
| 89 |
model, tokenizer = load_model()
|
| 90 |
|
| 91 |
|
| 92 |
-
def generate_powell_response(question, max_length=256, num_beams=3, temperature=…
|
| 93 |
"""Generate a response in Jerome Powell's style"""
|
| 94 |
|
| 95 |
if model is None or tokenizer is None:
|
|
@@ -113,7 +109,7 @@ def generate_powell_response(question, max_length=256, num_beams=3, temperature=
|
|
| 113 |
prompt,
|
| 114 |
return_tensors="pt",
|
| 115 |
truncation=True,
|
| 116 |
-
max_length=
|
| 117 |
padding=False,
|
| 118 |
)
|
| 119 |
|
|
|
|
| 4 |
from functools import lru_cache
|
| 5 |
import logging
|
| 6 |
|
|
|
|
|
|
|
| 7 |
logger = logging.getLogger(__name__)
|
| 8 |
|
| 9 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 40 |
torch_dtype=torch.float16,
|
| 41 |
device_map="auto",
|
| 42 |
attn_implementation="eager",
|
|
|
|
| 43 |
cache_dir="/tmp/model_cache",
|
| 44 |
)
|
| 45 |
else:
|
|
|
|
| 53 |
trust_remote_code=True,
|
| 54 |
torch_dtype=torch.float32,
|
| 55 |
attn_implementation="eager",
|
|
|
|
| 56 |
cache_dir="/tmp/model_cache",
|
| 57 |
low_cpu_mem_usage=True,
|
| 58 |
)
|
|
|
|
| 85 |
model, tokenizer = load_model()
|
| 86 |
|
| 87 |
|
| 88 |
+
def generate_powell_response(question, max_length=256, num_beams=1, temperature=0.3):
|
| 89 |
"""Generate a response in Jerome Powell's style"""
|
| 90 |
|
| 91 |
if model is None or tokenizer is None:
|
|
|
|
| 109 |
prompt,
|
| 110 |
return_tensors="pt",
|
| 111 |
truncation=True,
|
| 112 |
+
max_length=max_length,
|
| 113 |
padding=False,
|
| 114 |
)
|
| 115 |
|